Actual pretty-printing!
Now we're cooking with gas, although we now have to deal with the fact that we're gluing everything together where there *should* be spaces. Many more improvements to come.
This commit is contained in:
parent
5d88b459b9
commit
d6dd54f4df
3 changed files with 201 additions and 21 deletions
|
|
@ -2673,29 +2673,48 @@ highlight = _Highlight()
|
|||
|
||||
|
||||
###############################################################################
|
||||
# Pretty-printing metadata support
|
||||
# Formatting (pretty-printing) metadata support
|
||||
###############################################################################
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class FormatMeta(SyntaxMeta):
|
||||
newline: bool = False
|
||||
newline: str | None = None
|
||||
indent: int | None = None
|
||||
group: bool = False
|
||||
|
||||
|
||||
def group(*rules: Rule) -> Rule:
|
||||
"""Indicates that the text should be put on a single line if possible
|
||||
during pretty-printing. Has no effect on parsing.
|
||||
"""
|
||||
return mark(seq(*rules), format=FormatMeta(group=True))
|
||||
|
||||
|
||||
def indent(*rules: Rule, amount: int | None = None) -> Rule:
|
||||
Indicates a new level of indentation during pretty-printing. The provided
|
||||
rules are otherwise treated as if they were in a sequence. This rule has
|
||||
no effect on parsing otherwise.
|
||||
|
||||
The specified amount is the number of "indentation" values to indent the
|
||||
lines with. It defaults to 1.
|
||||
"""
|
||||
if amount is None:
|
||||
amount = 4
|
||||
amount = 1
|
||||
return mark(seq(*rules), format=FormatMeta(indent=amount))
|
||||
|
||||
|
||||
def newline() -> Rule:
|
||||
return mark(Nothing, format=FormatMeta(newline=True))
|
||||
def newline(text: str | None = None) -> Rule:
|
||||
"""Indicate that, during pretty-printing, the line can be broken here. Has
|
||||
no effect on parsing.
|
||||
|
||||
If text is provided, the text will be inserted before the line break. This
|
||||
allows for e.g. trailing commas in lists and whatnot to make things look
|
||||
prettier, when supported.
|
||||
"""
|
||||
if text is None:
|
||||
text = ""
|
||||
return mark(Nothing, format=FormatMeta(newline=text))
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
|
|
|||
152
parser/wadler.py
152
parser/wadler.py
|
|
@ -58,6 +58,145 @@ class Lazy:
|
|||
Document = None | Text | NewLine | Cons | Indent | Group | Lazy
|
||||
|
||||
|
||||
class DocumentLayout:
|
||||
segments: list[str | tuple[int, int]]
|
||||
|
||||
def __init__(self, segments):
|
||||
self.segments = segments
|
||||
|
||||
def apply_to_source(self, original: str) -> str:
|
||||
result = ""
|
||||
for segment in self.segments:
|
||||
if isinstance(segment, str):
|
||||
result += segment
|
||||
else:
|
||||
start, end = segment
|
||||
result += original[start:end]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def layout_document(doc: Document, width: int) -> DocumentLayout:
|
||||
"""Lay out a document to fit within the given width.
|
||||
|
||||
The result of this function is a layout which can trivially be converted
|
||||
into a string given the original document.
|
||||
"""
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Chunk:
|
||||
doc: Document
|
||||
indent: int
|
||||
flat: bool
|
||||
|
||||
def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
|
||||
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
|
||||
|
||||
column = 0
|
||||
chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
|
||||
|
||||
def fits(chunk: Chunk) -> bool:
|
||||
remaining = width - column
|
||||
if remaining <= 0:
|
||||
return False
|
||||
|
||||
stack = list(chunks)
|
||||
stack.append(chunk)
|
||||
while len(stack) > 0:
|
||||
chunk = stack.pop()
|
||||
match chunk.doc:
|
||||
case None:
|
||||
pass
|
||||
|
||||
case Text(start, end):
|
||||
remaining -= end - start
|
||||
|
||||
case NewLine():
|
||||
if chunk.flat:
|
||||
# These are newlines that have been rendered flat,
|
||||
# they are spaces I guess? TODO: Consider alternate
|
||||
# forms, something that "goes here instead of
|
||||
# newline", like maybe the empty string or... what?
|
||||
remaining -= 1
|
||||
else:
|
||||
# These are newlines that are real, so it must have
|
||||
# all fit.
|
||||
return True
|
||||
|
||||
case Cons(left, right):
|
||||
stack.append(chunk.with_document(right))
|
||||
stack.append(chunk.with_document(left))
|
||||
|
||||
case Lazy():
|
||||
stack.append(chunk.with_document(chunk.doc.resolve()))
|
||||
|
||||
case Indent(amount, child):
|
||||
stack.append(chunk.with_document(child, and_indent=amount))
|
||||
|
||||
case Group(child):
|
||||
# The difference between this approach and Justin's twist
|
||||
# is that we consider the flat variable in Newline(),
|
||||
# above, rather than here in Group. This makes us more
|
||||
# like Wadler's original formulation, I guess. The
|
||||
# grouping is an implicit transform over alternatives
|
||||
# represented by newline. (If we have other kinds of
|
||||
# alternatives we'll have to work those out elsewhere as
|
||||
# well.)
|
||||
stack.append(chunk.with_document(child))
|
||||
|
||||
case _:
|
||||
typing.assert_never(chunk.doc)
|
||||
|
||||
if remaining < 0:
|
||||
return False
|
||||
|
||||
return True # Everything must fit, so great!
|
||||
|
||||
output: list[str | tuple[int, int]] = []
|
||||
while len(chunks) > 0:
|
||||
chunk = chunks.pop()
|
||||
match chunk.doc:
|
||||
case None:
|
||||
pass
|
||||
|
||||
case Text(start, end):
|
||||
output.append((start, end))
|
||||
column += end - start
|
||||
|
||||
case NewLine():
|
||||
if chunk.flat:
|
||||
# TODO: Custom newline flat mode. See also the
|
||||
# corresponding comment in the "fits" function.
|
||||
output.append(" ")
|
||||
column += 1
|
||||
else:
|
||||
# TODO: Custom newline expansion, custom indent segments.
|
||||
output.append("\n" + (chunk.indent * " "))
|
||||
column = chunk.indent
|
||||
|
||||
case Cons(left, right):
|
||||
chunks.append(chunk.with_document(right))
|
||||
chunks.append(chunk.with_document(left))
|
||||
|
||||
case Indent(amount, doc):
|
||||
chunks.append(chunk.with_document(doc, and_indent=amount))
|
||||
|
||||
case Lazy():
|
||||
chunks.append(chunk.with_document(chunk.doc.resolve()))
|
||||
|
||||
case Group(child):
|
||||
candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
|
||||
if chunk.flat or fits(candidate):
|
||||
chunks.append(candidate)
|
||||
else:
|
||||
chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
|
||||
|
||||
case _:
|
||||
typing.assert_never(chunk)
|
||||
|
||||
return DocumentLayout(output)
|
||||
|
||||
|
||||
def resolve_document(doc: Document) -> Document:
|
||||
match doc:
|
||||
case Cons(left, right):
|
||||
|
|
@ -75,12 +214,9 @@ def resolve_document(doc: Document) -> Document:
|
|||
return doc
|
||||
|
||||
|
||||
def layout_document(doc: Document) -> typing.Generator[str, None, None]:
|
||||
del doc
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
|
||||
# TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION
|
||||
# The naming condition is important but
|
||||
if isinstance(child, runtime.Tree):
|
||||
return f"tree_{child.name}"
|
||||
else:
|
||||
|
|
@ -230,7 +366,7 @@ class Printer:
|
|||
generated_grammar.append((rule_name, tx_children))
|
||||
tx_children = [rule_name]
|
||||
|
||||
if pretty.newline:
|
||||
if pretty.newline is not None:
|
||||
if not done_newline:
|
||||
generated_grammar.append(("newline", []))
|
||||
done_newline = True
|
||||
|
|
@ -272,6 +408,6 @@ class Printer:
|
|||
)
|
||||
return resolve_document(m)
|
||||
|
||||
def format_tree(self, tree: runtime.Tree) -> str:
|
||||
def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout:
|
||||
doc = self.convert_tree_to_document(tree)
|
||||
return next(layout_document(doc))
|
||||
return layout_document(doc, width)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import typing
|
||||
|
||||
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt
|
||||
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent
|
||||
|
||||
import parser.runtime as runtime
|
||||
import parser.wadler as wadler
|
||||
|
|
@ -29,13 +29,13 @@ class JsonGrammar(Grammar):
|
|||
|
||||
@rule
|
||||
def object(self):
|
||||
return group(self.LCURLY + opt(self._object_pairs) + self.RCURLY)
|
||||
return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)
|
||||
|
||||
@rule
|
||||
def _object_pairs(self):
|
||||
return alt(
|
||||
self.object_pair + newline(),
|
||||
self.object_pair + self.COMMA + newline() + self._object_pairs,
|
||||
newline() + self.object_pair,
|
||||
newline() + self.object_pair + self.COMMA + self._object_pairs,
|
||||
)
|
||||
|
||||
@rule
|
||||
|
|
@ -44,13 +44,13 @@ class JsonGrammar(Grammar):
|
|||
|
||||
@rule
|
||||
def array(self):
|
||||
return group(self.LSQUARE + opt(self._array_items) + self.RSQUARE)
|
||||
return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)
|
||||
|
||||
@rule
|
||||
def _array_items(self):
|
||||
return alt(
|
||||
self.value + newline(),
|
||||
self.value + self.COMMA + newline() + self._array_items,
|
||||
newline() + self.value,
|
||||
newline() + self.value + self.COMMA + self._array_items,
|
||||
)
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
|
@ -112,7 +112,7 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
|
|||
typing.assert_never(doc)
|
||||
|
||||
|
||||
def test_basic_printer():
|
||||
def test_convert_tree_to_document():
|
||||
text = '{"a": true, "b":[1,2,3]}'
|
||||
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
|
||||
tree, errors = JSON_PARSER.parse(tokens)
|
||||
|
|
@ -148,3 +148,28 @@ def test_basic_printer():
|
|||
"}",
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def test_layout_basic():
|
||||
text = '{"a": true, "b":[1,2,3]}'
|
||||
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
|
||||
tree, errors = JSON_PARSER.parse(tokens)
|
||||
assert [] == errors
|
||||
assert tree is not None
|
||||
|
||||
printer = wadler.Printer(JSON)
|
||||
result = printer.format_tree(tree, 10).apply_to_source(text)
|
||||
|
||||
assert (
|
||||
result
|
||||
== """
|
||||
{
|
||||
"a":true,
|
||||
"b":[
|
||||
1,
|
||||
2,
|
||||
3
|
||||
]
|
||||
}
|
||||
""".strip()
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue