Actual pretty-printing!
Now we're cooking with gas, although we still have to deal with the fact that we're gluing everything together where there *should* be spaces. Many more improvements to come.
This commit is contained in:
parent
5d88b459b9
commit
d6dd54f4df
3 changed files with 201 additions and 21 deletions
|
|
@ -2673,29 +2673,48 @@ highlight = _Highlight()
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Pretty-printing metadata support
|
# Formatting (pretty-printing) metadata support
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class FormatMeta(SyntaxMeta):
|
class FormatMeta(SyntaxMeta):
|
||||||
newline: bool = False
|
newline: str | None = None
|
||||||
indent: int | None = None
|
indent: int | None = None
|
||||||
group: bool = False
|
group: bool = False
|
||||||
|
|
||||||
|
|
||||||
def group(*rules: Rule) -> Rule:
|
def group(*rules: Rule) -> Rule:
|
||||||
|
"""Indicates that the text should be put on a single line if possible
|
||||||
|
during pretty-printing. Has no effect on parsing.
|
||||||
|
"""
|
||||||
return mark(seq(*rules), format=FormatMeta(group=True))
|
return mark(seq(*rules), format=FormatMeta(group=True))
|
||||||
|
|
||||||
|
|
||||||
def indent(*rules: Rule, amount: int | None = None) -> Rule:
|
def indent(*rules: Rule, amount: int | None = None) -> Rule:
|
||||||
|
"""Indicates a new level indentation during pretty-printing. The provided
|
||||||
|
rules are otherwise treated as if they were in a sequence. This rule has
|
||||||
|
no effect on parsing otherwise.
|
||||||
|
|
||||||
|
The specified amount is the number of "indentation" values to indent the
|
||||||
|
lines with. It defaults to 1.
|
||||||
|
"""
|
||||||
if amount is None:
|
if amount is None:
|
||||||
amount = 4
|
amount = 1
|
||||||
return mark(seq(*rules), format=FormatMeta(indent=amount))
|
return mark(seq(*rules), format=FormatMeta(indent=amount))
|
||||||
|
|
||||||
|
|
||||||
def newline() -> Rule:
|
def newline(text: str | None = None) -> Rule:
|
||||||
return mark(Nothing, format=FormatMeta(newline=True))
|
"""Indicate that, during pretty-printing, the line can be broken here. Has
|
||||||
|
no effect on parsing.
|
||||||
|
|
||||||
|
If text is provided, the text will be inserted before the line break. This
|
||||||
|
allows for e.g. trailing commas in lists and whatnot to make things look
|
||||||
|
prettier, when supported.
|
||||||
|
"""
|
||||||
|
if text is None:
|
||||||
|
text = ""
|
||||||
|
return mark(Nothing, format=FormatMeta(newline=text))
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
|
||||||
152
parser/wadler.py
152
parser/wadler.py
|
|
@ -58,6 +58,145 @@ class Lazy:
|
||||||
Document = None | Text | NewLine | Cons | Indent | Group | Lazy
|
Document = None | Text | NewLine | Cons | Indent | Group | Lazy
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentLayout:
|
||||||
|
segments: list[str | tuple[int, int]]
|
||||||
|
|
||||||
|
def __init__(self, segments):
|
||||||
|
self.segments = segments
|
||||||
|
|
||||||
|
def apply_to_source(self, original: str) -> str:
|
||||||
|
result = ""
|
||||||
|
for segment in self.segments:
|
||||||
|
if isinstance(segment, str):
|
||||||
|
result += segment
|
||||||
|
else:
|
||||||
|
start, end = segment
|
||||||
|
result += original[start:end]
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def layout_document(doc: Document, width: int) -> DocumentLayout:
|
||||||
|
"""Lay out a document to fit within the given width.
|
||||||
|
|
||||||
|
The result of this function is a layout which can trivially be converted
|
||||||
|
into a string given the original document.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class Chunk:
|
||||||
|
doc: Document
|
||||||
|
indent: int
|
||||||
|
flat: bool
|
||||||
|
|
||||||
|
def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
|
||||||
|
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
|
||||||
|
|
||||||
|
column = 0
|
||||||
|
chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
|
||||||
|
|
||||||
|
def fits(chunk: Chunk) -> bool:
|
||||||
|
remaining = width - column
|
||||||
|
if remaining <= 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
stack = list(chunks)
|
||||||
|
stack.append(chunk)
|
||||||
|
while len(stack) > 0:
|
||||||
|
chunk = stack.pop()
|
||||||
|
match chunk.doc:
|
||||||
|
case None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
case Text(start, end):
|
||||||
|
remaining -= end - start
|
||||||
|
|
||||||
|
case NewLine():
|
||||||
|
if chunk.flat:
|
||||||
|
# These are newlines that have been rendered flat,
|
||||||
|
# they are spaces I guess? TODO: Consider alternate
|
||||||
|
# forms, something that "goes here instead of
|
||||||
|
# newline", like maybe the empty string or... what?
|
||||||
|
remaining -= 1
|
||||||
|
else:
|
||||||
|
# These are newlines that are real, so it must have
|
||||||
|
# all fit.
|
||||||
|
return True
|
||||||
|
|
||||||
|
case Cons(left, right):
|
||||||
|
stack.append(chunk.with_document(right))
|
||||||
|
stack.append(chunk.with_document(left))
|
||||||
|
|
||||||
|
case Lazy():
|
||||||
|
stack.append(chunk.with_document(chunk.doc.resolve()))
|
||||||
|
|
||||||
|
case Indent(amount, child):
|
||||||
|
stack.append(chunk.with_document(child, and_indent=amount))
|
||||||
|
|
||||||
|
case Group(child):
|
||||||
|
# The difference between this approach and Justin's twist
|
||||||
|
# is that we consider the flat variable in Newline(),
|
||||||
|
# above, rather than here in Group. This makes us more
|
||||||
|
# like Wadler's original formulation, I guess. The
|
||||||
|
# grouping is an implicit transform over alternatives
|
||||||
|
# represented by newline. (If we have other kinds of
|
||||||
|
# alternatives we'll have to work those out elsewhere as
|
||||||
|
# well.)
|
||||||
|
stack.append(chunk.with_document(child))
|
||||||
|
|
||||||
|
case _:
|
||||||
|
typing.assert_never(chunk.doc)
|
||||||
|
|
||||||
|
if remaining < 0:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True # Everything must fit, so great!
|
||||||
|
|
||||||
|
output: list[str | tuple[int, int]] = []
|
||||||
|
while len(chunks) > 0:
|
||||||
|
chunk = chunks.pop()
|
||||||
|
match chunk.doc:
|
||||||
|
case None:
|
||||||
|
pass
|
||||||
|
|
||||||
|
case Text(start, end):
|
||||||
|
output.append((start, end))
|
||||||
|
column += end - start
|
||||||
|
|
||||||
|
case NewLine():
|
||||||
|
if chunk.flat:
|
||||||
|
# TODO: Custom newline flat mode. See also the
|
||||||
|
# corresponding comment in the "fits" function.
|
||||||
|
output.append(" ")
|
||||||
|
column += 1
|
||||||
|
else:
|
||||||
|
# TODO: Custom newline expansion, custom indent segments.
|
||||||
|
output.append("\n" + (chunk.indent * " "))
|
||||||
|
column = chunk.indent
|
||||||
|
|
||||||
|
case Cons(left, right):
|
||||||
|
chunks.append(chunk.with_document(right))
|
||||||
|
chunks.append(chunk.with_document(left))
|
||||||
|
|
||||||
|
case Indent(amount, doc):
|
||||||
|
chunks.append(chunk.with_document(doc, and_indent=amount))
|
||||||
|
|
||||||
|
case Lazy():
|
||||||
|
chunks.append(chunk.with_document(chunk.doc.resolve()))
|
||||||
|
|
||||||
|
case Group(child):
|
||||||
|
candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
|
||||||
|
if chunk.flat or fits(candidate):
|
||||||
|
chunks.append(candidate)
|
||||||
|
else:
|
||||||
|
chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
|
||||||
|
|
||||||
|
case _:
|
||||||
|
typing.assert_never(chunk)
|
||||||
|
|
||||||
|
return DocumentLayout(output)
|
||||||
|
|
||||||
|
|
||||||
def resolve_document(doc: Document) -> Document:
|
def resolve_document(doc: Document) -> Document:
|
||||||
match doc:
|
match doc:
|
||||||
case Cons(left, right):
|
case Cons(left, right):
|
||||||
|
|
@ -75,12 +214,9 @@ def resolve_document(doc: Document) -> Document:
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
def layout_document(doc: Document) -> typing.Generator[str, None, None]:
|
|
||||||
del doc
|
|
||||||
raise NotImplementedError()
|
|
||||||
|
|
||||||
|
|
||||||
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
|
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
|
||||||
|
# TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION
|
||||||
|
# The naming condition is important but
|
||||||
if isinstance(child, runtime.Tree):
|
if isinstance(child, runtime.Tree):
|
||||||
return f"tree_{child.name}"
|
return f"tree_{child.name}"
|
||||||
else:
|
else:
|
||||||
|
|
@ -230,7 +366,7 @@ class Printer:
|
||||||
generated_grammar.append((rule_name, tx_children))
|
generated_grammar.append((rule_name, tx_children))
|
||||||
tx_children = [rule_name]
|
tx_children = [rule_name]
|
||||||
|
|
||||||
if pretty.newline:
|
if pretty.newline is not None:
|
||||||
if not done_newline:
|
if not done_newline:
|
||||||
generated_grammar.append(("newline", []))
|
generated_grammar.append(("newline", []))
|
||||||
done_newline = True
|
done_newline = True
|
||||||
|
|
@ -272,6 +408,6 @@ class Printer:
|
||||||
)
|
)
|
||||||
return resolve_document(m)
|
return resolve_document(m)
|
||||||
|
|
||||||
def format_tree(self, tree: runtime.Tree) -> str:
|
def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout:
|
||||||
doc = self.convert_tree_to_document(tree)
|
doc = self.convert_tree_to_document(tree)
|
||||||
return next(layout_document(doc))
|
return layout_document(doc, width)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt
|
from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent
|
||||||
|
|
||||||
import parser.runtime as runtime
|
import parser.runtime as runtime
|
||||||
import parser.wadler as wadler
|
import parser.wadler as wadler
|
||||||
|
|
@ -29,13 +29,13 @@ class JsonGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def object(self):
|
def object(self):
|
||||||
return group(self.LCURLY + opt(self._object_pairs) + self.RCURLY)
|
return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _object_pairs(self):
|
def _object_pairs(self):
|
||||||
return alt(
|
return alt(
|
||||||
self.object_pair + newline(),
|
newline() + self.object_pair,
|
||||||
self.object_pair + self.COMMA + newline() + self._object_pairs,
|
newline() + self.object_pair + self.COMMA + self._object_pairs,
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
|
|
@ -44,13 +44,13 @@ class JsonGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def array(self):
|
def array(self):
|
||||||
return group(self.LSQUARE + opt(self._array_items) + self.RSQUARE)
|
return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _array_items(self):
|
def _array_items(self):
|
||||||
return alt(
|
return alt(
|
||||||
self.value + newline(),
|
newline() + self.value,
|
||||||
self.value + self.COMMA + newline() + self._array_items,
|
newline() + self.value + self.COMMA + self._array_items,
|
||||||
)
|
)
|
||||||
|
|
||||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||||
|
|
@ -112,7 +112,7 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
|
||||||
typing.assert_never(doc)
|
typing.assert_never(doc)
|
||||||
|
|
||||||
|
|
||||||
def test_basic_printer():
|
def test_convert_tree_to_document():
|
||||||
text = '{"a": true, "b":[1,2,3]}'
|
text = '{"a": true, "b":[1,2,3]}'
|
||||||
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
|
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
|
||||||
tree, errors = JSON_PARSER.parse(tokens)
|
tree, errors = JSON_PARSER.parse(tokens)
|
||||||
|
|
@ -148,3 +148,28 @@ def test_basic_printer():
|
||||||
"}",
|
"}",
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_layout_basic():
|
||||||
|
text = '{"a": true, "b":[1,2,3]}'
|
||||||
|
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
|
||||||
|
tree, errors = JSON_PARSER.parse(tokens)
|
||||||
|
assert [] == errors
|
||||||
|
assert tree is not None
|
||||||
|
|
||||||
|
printer = wadler.Printer(JSON)
|
||||||
|
result = printer.format_tree(tree, 10).apply_to_source(text)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
result
|
||||||
|
== """
|
||||||
|
{
|
||||||
|
"a":true,
|
||||||
|
"b":[
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
3
|
||||||
|
]
|
||||||
|
}
|
||||||
|
""".strip()
|
||||||
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue