###############################################################################
# Formatting (pretty-printing) metadata support
###############################################################################

@dataclasses.dataclass
class FormatMeta(SyntaxMeta):
    # Text to insert before the line break when a break is allowed at this
    # point (set by `newline()`); None when the rule is not a break marker.
    newline: str | None = None
    # Number of indentation levels applied to the marked rules (set by
    # `indent()`); None when the rule does not indent.
    indent: int | None = None
    # True when the marked rules should stay on one line if possible (set by
    # `group()`).
    group: bool = False


def group(*rules: Rule) -> Rule:
    """Mark the rules as a group for pretty-printing.

    A group is kept on a single line when possible. The rules are treated as
    a sequence, and the marker has no effect on parsing.
    """
    grouped = seq(*rules)
    return mark(grouped, format=FormatMeta(group=True))


def indent(*rules: Rule, amount: int | None = None) -> Rule:
    """Mark the rules as a new level of indentation for pretty-printing.

    The rules are otherwise treated as if they were in a sequence, and the
    marker has no effect on parsing.

    `amount` is the number of "indentation" values to indent the lines with.
    It defaults to 1.
    """
    levels = 1 if amount is None else amount
    return mark(seq(*rules), format=FormatMeta(indent=levels))


def newline(text: str | None = None) -> Rule:
    """Mark a point where the line may be broken during pretty-printing.

    The marker has no effect on parsing.

    If `text` is provided, it is inserted before the line break. This allows
    for things like trailing commas in lists, when supported. With no text,
    the empty string is recorded.
    """
    before_break = "" if text is None else text
    return mark(Nothing, format=FormatMeta(newline=before_break))
+ """ + if text is None: + text = "" + return mark(Nothing, format=FormatMeta(newline=text)) ############################################################################### diff --git a/parser/wadler.py b/parser/wadler.py index 98c249f..78e4e93 100644 --- a/parser/wadler.py +++ b/parser/wadler.py @@ -58,6 +58,145 @@ class Lazy: Document = None | Text | NewLine | Cons | Indent | Group | Lazy +class DocumentLayout: + segments: list[str | tuple[int, int]] + + def __init__(self, segments): + self.segments = segments + + def apply_to_source(self, original: str) -> str: + result = "" + for segment in self.segments: + if isinstance(segment, str): + result += segment + else: + start, end = segment + result += original[start:end] + + return result + + +def layout_document(doc: Document, width: int) -> DocumentLayout: + """Lay out a document to fit within the given width. + + The result of this function is a layout which can trivially be converted + into a string given the original document. + """ + + @dataclasses.dataclass + class Chunk: + doc: Document + indent: int + flat: bool + + def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk": + return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat) + + column = 0 + chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)] + + def fits(chunk: Chunk) -> bool: + remaining = width - column + if remaining <= 0: + return False + + stack = list(chunks) + stack.append(chunk) + while len(stack) > 0: + chunk = stack.pop() + match chunk.doc: + case None: + pass + + case Text(start, end): + remaining -= end - start + + case NewLine(): + if chunk.flat: + # These are newlines that have been rendered flat, + # they are spaces I guess? TODO: Consider alternate + # forms, something that "goes here instead of + # newline", like maybe the empty string or... what? + remaining -= 1 + else: + # These are newlines that are real, so it must have + # all fit. 
+ return True + + case Cons(left, right): + stack.append(chunk.with_document(right)) + stack.append(chunk.with_document(left)) + + case Lazy(): + stack.append(chunk.with_document(chunk.doc.resolve())) + + case Indent(amount, child): + stack.append(chunk.with_document(child, and_indent=amount)) + + case Group(child): + # The difference between this approach and Justin's twist + # is that we consider the flat variable in Newline(), + # above, rather than here in Group. This makes us more + # like Wadler's original formulation, I guess. The + # grouping is an implicit transform over alternatives + # represented by newline. (If we have other kinds of + # alternatives we'll have to work those out elsewhere as + # well.) + stack.append(chunk.with_document(child)) + + case _: + typing.assert_never(chunk.doc) + + if remaining < 0: + return False + + return True # Everything must fit, so great! + + output: list[str | tuple[int, int]] = [] + while len(chunks) > 0: + chunk = chunks.pop() + match chunk.doc: + case None: + pass + + case Text(start, end): + output.append((start, end)) + column += end - start + + case NewLine(): + if chunk.flat: + # TODO: Custom newline flat mode. See also the + # corresponding comment in the "fits" function. + output.append(" ") + column += 1 + else: + # TODO: Custom newline expansion, custom indent segments. 
+ output.append("\n" + (chunk.indent * " ")) + column = chunk.indent + + case Cons(left, right): + chunks.append(chunk.with_document(right)) + chunks.append(chunk.with_document(left)) + + case Indent(amount, doc): + chunks.append(chunk.with_document(doc, and_indent=amount)) + + case Lazy(): + chunks.append(chunk.with_document(chunk.doc.resolve())) + + case Group(child): + candidate = Chunk(doc=child, indent=chunk.indent, flat=True) + if chunk.flat or fits(candidate): + chunks.append(candidate) + else: + chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False)) + + case _: + typing.assert_never(chunk) + + return DocumentLayout(output) + + def resolve_document(doc: Document) -> Document: match doc: case Cons(left, right): @@ -75,12 +214,9 @@ def resolve_document(doc: Document) -> Document: return doc -def layout_document(doc: Document) -> typing.Generator[str, None, None]: - del doc - raise NotImplementedError() - - def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str: + # TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION + # The naming condition is important but if isinstance(child, runtime.Tree): return f"tree_{child.name}" else: @@ -230,7 +366,7 @@ class Printer: generated_grammar.append((rule_name, tx_children)) tx_children = [rule_name] - if pretty.newline: + if pretty.newline is not None: if not done_newline: generated_grammar.append(("newline", [])) done_newline = True @@ -272,6 +408,6 @@ class Printer: ) return resolve_document(m) - def format_tree(self, tree: runtime.Tree) -> str: + def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout: doc = self.convert_tree_to_document(tree) - return next(layout_document(doc)) + return layout_document(doc, width) diff --git a/tests/test_wadler.py b/tests/test_wadler.py index 6fc69ae..f392a87 100644 --- a/tests/test_wadler.py +++ b/tests/test_wadler.py @@ -1,6 +1,6 @@ import typing -from parser.parser import Grammar, Re, Terminal, rule, opt, group, newline, alt +from parser.parser 
import Grammar, Re, Terminal, rule, opt, group, newline, alt, indent import parser.runtime as runtime import parser.wadler as wadler @@ -29,13 +29,13 @@ class JsonGrammar(Grammar): @rule def object(self): - return group(self.LCURLY + opt(self._object_pairs) + self.RCURLY) + return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY) @rule def _object_pairs(self): return alt( - self.object_pair + newline(), - self.object_pair + self.COMMA + newline() + self._object_pairs, + newline() + self.object_pair, + newline() + self.object_pair + self.COMMA + self._object_pairs, ) @rule @@ -44,13 +44,13 @@ class JsonGrammar(Grammar): @rule def array(self): - return group(self.LSQUARE + opt(self._array_items) + self.RSQUARE) + return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE) @rule def _array_items(self): return alt( - self.value + newline(), - self.value + self.COMMA + newline() + self._array_items, + newline() + self.value, + newline() + self.value + self.COMMA + self._array_items, ) BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) @@ -112,7 +112,7 @@ def flatten_document(doc: wadler.Document, src: str) -> list: typing.assert_never(doc) -def test_basic_printer(): +def test_convert_tree_to_document(): text = '{"a": true, "b":[1,2,3]}' tokens = runtime.GenericTokenStream(text, JSON_LEXER) tree, errors = JSON_PARSER.parse(tokens) @@ -148,3 +148,28 @@ def test_basic_printer(): "}", ] ] + + +def test_layout_basic(): + text = '{"a": true, "b":[1,2,3]}' + tokens = runtime.GenericTokenStream(text, JSON_LEXER) + tree, errors = JSON_PARSER.parse(tokens) + assert [] == errors + assert tree is not None + + printer = wadler.Printer(JSON) + result = printer.format_tree(tree, 10).apply_to_source(text) + + assert ( + result + == """ +{ + "a":true, + "b":[ + 1, + 2, + 3 + ] +} +""".strip() + )