Support newline replacements

This allows us to do maybe more complicated spacing.

Still unclear about identifier/punctuation spacing.
This commit is contained in:
John Doty 2024-09-12 11:09:14 -07:00
parent b3b2102864
commit 938f0e5c69
2 changed files with 101 additions and 56 deletions

View file

@ -21,7 +21,7 @@ def cons(left: "Document", right: "Document") -> "Document":
@dataclasses.dataclass(frozen=True)
class NewLine:
pass
replace: str
@dataclasses.dataclass(frozen=True)
@ -119,13 +119,9 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
case Literal(text):
remaining -= len(text)
case NewLine():
case NewLine(replace):
if chunk.flat:
# These are newlines that have been rendered flat,
# they are spaces I guess? TODO: Consider alternate
# forms, something that "goes here instead of
# newline", like maybe the empty string or... what?
remaining -= 1
remaining -= len(replace)
else:
# These are newlines that are real, so it must have
# all fit.
@ -175,12 +171,10 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
output.append(text)
column += len(text)
case NewLine():
case NewLine(replace):
if chunk.flat:
# TODO: Custom newline flat mode. See also the
# corresponding comment in the "fits" function.
output.append(" ")
column += 1
output.append(replace)
column += len(replace)
else:
# TODO: Custom newline expansion, custom indent segments.
output.append("\n" + (chunk.indent * " "))
@ -220,7 +214,7 @@ def resolve_document(doc: Document) -> Document:
return doc
case Lazy(_):
return doc.resolve()
return resolve_document(doc.resolve())
case _:
return doc
@ -239,16 +233,19 @@ class Matcher:
table: parser.ParseTable
indent_amounts: dict[str, int]
text_follow: dict[str, str]
newline_replace: dict[str, str]
def __init__(
self,
table: parser.ParseTable,
indent_amounts: dict[str, int],
text_follow: dict[str, str],
newline_replace: dict[str, str],
):
self.table = table
self.indent_amounts = indent_amounts
self.text_follow = text_follow
self.newline_replace = newline_replace
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
stack: list[tuple[int, Document]] = [(0, None)]
@ -294,15 +291,17 @@ class Matcher:
child = Indent(amount, child)
elif name[0] == "n":
child = cons(child, NewLine())
replace = self.newline_replace[name]
print(f"!!!! {name} -> {repr(replace)}")
child = cons(child, NewLine(replace))
elif name[0] == "p":
child = cons(NewLine(), child)
child = cons(NewLine(""), child)
else:
pass # Reducing a transparent rule probably.
goto = self.table.gotos[stack[-1][0]].get(name)
goto = table.gotos[stack[-1][0]].get(name)
assert goto is not None
stack.append((goto, child))
@ -315,7 +314,27 @@ class Matcher:
if value.name:
follow = self.text_follow.get(value.name)
else:
# Here is where we consider ephemera. We can say: if
# the trailing ephemera includes a blank, then we
# insert a blank here. We do not want to double-count
# blanks, maybe we can have some kind of a notion of
# what is a blank.
#
# A wierd digression: one thing that's weird is that
# blank spaces are always kinda culturally assumed?
# But the computer always has to be taught. In hand-
# printers, the spaces are added by a person and the
# person doesn't think twice. We are in the unique
# position of "generalizing" the blank space for
# formatting purposes.
child = Text(value.start, value.end)
for trivia in value.pre_trivia:
pass
for trivia in value.post_trivia:
pass
follow = self.text_follow.get(value.kind)
if follow is not None:
@ -357,6 +376,7 @@ class Printer:
group_count = 0
indent_amounts: dict[str, int] = {}
done_newline = False
newline_map: dict[str, str] = {}
def compile_nonterminal(name: str, rule: parser.NonTerminal):
if name not in visited:
@ -411,10 +431,13 @@ class Printer:
tx_children = [rule_name]
if pretty.newline is not None:
if not done_newline:
generated_grammar.append(("newline", []))
done_newline = True
tx_children.append("newline")
newline_rule_name = newline_map.get(pretty.newline)
if newline_rule_name is None:
newline_rule_name = f"n{len(newline_map)}"
newline_map[pretty.newline] = newline_rule_name
generated_grammar.append((newline_rule_name, []))
tx_children.append(newline_rule_name)
# If it turned out to have formatting meta then we will
# have replaced or augmented the translated children
@ -429,7 +452,13 @@ class Printer:
gen = self.grammar._generator(rule.name, generated_grammar)
parse_table = gen.gen_table()
return Matcher(parse_table, indent_amounts, self._text_follow)
newline_replace = {v: k for k, v in newline_map.items()}
return Matcher(
parse_table,
indent_amounts,
self._text_follow,
newline_replace,
)
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
result = self._matchers.get(rule.name)

View file

@ -29,35 +29,42 @@ class JsonGrammar(Grammar):
@rule
def object(self):
return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY)
return group(
self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
)
@rule
def _object_pairs(self):
return alt(
newline() + self.object_pair,
newline() + self.object_pair + self.COMMA + self._object_pairs,
self.object_pair,
self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
)
@rule
def object_pair(self):
return group(self.STRING + self.COLON + self.value)
return group(self.STRING + self.COLON + indent(newline(" ") + self.value))
@rule
def array(self):
return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE)
return group(
self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
)
@rule
def _array_items(self):
return alt(
newline() + self.value,
newline() + self.value + self.COMMA + self._array_items,
self.value,
self.value + self.COMMA + newline(" ") + self._array_items,
)
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
BLANKS = Terminal(
Re.set(" ", "\t", "\r", "\n").plus(),
is_format_blank=True,
)
LCURLY = Terminal("{")
RCURLY = Terminal("}")
COMMA = Terminal(",")
COLON = Terminal(":", format_follow=" ")
COLON = Terminal(":")
LSQUARE = Terminal("[")
RSQUARE = Terminal("]")
TRUE = Terminal("true")
@ -94,8 +101,8 @@ JSON_PARSER = runtime.Parser(JSON_TABLE)
def flatten_document(doc: wadler.Document, src: str) -> list:
match doc:
case wadler.NewLine():
return ["<newline>"]
case wadler.NewLine(replace):
return [f"<newline {repr(replace)}>"]
case wadler.Indent():
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
case wadler.Text(start, end):
@ -130,61 +137,70 @@ def test_convert_tree_to_document():
[
"<indent 1>",
[
"<newline>",
['"a"', ":", " ", "true"],
"<newline ''>",
[
'"a"',
":",
[
"<indent 1>",
["<newline ' '>", "true"],
],
],
",",
"<newline>",
"<newline ' '>",
[
'"b"',
":",
" ",
[
"[",
"<indent 1>",
[
"<indent 1>",
"<newline ' '>",
[
"<newline>",
"1",
",",
"<newline>",
"2",
",",
"<newline>",
"3",
"[",
[
"<indent 1>",
[
"<newline ''>",
"1",
",",
"<newline ' '>",
"2",
",",
"<newline ' '>",
"3",
],
],
"<newline ''>",
"]",
],
],
"<newline>",
"]",
],
],
],
],
"<newline>",
"<newline ''>",
"}",
]
]
def test_layout_basic():
text = '{"a": true, "b":[1,2,3]}'
text = '{"a": true, "b":[1,2,3], "c":[1,2,3,4,5,6,7]}'
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
tree, errors = JSON_PARSER.parse(tokens)
assert [] == errors
assert tree is not None
printer = wadler.Printer(JSON)
result = printer.format_tree(tree, 10).apply_to_source(text)
result = printer.format_tree(tree, 50).apply_to_source(text)
assert (
result
== """
{
"a": true,
"b": [
1,
2,
3
]
"b": [1, 2, 3],
"c": [1, 2, 3, 4, 5, 6, 7]
}
""".strip()
)