Support newline replacements

Each newline can now carry a replacement string that is emitted in its place when the newline is rendered flat, which allows more complicated spacing.

It is still unclear how spacing between identifiers and punctuation should be handled.
This commit is contained in:
John Doty 2024-09-12 11:09:14 -07:00
parent b3b2102864
commit 938f0e5c69
2 changed files with 101 additions and 56 deletions

View file

@ -21,7 +21,7 @@ def cons(left: "Document", right: "Document") -> "Document":
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class NewLine: class NewLine:
pass replace: str
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
@ -119,13 +119,9 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
case Literal(text): case Literal(text):
remaining -= len(text) remaining -= len(text)
case NewLine(): case NewLine(replace):
if chunk.flat: if chunk.flat:
# These are newlines that have been rendered flat, remaining -= len(replace)
# they are spaces I guess? TODO: Consider alternate
# forms, something that "goes here instead of
# newline", like maybe the empty string or... what?
remaining -= 1
else: else:
# These are newlines that are real, so it must have # These are newlines that are real, so it must have
# all fit. # all fit.
@ -175,12 +171,10 @@ def layout_document(doc: Document, width: int) -> DocumentLayout:
output.append(text) output.append(text)
column += len(text) column += len(text)
case NewLine(): case NewLine(replace):
if chunk.flat: if chunk.flat:
# TODO: Custom newline flat mode. See also the output.append(replace)
# corresponding comment in the "fits" function. column += len(replace)
output.append(" ")
column += 1
else: else:
# TODO: Custom newline expansion, custom indent segments. # TODO: Custom newline expansion, custom indent segments.
output.append("\n" + (chunk.indent * " ")) output.append("\n" + (chunk.indent * " "))
@ -220,7 +214,7 @@ def resolve_document(doc: Document) -> Document:
return doc return doc
case Lazy(_): case Lazy(_):
return doc.resolve() return resolve_document(doc.resolve())
case _: case _:
return doc return doc
@ -239,16 +233,19 @@ class Matcher:
table: parser.ParseTable table: parser.ParseTable
indent_amounts: dict[str, int] indent_amounts: dict[str, int]
text_follow: dict[str, str] text_follow: dict[str, str]
newline_replace: dict[str, str]
def __init__( def __init__(
self, self,
table: parser.ParseTable, table: parser.ParseTable,
indent_amounts: dict[str, int], indent_amounts: dict[str, int],
text_follow: dict[str, str], text_follow: dict[str, str],
newline_replace: dict[str, str],
): ):
self.table = table self.table = table
self.indent_amounts = indent_amounts self.indent_amounts = indent_amounts
self.text_follow = text_follow self.text_follow = text_follow
self.newline_replace = newline_replace
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document: def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
stack: list[tuple[int, Document]] = [(0, None)] stack: list[tuple[int, Document]] = [(0, None)]
@ -294,15 +291,17 @@ class Matcher:
child = Indent(amount, child) child = Indent(amount, child)
elif name[0] == "n": elif name[0] == "n":
child = cons(child, NewLine()) replace = self.newline_replace[name]
print(f"!!!! {name} -> {repr(replace)}")
child = cons(child, NewLine(replace))
elif name[0] == "p": elif name[0] == "p":
child = cons(NewLine(), child) child = cons(NewLine(""), child)
else: else:
pass # Reducing a transparent rule probably. pass # Reducing a transparent rule probably.
goto = self.table.gotos[stack[-1][0]].get(name) goto = table.gotos[stack[-1][0]].get(name)
assert goto is not None assert goto is not None
stack.append((goto, child)) stack.append((goto, child))
@ -315,7 +314,27 @@ class Matcher:
if value.name: if value.name:
follow = self.text_follow.get(value.name) follow = self.text_follow.get(value.name)
else: else:
# Here is where we consider ephemera. We can say: if
# the trailing ephemera includes a blank, then we
# insert a blank here. We do not want to double-count
# blanks, maybe we can have some kind of a notion of
# what is a blank.
#
# A weird digression: one thing that's weird is that
# blank spaces are always kinda culturally assumed?
# But the computer always has to be taught. In hand-
# printers, the spaces are added by a person and the
# person doesn't think twice. We are in the unique
# position of "generalizing" the blank space for
# formatting purposes.
child = Text(value.start, value.end) child = Text(value.start, value.end)
for trivia in value.pre_trivia:
pass
for trivia in value.post_trivia:
pass
follow = self.text_follow.get(value.kind) follow = self.text_follow.get(value.kind)
if follow is not None: if follow is not None:
@ -357,6 +376,7 @@ class Printer:
group_count = 0 group_count = 0
indent_amounts: dict[str, int] = {} indent_amounts: dict[str, int] = {}
done_newline = False done_newline = False
newline_map: dict[str, str] = {}
def compile_nonterminal(name: str, rule: parser.NonTerminal): def compile_nonterminal(name: str, rule: parser.NonTerminal):
if name not in visited: if name not in visited:
@ -411,10 +431,13 @@ class Printer:
tx_children = [rule_name] tx_children = [rule_name]
if pretty.newline is not None: if pretty.newline is not None:
if not done_newline: newline_rule_name = newline_map.get(pretty.newline)
generated_grammar.append(("newline", [])) if newline_rule_name is None:
done_newline = True newline_rule_name = f"n{len(newline_map)}"
tx_children.append("newline") newline_map[pretty.newline] = newline_rule_name
generated_grammar.append((newline_rule_name, []))
tx_children.append(newline_rule_name)
# If it turned out to have formatting meta then we will # If it turned out to have formatting meta then we will
# have replaced or augmented the translated children # have replaced or augmented the translated children
@ -429,7 +452,13 @@ class Printer:
gen = self.grammar._generator(rule.name, generated_grammar) gen = self.grammar._generator(rule.name, generated_grammar)
parse_table = gen.gen_table() parse_table = gen.gen_table()
return Matcher(parse_table, indent_amounts, self._text_follow) newline_replace = {v: k for k, v in newline_map.items()}
return Matcher(
parse_table,
indent_amounts,
self._text_follow,
newline_replace,
)
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
result = self._matchers.get(rule.name) result = self._matchers.get(rule.name)

View file

@ -29,35 +29,42 @@ class JsonGrammar(Grammar):
@rule @rule
def object(self): def object(self):
return group(self.LCURLY + opt(indent(self._object_pairs)) + newline() + self.RCURLY) return group(
self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
)
@rule @rule
def _object_pairs(self): def _object_pairs(self):
return alt( return alt(
newline() + self.object_pair, self.object_pair,
newline() + self.object_pair + self.COMMA + self._object_pairs, self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
) )
@rule @rule
def object_pair(self): def object_pair(self):
return group(self.STRING + self.COLON + self.value) return group(self.STRING + self.COLON + indent(newline(" ") + self.value))
@rule @rule
def array(self): def array(self):
return group(self.LSQUARE + opt(indent(self._array_items)) + newline() + self.RSQUARE) return group(
self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
)
@rule @rule
def _array_items(self): def _array_items(self):
return alt( return alt(
newline() + self.value, self.value,
newline() + self.value + self.COMMA + self._array_items, self.value + self.COMMA + newline(" ") + self._array_items,
) )
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) BLANKS = Terminal(
Re.set(" ", "\t", "\r", "\n").plus(),
is_format_blank=True,
)
LCURLY = Terminal("{") LCURLY = Terminal("{")
RCURLY = Terminal("}") RCURLY = Terminal("}")
COMMA = Terminal(",") COMMA = Terminal(",")
COLON = Terminal(":", format_follow=" ") COLON = Terminal(":")
LSQUARE = Terminal("[") LSQUARE = Terminal("[")
RSQUARE = Terminal("]") RSQUARE = Terminal("]")
TRUE = Terminal("true") TRUE = Terminal("true")
@ -94,8 +101,8 @@ JSON_PARSER = runtime.Parser(JSON_TABLE)
def flatten_document(doc: wadler.Document, src: str) -> list: def flatten_document(doc: wadler.Document, src: str) -> list:
match doc: match doc:
case wadler.NewLine(): case wadler.NewLine(replace):
return ["<newline>"] return [f"<newline {repr(replace)}>"]
case wadler.Indent(): case wadler.Indent():
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]] return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
case wadler.Text(start, end): case wadler.Text(start, end):
@ -130,61 +137,70 @@ def test_convert_tree_to_document():
[ [
"<indent 1>", "<indent 1>",
[ [
"<newline>", "<newline ''>",
['"a"', ":", " ", "true"], [
'"a"',
":",
[
"<indent 1>",
["<newline ' '>", "true"],
],
],
",", ",",
"<newline>", "<newline ' '>",
[ [
'"b"', '"b"',
":", ":",
" ",
[ [
"[", "<indent 1>",
[ [
"<indent 1>", "<newline ' '>",
[ [
"<newline>", "[",
"1", [
",", "<indent 1>",
"<newline>", [
"2", "<newline ''>",
",", "1",
"<newline>", ",",
"3", "<newline ' '>",
"2",
",",
"<newline ' '>",
"3",
],
],
"<newline ''>",
"]",
], ],
], ],
"<newline>",
"]",
], ],
], ],
], ],
], ],
"<newline>", "<newline ''>",
"}", "}",
] ]
] ]
def test_layout_basic(): def test_layout_basic():
text = '{"a": true, "b":[1,2,3]}' text = '{"a": true, "b":[1,2,3], "c":[1,2,3,4,5,6,7]}'
tokens = runtime.GenericTokenStream(text, JSON_LEXER) tokens = runtime.GenericTokenStream(text, JSON_LEXER)
tree, errors = JSON_PARSER.parse(tokens) tree, errors = JSON_PARSER.parse(tokens)
assert [] == errors assert [] == errors
assert tree is not None assert tree is not None
printer = wadler.Printer(JSON) printer = wadler.Printer(JSON)
result = printer.format_tree(tree, 10).apply_to_source(text) result = printer.format_tree(tree, 50).apply_to_source(text)
assert ( assert (
result result
== """ == """
{ {
"a": true, "a": true,
"b": [ "b": [1, 2, 3],
1, "c": [1, 2, 3, 4, 5, 6, 7]
2,
3
]
} }
""".strip() """.strip()
) )