471 lines
16 KiB
Python
471 lines
16 KiB
Python
# A prettier printer.
|
|
import dataclasses
|
|
import typing
|
|
|
|
from . import parser
|
|
from . import runtime
|
|
|
|
# NOTE: A *force break*, i.e., a document which forces things to not fit on
# one line, is represented by `ForceBreak` below.
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Cons:
|
|
left: "Document"
|
|
right: "Document"
|
|
|
|
|
|
def cons(left: "Document", right: "Document") -> "Document":
    """Concatenate two documents, dropping whichever side is empty.

    A `Cons` node is only built when both sides are truthy; otherwise the
    non-empty side (or the falsy `right` if both are empty) is returned
    unchanged.
    """
    if not left:
        return right
    if not right:
        return left
    return Cons(left, right)
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class NewLine:
|
|
replace: str
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class ForceBreak:
|
|
pass
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Indent:
|
|
amount: int
|
|
doc: "Document"
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Text:
|
|
start: int
|
|
end: int
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Literal:
|
|
text: str
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Group:
|
|
child: "Document"
|
|
|
|
|
|
@dataclasses.dataclass
class Lazy:
    """A document that is computed on first use and cached afterwards."""

    # Either a zero-argument callable that produces the document, or the
    # document itself once `resolve` has run.
    value: typing.Callable[[], "Document"] | "Document"

    def resolve(self) -> "Document":
        """Return the underlying document, computing and caching it if needed."""
        pending = self.value
        if not callable(pending):
            return pending

        # Replace the thunk with its result so it only ever runs once.
        self.value = pending()
        return self.value

    @classmethod
    def from_tree(cls, tree: runtime.Tree, printer: "Printer") -> "Lazy":
        """Build a lazy document that converts `tree` with `printer` on demand."""

        def build() -> "Document":
            return printer.convert_tree_to_document(tree)

        return Lazy(build)
|
|
|
|
|
|
# A document is either empty (None) or one of the node types defined above.
Document = None | Text | Literal | NewLine | ForceBreak | Cons | Indent | Group | Lazy
|
|
|
|
|
|
class DocumentLayout:
|
|
segments: list[str | tuple[int, int]]
|
|
|
|
def __init__(self, segments):
|
|
self.segments = segments
|
|
|
|
def apply_to_source(self, original: str) -> str:
|
|
result = ""
|
|
for segment in self.segments:
|
|
if isinstance(segment, str):
|
|
result += segment
|
|
else:
|
|
start, end = segment
|
|
result += original[start:end]
|
|
|
|
return result
|
|
|
|
|
|
def layout_document(doc: Document, width: int) -> DocumentLayout:
|
|
"""Lay out a document to fit within the given width.
|
|
|
|
The result of this function is a layout which can trivially be converted
|
|
into a string given the original document.
|
|
"""
|
|
|
|
@dataclasses.dataclass
|
|
class Chunk:
|
|
doc: Document
|
|
indent: int
|
|
flat: bool
|
|
|
|
def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
|
|
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
|
|
|
|
column = 0
|
|
chunks: list[Chunk] = [Chunk(doc=doc, indent=0, flat=False)]
|
|
|
|
def fits(chunk: Chunk) -> bool:
|
|
remaining = width - column
|
|
if remaining <= 0:
|
|
return False
|
|
|
|
stack = list(chunks)
|
|
stack.append(chunk)
|
|
while len(stack) > 0:
|
|
chunk = stack.pop()
|
|
match chunk.doc:
|
|
case None:
|
|
pass
|
|
|
|
case Text(start, end):
|
|
remaining -= end - start
|
|
|
|
case Literal(text):
|
|
remaining -= len(text)
|
|
|
|
case NewLine(replace):
|
|
if chunk.flat:
|
|
remaining -= len(replace)
|
|
else:
|
|
# These are newlines that are real, so it must have
|
|
# all fit.
|
|
return True
|
|
|
|
case ForceBreak():
|
|
# If we're in a flattened chunk then force it to break by
|
|
# returning false here, otherwise we're at the end of the
|
|
# line and yes, whatever you were asking about has fit.
|
|
return not chunk.flat
|
|
|
|
case Cons(left, right):
|
|
stack.append(chunk.with_document(right))
|
|
stack.append(chunk.with_document(left))
|
|
|
|
case Lazy():
|
|
stack.append(chunk.with_document(chunk.doc.resolve()))
|
|
|
|
case Indent(amount, child):
|
|
stack.append(chunk.with_document(child, and_indent=amount))
|
|
|
|
case Group(child):
|
|
# The difference between this approach and Justin's twist
|
|
# is that we consider the flat variable in Newline(),
|
|
# above, rather than here in Group. This makes us more
|
|
# like Wadler's original formulation, I guess. The
|
|
# grouping is an implicit transform over alternatives
|
|
# represented by newline. (If we have other kinds of
|
|
# alternatives we'll have to work those out elsewhere as
|
|
# well.)
|
|
stack.append(chunk.with_document(child))
|
|
|
|
case _:
|
|
typing.assert_never(chunk.doc)
|
|
|
|
if remaining < 0:
|
|
return False
|
|
|
|
return True # Everything must fit, so great!
|
|
|
|
output: list[str | tuple[int, int]] = []
|
|
while len(chunks) > 0:
|
|
chunk = chunks.pop()
|
|
match chunk.doc:
|
|
case None:
|
|
pass
|
|
|
|
case Text(start, end):
|
|
output.append((start, end))
|
|
column += end - start
|
|
|
|
case Literal(text):
|
|
output.append(text)
|
|
column += len(text)
|
|
|
|
case NewLine(replace):
|
|
if chunk.flat:
|
|
output.append(replace)
|
|
column += len(replace)
|
|
else:
|
|
# TODO: Custom newline expansion, custom indent segments.
|
|
output.append("\n" + (chunk.indent * " "))
|
|
column = chunk.indent
|
|
|
|
case ForceBreak():
|
|
# TODO: Custom newline expansion, custom indent segments.
|
|
output.append("\n" + (chunk.indent * " "))
|
|
column = chunk.indent
|
|
|
|
case Cons(left, right):
|
|
chunks.append(chunk.with_document(right))
|
|
chunks.append(chunk.with_document(left))
|
|
|
|
case Indent(amount, doc):
|
|
chunks.append(chunk.with_document(doc, and_indent=amount))
|
|
|
|
case Lazy():
|
|
chunks.append(chunk.with_document(chunk.doc.resolve()))
|
|
|
|
case Group(child):
|
|
candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
|
|
if chunk.flat or fits(candidate):
|
|
chunks.append(candidate)
|
|
else:
|
|
chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
|
|
|
|
case _:
|
|
typing.assert_never(chunk)
|
|
|
|
return DocumentLayout(output)
|
|
|
|
|
|
def resolve_document(doc: Document) -> Document:
    """Force the lazy documents reachable through `Cons` nodes.

    `Lazy` documents are replaced by their (recursively resolved) result and
    `Cons` nodes are rebuilt only if one of their sides changed. Any other
    kind of document is returned as-is, without looking inside it.
    """
    if isinstance(doc, Lazy):
        return resolve_document(doc.resolve())

    if isinstance(doc, Cons):
        new_left = resolve_document(doc.left)
        new_right = resolve_document(doc.right)
        # Keep the existing node when nothing underneath it was replaced.
        if new_left is doc.left and new_right is doc.right:
            return doc
        return cons(new_left, new_right)

    return doc
|
|
|
|
|
|
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
    """Return the matcher-grammar symbol name for a tree child.

    Trees become `tree_<name>` and tokens become `token_<kind>`.
    """
    # TODO: RECONSIDER THE EXISTENCE OF THIS FUNCTION
    # The naming convention is important: these names must agree with the
    # `tree_...` and `token_...` symbols emitted by `Printer.compile_rule`.
    if not isinstance(child, runtime.Tree):
        return f"token_{child.kind}"
    return f"tree_{child.name}"
|
|
|
|
|
|
@dataclasses.dataclass
|
|
class Matcher:
|
|
table: parser.ParseTable
|
|
indent_amounts: dict[str, int]
|
|
newline_replace: dict[str, str]
|
|
|
|
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
|
|
stack: list[tuple[int, Document]] = [(0, None)]
|
|
table = self.table
|
|
|
|
input = [(child_to_name(i), i) for i in items] + [
|
|
(
|
|
"$",
|
|
runtime.TokenValue(
|
|
kind="$",
|
|
start=0,
|
|
end=0,
|
|
pre_trivia=[],
|
|
post_trivia=[],
|
|
),
|
|
)
|
|
]
|
|
input_index = 0
|
|
|
|
while True:
|
|
current_token = input[input_index]
|
|
current_state = stack[-1][0]
|
|
action = table.actions[current_state].get(current_token[0], parser.Error())
|
|
|
|
match action:
|
|
case parser.Accept():
|
|
return stack[-1][1]
|
|
|
|
case parser.Reduce(name=name, count=size):
|
|
child: Document = None
|
|
if size > 0:
|
|
for _, c in stack[-size:]:
|
|
if c is None:
|
|
continue
|
|
child = cons(child, c)
|
|
del stack[-size:]
|
|
|
|
if name[0] == "g":
|
|
child = Group(child)
|
|
|
|
elif name[0] == "i":
|
|
amount = self.indent_amounts[name]
|
|
child = Indent(amount, child)
|
|
|
|
elif name[0] == "n":
|
|
replace = self.newline_replace[name]
|
|
child = cons(child, NewLine(replace))
|
|
|
|
elif name[0] == "p":
|
|
child = cons(NewLine(""), child)
|
|
|
|
elif name[0] == "f":
|
|
child = cons(child, ForceBreak())
|
|
|
|
else:
|
|
pass # Reducing a transparent rule probably.
|
|
|
|
goto = table.gotos[stack[-1][0]].get(name)
|
|
assert goto is not None
|
|
stack.append((goto, child))
|
|
|
|
case parser.Shift():
|
|
value = current_token[1]
|
|
|
|
if isinstance(value, runtime.Tree):
|
|
child = Lazy.from_tree(value, printer)
|
|
else:
|
|
# TODO: Consider trivia and preserve comments!
|
|
child = Text(value.start, value.end)
|
|
|
|
stack.append((action.state, child))
|
|
input_index += 1
|
|
|
|
case parser.Error():
|
|
raise Exception("How did I get a parse error here??")
|
|
|
|
|
|
class Printer:
    """Converts parse trees of a grammar into documents and lays them out."""

    # TODO: Pre-generate the matcher tables for a grammar, to make it
    #       possible to do codegen in other languages.
    grammar: parser.Grammar
    _matchers: dict[str, Matcher]
    _nonterminals: dict[str, parser.NonTerminal]

    def __init__(self, grammar: parser.Grammar):
        self.grammar = grammar
        self._nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
        self._matchers = {}

    def lookup_nonterminal(self, name: str) -> parser.NonTerminal:
        """Return the nonterminal called `name`; raises KeyError if unknown."""
        return self._nonterminals[name]

    def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
        """Build the matcher for the children of trees produced by `rule`.

        The productions of `rule` are translated into a small grammar over
        the symbols `tree_<name>` and `token_<name>`, with extra generated
        rules standing in for the formatting metadata, and a parse table is
        generated from it.
        """
        productions: list[typing.Tuple[str, list[str]]] = []
        seen: set[str] = set()
        group_rules: dict[tuple[str, ...], str] = {}
        indent_amounts: dict[str, int] = {}
        newline_rules: dict[str, str] = {}
        emitted_forced_break = False

        def compile_nonterminal(name: str, rule: parser.NonTerminal):
            # Each nonterminal is translated at most once.
            if name in seen:
                return
            seen.add(name)

            for production in rule.fn(self.grammar).flatten(with_metadata=True):
                productions.append((name, compile_production(production)))

        def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
            nonlocal emitted_forced_break

            symbols = []
            for item in production:
                if isinstance(item, str):
                    nt = self._nonterminals[item]
                    if not nt.transparent:
                        # A non-transparent nonterminal is a "token" in our
                        # input, named "tree_{whatever}".
                        symbols.append(f"tree_{item}")
                        continue

                    # If it's transparent then we make a new set of
                    # productions that covers the contents of the
                    # transparent nonterminal.
                    inner_name = "xxx_" + nt.name
                    compile_nonterminal(inner_name, nt)
                    symbols.append(inner_name)
                    continue

                if isinstance(item, parser.Terminal):
                    # If it's a terminal it will appear in our input as
                    # "token_{whatever}".
                    symbols.append(f"token_{item.name}")
                    continue

                meta, children = item
                tx_children = compile_production(children)

                pretty = meta.get("format")
                if isinstance(pretty, parser.FormatMeta):
                    if pretty.group:
                        # Make a fake rule, shared between groups that have
                        # the same children.
                        key = tuple(tx_children)
                        group_name = group_rules.get(key)
                        if group_name is None:
                            group_name = f"g_{len(group_rules)}"
                            group_rules[key] = group_name
                            productions.append((group_name, tx_children))

                        tx_children = [group_name]

                    if pretty.indent:
                        indent_name = f"i_{len(indent_amounts)}"
                        indent_amounts[indent_name] = pretty.indent
                        productions.append((indent_name, tx_children))
                        tx_children = [indent_name]

                    if pretty.newline is not None:
                        # One empty rule per distinct replacement string.
                        newline_name = newline_rules.get(pretty.newline)
                        if newline_name is None:
                            newline_name = f"n{len(newline_rules)}"
                            newline_rules[pretty.newline] = newline_name
                            productions.append((newline_name, []))

                        tx_children.append(newline_name)

                    if pretty.forced_break:
                        # A single empty rule is shared by all forced breaks.
                        if not emitted_forced_break:
                            productions.append(("forced_break", []))
                            emitted_forced_break = True

                        tx_children.append("forced_break")

                # If it turned out to have formatting meta then we will
                # have replaced or augmented the translated children
                # appropriately. Otherwise, if it's highlighting meta or
                # something else, we'll have ignored it and the
                # translated children should just be inserted inline.
                symbols.extend(tx_children)

            return symbols

        compile_nonterminal(rule.name, rule)
        generator = self.grammar._generator(rule.name, productions)
        parse_table = generator.gen_table()

        # The matcher looks replacements up by rule name, so invert the map.
        newline_replace = {
            rule_name: replacement for replacement, rule_name in newline_rules.items()
        }
        return Matcher(
            parse_table,
            indent_amounts,
            newline_replace,
        )

    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
        """Return the matcher for `rule`, compiling and caching it on first use."""
        cached = self._matchers.get(rule.name)
        if cached is not None:
            return cached

        compiled = self.compile_rule(rule)
        self._matchers[rule.name] = compiled
        return compiled

    def convert_tree_to_document(self, tree: runtime.Tree) -> Document:
        """Convert `tree` into a document using the matcher for its rule.

        Raises:
            ValueError: If matching the tree's children yields no document.
        """
        name = tree.name
        assert name is not None, "Cannot format a tree if it still has transparent nodes inside"

        matcher = self.rule_to_matcher(self.lookup_nonterminal(name))
        matched = matcher.match(self, list(tree.children))
        if matched is None:
            raise ValueError(
                f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}"
            )
        return resolve_document(matched)

    def format_tree(self, tree: runtime.Tree, width: int) -> DocumentLayout:
        """Convert `tree` to a document and lay it out within `width` columns."""
        return layout_document(self.convert_tree_to_document(tree), width)
|