604 lines
19 KiB
Python
604 lines
19 KiB
Python
import dataclasses
|
|
import typing
|
|
|
|
from . import builder
|
|
from .. import parser
|
|
from .. import runtime
|
|
|
|
|
|
############################################################################
|
|
# Documents
|
|
############################################################################
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Cons:
|
|
docs: list["Document"]
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class NewLine:
|
|
replace: str
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class ForceBreak:
|
|
silent: bool
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Indent:
|
|
amount: int
|
|
doc: "Document"
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Literal:
|
|
text: str
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Group:
|
|
child: "Document"
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Marker:
|
|
child: "Document"
|
|
meta: dict
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
|
|
class Trivia:
|
|
child: "Document"
|
|
|
|
|
|
@dataclasses.dataclass
|
|
class Lazy:
|
|
value: typing.Callable[[], "Document"] | "Document"
|
|
|
|
def resolve(self) -> "Document":
|
|
if callable(self.value):
|
|
self.value = self.value()
|
|
return self.value
|
|
|
|
@classmethod
|
|
def from_tree(cls, tree: runtime.Tree, src: str, printer: "Printer") -> "Lazy":
|
|
return Lazy(lambda: printer.convert_tree_to_document(tree, src))
|
|
|
|
|
|
Document = None | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy
|
|
|
|
|
|
def cons(*documents: Document) -> Document:
|
|
if len(documents) == 0:
|
|
return None
|
|
|
|
# TODO: Merge adjacent trivia together?
|
|
|
|
result = []
|
|
for document in documents:
|
|
if isinstance(document, Cons):
|
|
result.extend(document.docs)
|
|
elif document is not None:
|
|
result.append(document)
|
|
|
|
if len(result) == 0:
|
|
return None
|
|
if len(result) == 1:
|
|
return result[0]
|
|
|
|
return Cons(result)
|
|
|
|
|
|
def group(document: Document) -> Document:
|
|
if document is None:
|
|
return None
|
|
|
|
if isinstance(document, Cons):
|
|
children = list(document.docs)
|
|
else:
|
|
children = [document]
|
|
|
|
# Split the trivia off the left and right of the incoming group: trivia
|
|
# at the edges shouldn't affect the inside of the group.
|
|
right_trivia: list[Document] = []
|
|
while len(children) > 0 and isinstance(children[-1], Trivia):
|
|
right_trivia.append(children.pop())
|
|
|
|
children.reverse()
|
|
left_trivia: list[Document] = []
|
|
while len(children) > 0 and isinstance(children[-1], Trivia):
|
|
left_trivia.append(children.pop())
|
|
|
|
# IF we still have more than one child, *then* we can actually make a
|
|
# group. (A group with one child is a waste. A group with no children
|
|
# doubly so.)
|
|
children.reverse()
|
|
if len(children) > 1:
|
|
children = [Group(cons(*children))]
|
|
|
|
results = left_trivia + children + right_trivia
|
|
return cons(*results)
|
|
|
|
|
|
def trivia(document: Document) -> Document:
|
|
if document is None:
|
|
return None
|
|
|
|
if isinstance(document, Trivia):
|
|
return document
|
|
|
|
return Trivia(document)
|
|
|
|
|
|
############################################################################
|
|
# Layouts
|
|
############################################################################
|
|
|
|
|
|
class DocumentLayout:
|
|
"""A structure that is trivially convertable to a string; the result of
|
|
layout out a document."""
|
|
|
|
segments: list[str | tuple[int, int]]
|
|
|
|
def __init__(self, segments):
|
|
self.segments = segments
|
|
|
|
def apply_to_source(self, original: str) -> str:
|
|
"""Convert this layout to a string by copying chunks of the source
|
|
text into the right place.
|
|
"""
|
|
result = ""
|
|
for segment in self.segments:
|
|
if isinstance(segment, str):
|
|
result += segment
|
|
else:
|
|
start, end = segment
|
|
result += original[start:end]
|
|
|
|
return result
|
|
|
|
|
|
def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
|
|
"""Lay out a document to fit within the given width.
|
|
|
|
The result of this function is a DocumentLayout which can trivially be
|
|
converted into a string given the original document.
|
|
"""
|
|
|
|
@dataclasses.dataclass
|
|
class Chunk:
|
|
doc: Document
|
|
indent: int
|
|
flat: bool
|
|
|
|
def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
|
|
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
|
|
|
|
column = 0
|
|
chunks: list[Chunk] = [
|
|
Chunk(
|
|
doc=doc,
|
|
indent=0,
|
|
flat=False, # NOTE: Assume flat until we know how to break.
|
|
)
|
|
]
|
|
|
|
def fits(chunk: Chunk) -> bool:
|
|
remaining = width - column
|
|
if remaining <= 0:
|
|
return False
|
|
|
|
stack = list(chunks)
|
|
stack.append(chunk)
|
|
while len(stack) > 0:
|
|
chunk = stack.pop()
|
|
match chunk.doc:
|
|
case None:
|
|
pass
|
|
|
|
case Literal(text):
|
|
remaining -= len(text)
|
|
|
|
case NewLine(replace):
|
|
if chunk.flat:
|
|
remaining -= len(replace)
|
|
else:
|
|
# These are newlines that are real, so it must have
|
|
# all fit.
|
|
return True
|
|
|
|
case ForceBreak():
|
|
# If we're in a flattened chunk then force it to break by
|
|
# returning false here, otherwise we're at the end of the
|
|
# line and yes, whatever you were asking about has fit.
|
|
return not chunk.flat
|
|
|
|
case Cons(docs):
|
|
stack.extend(chunk.with_document(doc) for doc in reversed(docs))
|
|
|
|
case Lazy():
|
|
stack.append(chunk.with_document(chunk.doc.resolve()))
|
|
|
|
case Indent(amount, child):
|
|
stack.append(chunk.with_document(child, and_indent=amount))
|
|
|
|
case Group(child):
|
|
# The difference between this approach and Justin's twist
|
|
# is that we consider the flat variable in Newline(),
|
|
# above, rather than here in Group. This makes us more
|
|
# like Wadler's original formulation, I guess. The
|
|
# grouping is an implicit transform over alternatives
|
|
# represented by newline. (If we have other kinds of
|
|
# alternatives we'll have to work those out elsewhere as
|
|
# well.)
|
|
stack.append(chunk.with_document(child))
|
|
|
|
case Marker():
|
|
stack.append(chunk.with_document(chunk.doc.child))
|
|
|
|
case Trivia(child):
|
|
stack.append(chunk.with_document(child))
|
|
|
|
case _:
|
|
typing.assert_never(chunk.doc)
|
|
|
|
if remaining < 0:
|
|
return False
|
|
|
|
return True # Everything must fit, so great!
|
|
|
|
output: list[str | tuple[int, int]] = []
|
|
while len(chunks) > 0:
|
|
chunk = chunks.pop()
|
|
match chunk.doc:
|
|
case None:
|
|
pass
|
|
|
|
case Literal(text):
|
|
output.append(text)
|
|
column += len(text)
|
|
|
|
case NewLine(replace):
|
|
if chunk.flat:
|
|
output.append(replace)
|
|
column += len(replace)
|
|
else:
|
|
# TODO: Custom newline expansion, custom indent segments.
|
|
output.append("\n" + (chunk.indent * indent))
|
|
column = chunk.indent * len(indent)
|
|
|
|
case ForceBreak(silent):
|
|
# TODO: Custom newline expansion, custom indent segments.
|
|
if not silent:
|
|
output.append("\n" + (chunk.indent * indent))
|
|
column = chunk.indent * len(indent)
|
|
|
|
case Cons(docs):
|
|
chunks.extend(chunk.with_document(doc) for doc in reversed(docs))
|
|
|
|
case Indent(amount, doc):
|
|
chunks.append(chunk.with_document(doc, and_indent=amount))
|
|
|
|
case Lazy():
|
|
chunks.append(chunk.with_document(chunk.doc.resolve()))
|
|
|
|
case Group(child):
|
|
candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
|
|
if chunk.flat or fits(candidate):
|
|
chunks.append(candidate)
|
|
else:
|
|
chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
|
|
|
|
case Marker():
|
|
chunks.append(chunk.with_document(chunk.doc.child))
|
|
|
|
case Trivia(child):
|
|
chunks.append(chunk.with_document(child))
|
|
|
|
case _:
|
|
typing.assert_never(chunk)
|
|
|
|
return DocumentLayout(output)
|
|
|
|
|
|
def resolve_document(doc: Document) -> Document:
|
|
match doc:
|
|
case Cons(docs):
|
|
docs = [resolve_document(d) for d in docs]
|
|
return cons(*docs)
|
|
|
|
case Lazy(_):
|
|
return resolve_document(doc.resolve())
|
|
|
|
case Group(doc):
|
|
return group(resolve_document(doc))
|
|
|
|
case Marker(child, meta):
|
|
return Marker(resolve_document(child), meta)
|
|
|
|
case Trivia(child):
|
|
return Trivia(resolve_document(child))
|
|
|
|
case Literal() | NewLine() | ForceBreak() | Indent() | None:
|
|
return doc
|
|
|
|
case _:
|
|
typing.assert_never(doc)
|
|
|
|
|
|
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
|
|
if isinstance(child, runtime.Tree):
|
|
return f"tree_{child.name}"
|
|
else:
|
|
return f"token_{child.kind}"
|
|
|
|
|
|
def slice_pre_post_trivia(
|
|
trivia_mode: dict[str, parser.TriviaMode],
|
|
trivia_tokens: list[runtime.TokenValue],
|
|
) -> tuple[
|
|
list[tuple[parser.TriviaMode, runtime.TokenValue]],
|
|
list[tuple[parser.TriviaMode, runtime.TokenValue]],
|
|
]:
|
|
tokens = [
|
|
(trivia_mode.get(token.kind, parser.TriviaMode.Blank), token) for token in trivia_tokens
|
|
]
|
|
|
|
for index, (mode, token) in enumerate(tokens):
|
|
if token.start == 0:
|
|
# Everything is pre-trivia if we're at the start of the file.
|
|
return (tokens, [])
|
|
|
|
if mode == parser.TriviaMode.NewLine:
|
|
# This is the first newline; it belongs with the pre-trivia.
|
|
return (tokens[index:], tokens[:index])
|
|
|
|
# If we never found a new line then it's all post-trivia.
|
|
return ([], tokens)
|
|
|
|
|
|
############################################################################
|
|
# The Actual Pretty Printer
|
|
############################################################################
|
|
|
|
|
|
class Matcher:
|
|
table: builder.MatcherTable
|
|
trivia_mode: dict[str, parser.TriviaMode]
|
|
|
|
def __init__(self, table: builder.MatcherTable, trivia_mode: dict[str, parser.TriviaMode]):
|
|
self.table = table
|
|
self.trivia_mode = trivia_mode
|
|
|
|
def match(
|
|
self,
|
|
printer: "Printer",
|
|
items: list[runtime.Tree | runtime.TokenValue],
|
|
src: str,
|
|
) -> Document:
|
|
stack: list[tuple[int, Document]] = [(0, None)]
|
|
table = self.table.table
|
|
|
|
# eof_trivia = []
|
|
# if len(items) > 0:
|
|
# item = items[-1]
|
|
# if isinstance(item, runtime.TokenValue):
|
|
# eof_trivia = item.post_trivia
|
|
|
|
input = [(child_to_name(i), i) for i in items] + [
|
|
(
|
|
"$",
|
|
runtime.TokenValue(
|
|
kind="$",
|
|
start=0,
|
|
end=0,
|
|
pre_trivia=[],
|
|
post_trivia=[],
|
|
),
|
|
)
|
|
]
|
|
input_index = 0
|
|
|
|
while True:
|
|
current_token = input[input_index]
|
|
current_state = stack[-1][0]
|
|
action = table.actions[current_state].get(current_token[0], parser.Error())
|
|
|
|
match action:
|
|
case parser.Accept():
|
|
result = stack[-1][1]
|
|
# result = cons(result, self.apply_trivia(eof_trivia))
|
|
return result
|
|
|
|
case parser.Reduce(name=name, count=size):
|
|
child: Document = None
|
|
if size > 0:
|
|
for _, c in stack[-size:]:
|
|
if c is None:
|
|
continue
|
|
child = cons(child, c)
|
|
del stack[-size:]
|
|
|
|
if name[0] == "g":
|
|
child = group(child)
|
|
|
|
elif name[0] == "i":
|
|
amount = self.table.indent_amounts[name]
|
|
child = Indent(amount, child)
|
|
|
|
elif name[0] == "n":
|
|
replace = self.table.newline_replace[name]
|
|
child = cons(child, NewLine(replace))
|
|
|
|
elif name[0] == "p":
|
|
replace = self.table.newline_replace[name]
|
|
child = cons(NewLine(replace), child)
|
|
|
|
elif name[0] == "f":
|
|
child = cons(child, ForceBreak(False))
|
|
|
|
elif name[0] == "d":
|
|
child = cons(ForceBreak(False), child)
|
|
|
|
else:
|
|
pass # Reducing a transparent rule probably.
|
|
|
|
goto = table.gotos[stack[-1][0]].get(name)
|
|
assert goto is not None
|
|
stack.append((goto, child))
|
|
|
|
case parser.Shift():
|
|
value = current_token[1]
|
|
|
|
if isinstance(value, runtime.Tree):
|
|
child = Lazy.from_tree(value, src, printer)
|
|
else:
|
|
child = cons(
|
|
trivia(self.apply_pre_trivia(value.pre_trivia, src)),
|
|
Literal(src[value.start : value.end]),
|
|
trivia(self.apply_post_trivia(value.post_trivia, src)),
|
|
)
|
|
|
|
stack.append((action.state, child))
|
|
input_index += 1
|
|
|
|
case parser.Error():
|
|
raise Exception("How did I get a parse error here??")
|
|
|
|
def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
|
|
pre_trivia, _ = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
|
|
if len(pre_trivia) == 0:
|
|
return None
|
|
|
|
at_start_of_file = pre_trivia[0][1].start == 0
|
|
|
|
trivia_doc = None
|
|
new_line_count = 0
|
|
for mode, token in pre_trivia:
|
|
match mode:
|
|
case parser.TriviaMode.LineComment:
|
|
trivia_doc = cons(
|
|
trivia_doc,
|
|
Literal(src[token.start : token.end]),
|
|
ForceBreak(False),
|
|
)
|
|
new_line_count = 0 # There will be a newline after this.
|
|
at_start_of_file = False
|
|
|
|
case parser.TriviaMode.Blank:
|
|
pass
|
|
|
|
case parser.TriviaMode.NewLine:
|
|
new_line_count += 1
|
|
if new_line_count == 2 and not at_start_of_file:
|
|
trivia_doc = cons(
|
|
trivia_doc,
|
|
ForceBreak(False),
|
|
)
|
|
|
|
case _:
|
|
typing.assert_never(mode)
|
|
|
|
return trivia_doc
|
|
|
|
def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
|
|
if len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src):
|
|
return self.apply_eof_trivia(trivia_tokens, src)
|
|
|
|
_, post_trivia = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
|
|
|
|
trivia_doc = None
|
|
for mode, token in post_trivia:
|
|
match mode:
|
|
case parser.TriviaMode.Blank:
|
|
pass
|
|
|
|
case parser.TriviaMode.NewLine:
|
|
# Anything after a line break is not processed as post
|
|
# trivia.
|
|
break
|
|
|
|
case parser.TriviaMode.LineComment:
|
|
# Because this is post-trivia, we know there's something
|
|
# to our left, and we can force the space.
|
|
trivia_doc = cons(
|
|
Literal(" "),
|
|
Literal(src[token.start : token.end]),
|
|
ForceBreak(True), # And the line needs to end.
|
|
)
|
|
break
|
|
|
|
case _:
|
|
typing.assert_never(mode)
|
|
|
|
return trivia_doc
|
|
|
|
def apply_eof_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
|
|
# EOF trivia has weird rules, namely, it's like pre and post joined together but.
|
|
tokens = [
|
|
(self.trivia_mode.get(token.kind, parser.TriviaMode.Blank), token)
|
|
for token in trivia_tokens
|
|
]
|
|
|
|
at_start = True
|
|
newline_count = 0
|
|
trivia_doc = None
|
|
for mode, token in tokens:
|
|
match mode:
|
|
case parser.TriviaMode.Blank:
|
|
pass
|
|
|
|
case parser.TriviaMode.NewLine:
|
|
at_start = False
|
|
newline_count += 1
|
|
if newline_count <= 2:
|
|
trivia_doc = cons(trivia_doc, ForceBreak(False))
|
|
|
|
case parser.TriviaMode.LineComment:
|
|
# Because this is post-trivia, we know there's something
|
|
# to our left, and we can force the space.
|
|
trivia_doc = cons(
|
|
trivia_doc,
|
|
Literal(" ") if at_start else None,
|
|
Literal(src[token.start : token.end]),
|
|
)
|
|
newline_count = 0
|
|
at_start = False
|
|
|
|
case _:
|
|
typing.assert_never(mode)
|
|
|
|
return trivia_doc
|
|
|
|
|
|
class Printer:
|
|
table: builder.PrettyTable
|
|
matchers: dict[str, Matcher]
|
|
|
|
def __init__(self, table: builder.PrettyTable):
|
|
self.table = table
|
|
self.matchers = {
|
|
name: Matcher(value, self.table.trivia_modes) for name, value in table.matchers.items()
|
|
}
|
|
|
|
def indent(self) -> str:
|
|
return self.table.indent
|
|
|
|
def convert_tree_to_document(self, tree: runtime.Tree, src: str) -> Document:
|
|
name = tree.name
|
|
assert name is not None, "Cannot format a tree if it still has transparent nodes inside"
|
|
|
|
matcher = self.matchers[name]
|
|
m = matcher.match(self, list(tree.children), src)
|
|
if m is None:
|
|
raise ValueError(
|
|
f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}"
|
|
)
|
|
return resolve_document(m)
|
|
|
|
def format_tree(self, tree: runtime.Tree, src: str, width: int) -> DocumentLayout:
|
|
doc = self.convert_tree_to_document(tree, src)
|
|
return layout_document(doc, width, self.table.indent)
|