[wadler] Re-factor into multiple modules
Hard split between builder and runtime, as is proper.
This commit is contained in:
parent
1f84752538
commit
1a3ce02d48
4 changed files with 370 additions and 267 deletions
5
parser/wadler/__init__.py
Normal file
5
parser/wadler/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# A prettier printer.
|
||||
from . import builder
|
||||
from . import runtime
|
||||
|
||||
from .builder import *
|
||||
316
parser/wadler/builder.py
Normal file
316
parser/wadler/builder.py
Normal file
|
|
@ -0,0 +1,316 @@
|
|||
"""Data structures to support pretty-printing.
|
||||
|
||||
Just like the parse tables, these tables could be written out in a different
|
||||
format and used to drive a pretty-printer written in another programming
|
||||
language, probably paired with a parser runtime written in that same language.
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
import typing
|
||||
|
||||
from .. import parser
|
||||
|
||||
|
||||
@dataclasses.dataclass
class MatcherTable:
    """Information necessary to create a document from a single node of a
    concrete parse tree as generated by the parser.

    A "document" in this case is a wadler-style document. See the
    documentation of the module for what kinds of document nodes we expect
    to generate.

    The grammar contains extra metadata about how to add line-breaks and
    whatnot, but that information was discarded during the parse. (We don't
    need it!) That means we need to recover it after the fact. It would be
    easy, except transparent rules mean that the series of tree children
    form a context-free language instead of a regular language, and so we
    actually need a full parser again to recover the structure.

    The data to drive that parse is in `table`, which is an LR parse table of
    the usual form produced by this parser generator. To build the document,
    use the actions in the parse table to drive an LR parse, maintaining a
    stack of documents as you go.

    When matching terminals, interpret symbol names as follows:

    - `token_[NAME]` symbols are token children in the tree node we're parsing.
      (The token will have the name [NAME].) These should get shifted onto the
      stack as plain-text document nodes.

    - `tree_[KIND]` symbols are tree node children in the tree node we're
      parsing. (The tree kind will be [KIND].) These should get shifted onto
      the stack as document nodes, but recursively (by matching *their* children
      with the same strategy.)

    When reducing nonterminals, first concatenate all of the documents you remove
    from the stack into a single document, then use the first character to
    determine what (if any) additional work to do to the document:

    - `i...` symbols are productions used to generate "indent" documents. The
      `indent_amounts` dict indicates how far to indent each production. The
      concatenated documents become the child of the indent.

    - `g...` symbols are productions used to generate "group" documents. The
      concatenated documents become the child of the group.

    - `n...` symbols are productions that generate newlines. A newline document
      should be created and appended to the concatenated documents. The
      `newline_replace` dict indicates what the replacement text for the newline
      document should be.

    - `p...` symbols are just like `n...` symbols, except the newline symbol
      is prepended instead of appended.

    - `f...` symbols are like `n...` symbols, except that a force-break document
      is appended instead of a newline document.

    - `d...` symbols are like `f...` symbols, except that the force-break
      document is prepended instead of appended.

    - Any other prefix should be ignored.
    """

    # Parse table to recover the node into a document
    table: parser.ParseTable
    # Mapping from the name of i_ rules to indent counts
    indent_amounts: dict[str, int]
    # Mapping from the names of n_ rules to the text they flatten to
    newline_replace: dict[str, str]
||||
|
||||
|
||||
def _compile_nonterminal_matcher(
    grammar: parser.Grammar,
    nonterminals: dict[str, parser.NonTerminal],
    rule: parser.NonTerminal,
) -> MatcherTable:
    """Generate a matcher table for a single nonterminal.

    See the docs for [MatcherTable] to understand the result.

    The gist: we synthesize a small grammar whose terminals are the possible
    children of a `rule` tree node (`tree_*` / `token_*` symbols), whose
    synthetic productions (`g_*`, `i_*`, `n_*`, `p_*`, `f_*`, `d_*`) encode the
    formatting metadata, and then run the regular LR table generator over it.
    """
    # (name, production) pairs accumulated for the synthesized grammar.
    generated_grammar: list[tuple[str, list[str]]] = []
    # Names of transparent nonterminals already expanded, to stop recursion.
    visited: set[str] = set()

    # In order to generate groups, indents, and newlines we need to
    # synthesize new productions. And it happens sometimes that we get
    # duplicates, repeated synthetic productions. It's important to
    # de-duplicate productions, otherwise we'll wind up with ambiguities in
    # the parser.
    #
    # These dictionaries track the synthetic rules: the keys are production
    # and also the parameter (if any), and the values are the names of the
    # productions that produce the effect.
    #
    groups: dict[tuple[str, ...], str] = {}
    indents: dict[tuple[tuple[str, ...], int], str] = {}
    newlines: dict[tuple[tuple[str, ...], str], str] = {}
    # Counter shared by the p_/f_/d_ rules, which are never de-duplicated.
    prefix_count: int = 0

    # Mapping from n_/p_ rule names to their flattened replacement text;
    # becomes MatcherTable.newline_replace.
    final_newlines: dict[str, str] = {}

    def compile_nonterminal(name: str, rule: parser.NonTerminal) -> None:
        # Expand every flattened production of `rule` under the synthetic
        # name `name`, exactly once.
        if name not in visited:
            visited.add(name)
            for production in rule.fn(grammar).flatten(with_metadata=True):
                trans_prod = compile_production(production)
                generated_grammar.append((name, trans_prod))

    def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
        nonlocal groups
        nonlocal indents
        nonlocal newlines
        nonlocal prefix_count
        nonlocal final_newlines

        # Prefix newline/force-break rules that must wrap the *whole*
        # production; applied at the end, innermost-last.
        prefix_stack: list[str] = []

        result = []
        for item in production:
            if isinstance(item, str):
                nt = nonterminals[item]
                if nt.transparent:
                    # If it's transparent then we make a new set of
                    # productions that covers the contents of the
                    # transparent nonterminal.
                    name = "xxx_" + nt.name
                    compile_nonterminal(name, nt)
                    result.append(name)
                else:
                    # Otherwise it's a "token" in our input, named
                    # "tree_{whatever}".
                    result.append(f"tree_{item}")

            elif isinstance(item, parser.Terminal):
                # If it's a terminal it will appear in our input as
                # "token_{whatever}".
                result.append(f"token_{item.name}")

            else:
                # A (metadata, sub-production) pair: compile the children
                # first, then wrap them according to the format metadata.
                meta, children = item
                tx_children = compile_production(children)

                pretty = meta.get("format")
                if isinstance(pretty, parser.FormatMeta):
                    if pretty.group:
                        # Generate a group rule.
                        child_key = tuple(tx_children)
                        rule_name = groups.get(child_key)
                        if rule_name is None:
                            rule_name = f"g_{len(groups)}"
                            groups[child_key] = rule_name
                            generated_grammar.append((rule_name, tx_children))

                        tx_children = [rule_name]

                    if pretty.indent:
                        # Generate an indent rule.
                        # NOTE(review): truthiness check, so an indent of 0
                        # generates no rule — presumably intended.
                        child_key = (tuple(tx_children), pretty.indent)
                        rule_name = indents.get(child_key)
                        if rule_name is None:
                            rule_name = f"i_{len(indents)}"
                            indents[child_key] = rule_name
                            generated_grammar.append((rule_name, tx_children))

                        tx_children = [rule_name]

                    if pretty.newline is not None:
                        # Generate a newline rule.
                        #
                        # Newline rules are complicated because we need to avoid
                        # having a production that has zero children. Zero-child
                        # productions generate unpredictable parse trees, even
                        # when "unambiguous".
                        #
                        # Our first hedge is: if we don't have any children for
                        # this production but we *have* already converted some
                        # stuff, then take the stuff we've already converted as
                        # our child and wrap it in a newline production. (This
                        # works when the newline is not the first element in the
                        # production.)
                        #
                        if len(tx_children) == 0:
                            tx_children = result
                            result = []

                        if len(tx_children) > 0:
                            # n == postfix newline.
                            child_key = (tuple(tx_children), pretty.newline)
                            rule_name = newlines.get(child_key)
                            if rule_name is None:
                                rule_name = f"n_{len(newlines)}"
                                newlines[child_key] = rule_name
                                generated_grammar.append((rule_name, tx_children))

                            tx_children = [rule_name]

                        else:
                            # If we still have no tx_children then the newline must
                            # be the first thing in the production. Ugh. We will
                            # remember it for later, and apply it after we've
                            # finished handling everything else.
                            #
                            # p == prefix newline
                            rule_name = f"p_{prefix_count}"
                            prefix_count += 1
                            final_newlines[rule_name] = pretty.newline
                            prefix_stack.append(rule_name)

                    if pretty.forced_break:
                        # Generate a force-break rule.
                        #
                        # This follows the same strategies as newlines with
                        # respect to empty productions.
                        if len(tx_children) == 0:
                            tx_children = result
                            result = []

                        if len(tx_children) > 0:
                            # f == postfix forced break
                            rule_name = f"f_{prefix_count}"
                            prefix_count += 1

                            generated_grammar.append((rule_name, tx_children))
                            tx_children = [rule_name]
                        else:
                            # d == prefix forced break (so-named because 'd' is
                            # to the right of 'f' on my keyboard)
                            rule_name = f"d_{prefix_count}"
                            prefix_count += 1
                            prefix_stack.append(rule_name)

                # If it turned out to have formatting meta then we will have
                # replaced or augmented the translated children appropriately.
                # Otherwise, if it's highlighting meta or something else, we
                # will have ignored it and the translated children should just
                # be inserted inline.
                result.extend(tx_children)

        # Now is the time to handle any prefix rules, by wrapping the results in
        # a new production for the prefix and replacing the results with that
        # one.
        while len(prefix_stack) > 0:
            rule_name = prefix_stack.pop()
            generated_grammar.append((rule_name, result))
            result = [rule_name]

        return result

    # Kick off the expansion from a synthetic start symbol, then hand the
    # synthesized grammar to the ordinary LR table generator.
    start_name = f"yyy_{rule.name}"
    compile_nonterminal(start_name, rule)
    gen = grammar._generator(start_name, generated_grammar)
    parse_table = gen.gen_table()

    # De-duplicated n_ rules join the p_ rules recorded during compilation.
    for (_, replacement), rule_name in newlines.items():
        final_newlines[rule_name] = replacement

    indent_amounts = {rule_name: amount for ((_, amount), rule_name) in indents.items()}

    return MatcherTable(
        parse_table,
        indent_amounts,
        final_newlines,
    )
||||
|
||||
|
||||
@dataclasses.dataclass
class PrettyTable:
    """Information necessary to convert a parsed tree into a wadler-style
    pretty document, where it can then be formatted.

    This is basically a bunch of "MatcherTables", one for each kind of tree,
    that tell us how to recover document structure from the tree node. We also
    record:

    - The indentation string to use.
    - The trivia modes of any terminals, for use in reconstructing trivia.
    """

    # The string emitted once per indentation level.
    indent: str
    # Terminal name -> how its trivia participates in layout.
    trivia_modes: dict[str, parser.TriviaMode]
    # Tree kind -> the table that rebuilds its document structure.
    matchers: dict[str, MatcherTable]
||||
|
||||
|
||||
def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) -> PrettyTable:
    """Generate a [PrettyTable] to drive a pretty-printer from a grammar.

    When `indent` is not given, the grammar's own `pretty_indent` attribute
    is consulted before falling back to the built-in default.
    """
    if indent is None:
        configured = getattr(grammar, "pretty_indent", None)
        # NOTE(review): default captured as a single space in the original —
        # confirm the intended indent width.
        indent = " " if configured is None else configured

    trivia_modes: dict[str, parser.TriviaMode] = {}
    for terminal in grammar.terminals():
        mode = terminal.meta.get("trivia_mode")
        if terminal.name is not None and isinstance(mode, parser.TriviaMode):
            trivia_modes[terminal.name] = mode

    nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
    matchers = {
        nt_name: _compile_nonterminal_matcher(grammar, nonterminals, nt)
        for nt_name, nt in nonterminals.items()
    }

    return PrettyTable(
        indent,
        trivia_modes,
        matchers,
    )
||||
604
parser/wadler/runtime.py
Normal file
604
parser/wadler/runtime.py
Normal file
|
|
@ -0,0 +1,604 @@
|
|||
import dataclasses
|
||||
import typing
|
||||
|
||||
from . import builder
|
||||
from .. import parser
|
||||
from .. import runtime
|
||||
|
||||
|
||||
############################################################################
|
||||
# Documents
|
||||
############################################################################
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Cons:
|
||||
docs: list["Document"]
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class NewLine:
|
||||
replace: str
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class ForceBreak:
|
||||
silent: bool
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Indent:
|
||||
amount: int
|
||||
doc: "Document"
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Literal:
|
||||
text: str
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Group:
|
||||
child: "Document"
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Marker:
|
||||
child: "Document"
|
||||
meta: dict
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class Trivia:
|
||||
child: "Document"
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Lazy:
|
||||
value: typing.Callable[[], "Document"] | "Document"
|
||||
|
||||
def resolve(self) -> "Document":
|
||||
if callable(self.value):
|
||||
self.value = self.value()
|
||||
return self.value
|
||||
|
||||
@classmethod
|
||||
def from_tree(cls, tree: runtime.Tree, src: str, printer: "Printer") -> "Lazy":
|
||||
return Lazy(lambda: printer.convert_tree_to_document(tree, src))
|
||||
|
||||
|
||||
Document = None | Literal | NewLine | ForceBreak | Cons | Indent | Group | Trivia | Marker | Lazy
|
||||
|
||||
|
||||
def cons(*documents: Document) -> Document:
|
||||
if len(documents) == 0:
|
||||
return None
|
||||
|
||||
# TODO: Merge adjacent trivia together?
|
||||
|
||||
result = []
|
||||
for document in documents:
|
||||
if isinstance(document, Cons):
|
||||
result.extend(document.docs)
|
||||
elif document is not None:
|
||||
result.append(document)
|
||||
|
||||
if len(result) == 0:
|
||||
return None
|
||||
if len(result) == 1:
|
||||
return result[0]
|
||||
|
||||
return Cons(result)
|
||||
|
||||
|
||||
def group(document: Document) -> Document:
|
||||
if document is None:
|
||||
return None
|
||||
|
||||
if isinstance(document, Cons):
|
||||
children = list(document.docs)
|
||||
else:
|
||||
children = [document]
|
||||
|
||||
# Split the trivia off the left and right of the incoming group: trivia
|
||||
# at the edges shouldn't affect the inside of the group.
|
||||
right_trivia: list[Document] = []
|
||||
while len(children) > 0 and isinstance(children[-1], Trivia):
|
||||
right_trivia.append(children.pop())
|
||||
|
||||
children.reverse()
|
||||
left_trivia: list[Document] = []
|
||||
while len(children) > 0 and isinstance(children[-1], Trivia):
|
||||
left_trivia.append(children.pop())
|
||||
|
||||
# IF we still have more than one child, *then* we can actually make a
|
||||
# group. (A group with one child is a waste. A group with no children
|
||||
# doubly so.)
|
||||
children.reverse()
|
||||
if len(children) > 1:
|
||||
children = [Group(cons(*children))]
|
||||
|
||||
results = left_trivia + children + right_trivia
|
||||
return cons(*results)
|
||||
|
||||
|
||||
def trivia(document: Document) -> Document:
|
||||
if document is None:
|
||||
return None
|
||||
|
||||
if isinstance(document, Trivia):
|
||||
return document
|
||||
|
||||
return Trivia(document)
|
||||
|
||||
|
||||
############################################################################
|
||||
# Layouts
|
||||
############################################################################
|
||||
|
||||
|
||||
class DocumentLayout:
|
||||
"""A structure that is trivially convertable to a string; the result of
|
||||
layout out a document."""
|
||||
|
||||
segments: list[str | tuple[int, int]]
|
||||
|
||||
def __init__(self, segments):
|
||||
self.segments = segments
|
||||
|
||||
def apply_to_source(self, original: str) -> str:
|
||||
"""Convert this layout to a string by copying chunks of the source
|
||||
text into the right place.
|
||||
"""
|
||||
result = ""
|
||||
for segment in self.segments:
|
||||
if isinstance(segment, str):
|
||||
result += segment
|
||||
else:
|
||||
start, end = segment
|
||||
result += original[start:end]
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def layout_document(doc: Document, width: int, indent: str) -> DocumentLayout:
|
||||
"""Lay out a document to fit within the given width.
|
||||
|
||||
The result of this function is a DocumentLayout which can trivially be
|
||||
converted into a string given the original document.
|
||||
"""
|
||||
|
||||
@dataclasses.dataclass
|
||||
class Chunk:
|
||||
doc: Document
|
||||
indent: int
|
||||
flat: bool
|
||||
|
||||
def with_document(self, doc: Document, and_indent: int = 0) -> "Chunk":
|
||||
return Chunk(doc=doc, indent=self.indent + and_indent, flat=self.flat)
|
||||
|
||||
column = 0
|
||||
chunks: list[Chunk] = [
|
||||
Chunk(
|
||||
doc=doc,
|
||||
indent=0,
|
||||
flat=False, # NOTE: Assume flat until we know how to break.
|
||||
)
|
||||
]
|
||||
|
||||
def fits(chunk: Chunk) -> bool:
|
||||
remaining = width - column
|
||||
if remaining <= 0:
|
||||
return False
|
||||
|
||||
stack = list(chunks)
|
||||
stack.append(chunk)
|
||||
while len(stack) > 0:
|
||||
chunk = stack.pop()
|
||||
match chunk.doc:
|
||||
case None:
|
||||
pass
|
||||
|
||||
case Literal(text):
|
||||
remaining -= len(text)
|
||||
|
||||
case NewLine(replace):
|
||||
if chunk.flat:
|
||||
remaining -= len(replace)
|
||||
else:
|
||||
# These are newlines that are real, so it must have
|
||||
# all fit.
|
||||
return True
|
||||
|
||||
case ForceBreak():
|
||||
# If we're in a flattened chunk then force it to break by
|
||||
# returning false here, otherwise we're at the end of the
|
||||
# line and yes, whatever you were asking about has fit.
|
||||
return not chunk.flat
|
||||
|
||||
case Cons(docs):
|
||||
stack.extend(chunk.with_document(doc) for doc in reversed(docs))
|
||||
|
||||
case Lazy():
|
||||
stack.append(chunk.with_document(chunk.doc.resolve()))
|
||||
|
||||
case Indent(amount, child):
|
||||
stack.append(chunk.with_document(child, and_indent=amount))
|
||||
|
||||
case Group(child):
|
||||
# The difference between this approach and Justin's twist
|
||||
# is that we consider the flat variable in Newline(),
|
||||
# above, rather than here in Group. This makes us more
|
||||
# like Wadler's original formulation, I guess. The
|
||||
# grouping is an implicit transform over alternatives
|
||||
# represented by newline. (If we have other kinds of
|
||||
# alternatives we'll have to work those out elsewhere as
|
||||
# well.)
|
||||
stack.append(chunk.with_document(child))
|
||||
|
||||
case Marker():
|
||||
stack.append(chunk.with_document(chunk.doc.child))
|
||||
|
||||
case Trivia(child):
|
||||
stack.append(chunk.with_document(child))
|
||||
|
||||
case _:
|
||||
typing.assert_never(chunk.doc)
|
||||
|
||||
if remaining < 0:
|
||||
return False
|
||||
|
||||
return True # Everything must fit, so great!
|
||||
|
||||
output: list[str | tuple[int, int]] = []
|
||||
while len(chunks) > 0:
|
||||
chunk = chunks.pop()
|
||||
match chunk.doc:
|
||||
case None:
|
||||
pass
|
||||
|
||||
case Literal(text):
|
||||
output.append(text)
|
||||
column += len(text)
|
||||
|
||||
case NewLine(replace):
|
||||
if chunk.flat:
|
||||
output.append(replace)
|
||||
column += len(replace)
|
||||
else:
|
||||
# TODO: Custom newline expansion, custom indent segments.
|
||||
output.append("\n" + (chunk.indent * indent))
|
||||
column = chunk.indent * len(indent)
|
||||
|
||||
case ForceBreak(silent):
|
||||
# TODO: Custom newline expansion, custom indent segments.
|
||||
if not silent:
|
||||
output.append("\n" + (chunk.indent * indent))
|
||||
column = chunk.indent * len(indent)
|
||||
|
||||
case Cons(docs):
|
||||
chunks.extend(chunk.with_document(doc) for doc in reversed(docs))
|
||||
|
||||
case Indent(amount, doc):
|
||||
chunks.append(chunk.with_document(doc, and_indent=amount))
|
||||
|
||||
case Lazy():
|
||||
chunks.append(chunk.with_document(chunk.doc.resolve()))
|
||||
|
||||
case Group(child):
|
||||
candidate = Chunk(doc=child, indent=chunk.indent, flat=True)
|
||||
if chunk.flat or fits(candidate):
|
||||
chunks.append(candidate)
|
||||
else:
|
||||
chunks.append(Chunk(doc=child, indent=chunk.indent, flat=False))
|
||||
|
||||
case Marker():
|
||||
chunks.append(chunk.with_document(chunk.doc.child))
|
||||
|
||||
case Trivia(child):
|
||||
chunks.append(chunk.with_document(child))
|
||||
|
||||
case _:
|
||||
typing.assert_never(chunk)
|
||||
|
||||
return DocumentLayout(output)
|
||||
|
||||
|
||||
def resolve_document(doc: Document) -> Document:
|
||||
match doc:
|
||||
case Cons(docs):
|
||||
docs = [resolve_document(d) for d in docs]
|
||||
return cons(*docs)
|
||||
|
||||
case Lazy(_):
|
||||
return resolve_document(doc.resolve())
|
||||
|
||||
case Group(doc):
|
||||
return group(resolve_document(doc))
|
||||
|
||||
case Marker(child, meta):
|
||||
return Marker(resolve_document(child), meta)
|
||||
|
||||
case Trivia(child):
|
||||
return Trivia(resolve_document(child))
|
||||
|
||||
case Literal() | NewLine() | ForceBreak() | Indent() | None:
|
||||
return doc
|
||||
|
||||
case _:
|
||||
typing.assert_never(doc)
|
||||
|
||||
|
||||
def child_to_name(child: runtime.Tree | runtime.TokenValue) -> str:
    """Map a tree-node child to the grammar symbol name that the matcher
    tables use for it: `tree_*` for subtrees, `token_*` for tokens."""
    if not isinstance(child, runtime.Tree):
        return f"token_{child.kind}"
    return f"tree_{child.name}"
|
||||
|
||||
|
||||
def slice_pre_post_trivia(
    trivia_mode: dict[str, parser.TriviaMode],
    trivia_tokens: list[runtime.TokenValue],
) -> tuple[
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
    list[tuple[parser.TriviaMode, runtime.TokenValue]],
]:
    """Split the trivia between two tokens into (pre, post) halves.

    Everything up to the first newline attaches to the token on the left
    (post-trivia); the newline and everything after it attach to the token
    on the right (pre-trivia). Unknown token kinds count as Blank.
    """
    tagged = [
        (trivia_mode.get(token.kind, parser.TriviaMode.Blank), token) for token in trivia_tokens
    ]

    for split, (mode, token) in enumerate(tagged):
        if token.start == 0:
            # Everything is pre-trivia if we're at the start of the file.
            return tagged, []

        if mode == parser.TriviaMode.NewLine:
            # This is the first newline; it belongs with the pre-trivia.
            return tagged[split:], tagged[:split]

    # If we never found a new line then it's all post-trivia.
    return [], tagged
|
||||
|
||||
|
||||
############################################################################
|
||||
# The Actual Pretty Printer
|
||||
############################################################################
|
||||
|
||||
|
||||
class Matcher:
    """Rebuilds the document structure of one kind of tree node.

    Drives an LR parse (using the node's MatcherTable) over the node's
    children, maintaining a stack of partially-built documents instead of a
    stack of parse-tree values. See MatcherTable's docs for the meaning of
    the symbol-name prefixes interpreted here.
    """

    # The per-tree-kind table produced by the builder module.
    table: builder.MatcherTable
    # Terminal name -> trivia mode, shared across all matchers of a printer.
    trivia_mode: dict[str, parser.TriviaMode]

    def __init__(self, table: builder.MatcherTable, trivia_mode: dict[str, parser.TriviaMode]):
        self.table = table
        self.trivia_mode = trivia_mode

    def match(
        self,
        printer: "Printer",
        items: list[runtime.Tree | runtime.TokenValue],
        src: str,
    ) -> Document:
        """Run the LR parse over `items` and return the resulting document.

        Subtree children become Lazy documents (converted on demand via
        `printer`); token children become Literals wrapped with their
        reconstructed trivia.
        """
        # Stack of (LR state, document built so far for that state).
        stack: list[tuple[int, Document]] = [(0, None)]
        table = self.table.table

        # eof_trivia = []
        # if len(items) > 0:
        #     item = items[-1]
        #     if isinstance(item, runtime.TokenValue):
        #         eof_trivia = item.post_trivia

        # NOTE: shadows the `input` builtin; fine within this method.
        # Terminated with a synthetic "$" token for the LR parse.
        input = [(child_to_name(i), i) for i in items] + [
            (
                "$",
                runtime.TokenValue(
                    kind="$",
                    start=0,
                    end=0,
                    pre_trivia=[],
                    post_trivia=[],
                ),
            )
        ]
        input_index = 0

        while True:
            current_token = input[input_index]
            current_state = stack[-1][0]
            action = table.actions[current_state].get(current_token[0], parser.Error())

            match action:
                case parser.Accept():
                    result = stack[-1][1]
                    # result = cons(result, self.apply_trivia(eof_trivia))
                    return result

                case parser.Reduce(name=name, count=size):
                    # Concatenate the popped documents, then wrap according
                    # to the synthetic rule's prefix character.
                    child: Document = None
                    if size > 0:
                        for _, c in stack[-size:]:
                            if c is None:
                                continue
                            child = cons(child, c)
                        del stack[-size:]

                    if name[0] == "g":
                        child = group(child)

                    elif name[0] == "i":
                        amount = self.table.indent_amounts[name]
                        child = Indent(amount, child)

                    elif name[0] == "n":
                        replace = self.table.newline_replace[name]
                        child = cons(child, NewLine(replace))

                    elif name[0] == "p":
                        replace = self.table.newline_replace[name]
                        child = cons(NewLine(replace), child)

                    elif name[0] == "f":
                        child = cons(child, ForceBreak(False))

                    elif name[0] == "d":
                        child = cons(ForceBreak(False), child)

                    else:
                        pass  # Reducing a transparent rule probably.

                    goto = table.gotos[stack[-1][0]].get(name)
                    assert goto is not None
                    stack.append((goto, child))

                case parser.Shift():
                    value = current_token[1]

                    if isinstance(value, runtime.Tree):
                        # Subtrees are converted lazily to avoid deep
                        # up-front recursion.
                        child = Lazy.from_tree(value, src, printer)
                    else:
                        child = cons(
                            trivia(self.apply_pre_trivia(value.pre_trivia, src)),
                            Literal(src[value.start : value.end]),
                            trivia(self.apply_post_trivia(value.post_trivia, src)),
                        )

                    stack.append((action.state, child))
                    input_index += 1

                case parser.Error():
                    # The table was generated from the same grammar that
                    # produced the tree, so this should be unreachable.
                    raise Exception("How did I get a parse error here??")

    def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        """Build the document for trivia that precedes a token.

        Line comments are kept (each followed by a forced break) and a
        single blank line is preserved where the source had one or more,
        except at the very start of the file.
        """
        pre_trivia, _ = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
        if len(pre_trivia) == 0:
            return None

        at_start_of_file = pre_trivia[0][1].start == 0

        trivia_doc = None
        new_line_count = 0
        for mode, token in pre_trivia:
            match mode:
                case parser.TriviaMode.LineComment:
                    trivia_doc = cons(
                        trivia_doc,
                        Literal(src[token.start : token.end]),
                        ForceBreak(False),
                    )
                    new_line_count = 0  # There will be a newline after this.
                    at_start_of_file = False

                case parser.TriviaMode.Blank:
                    pass

                case parser.TriviaMode.NewLine:
                    new_line_count += 1
                    # Exactly the second consecutive newline emits a break:
                    # this keeps at most one blank line.
                    if new_line_count == 2 and not at_start_of_file:
                        trivia_doc = cons(
                            trivia_doc,
                            ForceBreak(False),
                        )

                case _:
                    typing.assert_never(mode)

        return trivia_doc

    def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        """Build the document for trivia that follows a token on its line.

        Only a trailing line comment on the same line is kept; trivia at the
        very end of the file is handled by `apply_eof_trivia` instead.
        """
        if len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src):
            return self.apply_eof_trivia(trivia_tokens, src)

        _, post_trivia = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)

        trivia_doc = None
        for mode, token in post_trivia:
            match mode:
                case parser.TriviaMode.Blank:
                    pass

                case parser.TriviaMode.NewLine:
                    # Anything after a line break is not processed as post
                    # trivia.
                    break

                case parser.TriviaMode.LineComment:
                    # Because this is post-trivia, we know there's something
                    # to our left, and we can force the space.
                    trivia_doc = cons(
                        Literal(" "),
                        Literal(src[token.start : token.end]),
                        ForceBreak(True),  # And the line needs to end.
                    )
                    break

                case _:
                    typing.assert_never(mode)

        return trivia_doc

    def apply_eof_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
        """Build the document for trivia at the end of the file.

        EOF trivia has weird rules: it behaves like pre- and post-trivia
        joined together — comments are kept, and up to two consecutive
        newlines (one blank line) are preserved between them.
        """
        tokens = [
            (self.trivia_mode.get(token.kind, parser.TriviaMode.Blank), token)
            for token in trivia_tokens
        ]

        at_start = True
        newline_count = 0
        trivia_doc = None
        for mode, token in tokens:
            match mode:
                case parser.TriviaMode.Blank:
                    pass

                case parser.TriviaMode.NewLine:
                    at_start = False
                    newline_count += 1
                    if newline_count <= 2:
                        trivia_doc = cons(trivia_doc, ForceBreak(False))

                case parser.TriviaMode.LineComment:
                    # Because this is post-trivia, we know there's something
                    # to our left, and we can force the space.
                    trivia_doc = cons(
                        trivia_doc,
                        Literal(" ") if at_start else None,
                        Literal(src[token.start : token.end]),
                    )
                    newline_count = 0
                    at_start = False

                case _:
                    typing.assert_never(mode)

        return trivia_doc
|
||||
|
||||
|
||||
class Printer:
    """The pretty-printer entry point.

    Holds one Matcher per tree kind (built from a PrettyTable) and exposes
    conversion of parsed trees into documents and laid-out text.
    """

    table: builder.PrettyTable
    matchers: dict[str, Matcher]

    def __init__(self, table: builder.PrettyTable):
        self.table = table
        self.matchers = {}
        for tree_name, matcher_table in table.matchers.items():
            self.matchers[tree_name] = Matcher(matcher_table, self.table.trivia_modes)

    def indent(self) -> str:
        """The indentation string configured for this printer."""
        return self.table.indent

    def convert_tree_to_document(self, tree: runtime.Tree, src: str) -> Document:
        """Recover the wadler document for `tree`, fully resolved."""
        name = tree.name
        assert name is not None, "Cannot format a tree if it still has transparent nodes inside"

        matched = self.matchers[name].match(self, list(tree.children), src)
        if matched is None:
            raise ValueError(
                f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}"
            )
        return resolve_document(matched)

    def format_tree(self, tree: runtime.Tree, src: str, width: int) -> DocumentLayout:
        """Convert `tree` to a document and lay it out within `width` columns."""
        document = self.convert_tree_to_document(tree, src)
        return layout_document(document, width, self.table.indent)
||||
Loading…
Add table
Add a link
Reference in a new issue