[wadler] Re-factor into multiple modules
Hard split between builder and runtime, as is proper.
This commit is contained in:
parent
1f84752538
commit
1a3ce02d48
4 changed files with 370 additions and 267 deletions
5
parser/wadler/__init__.py
Normal file
5
parser/wadler/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
# A prettier printer.
|
||||||
|
from . import builder
|
||||||
|
from . import runtime
|
||||||
|
|
||||||
|
from .builder import *
|
||||||
316
parser/wadler/builder.py
Normal file
316
parser/wadler/builder.py
Normal file
|
|
@ -0,0 +1,316 @@
|
||||||
|
"""Data structures to support pretty-printing.
|
||||||
|
|
||||||
|
Just like the parse tables, these tables could be written out in a different
|
||||||
|
format and used to drive a pretty-printer written in another programming
|
||||||
|
language, probably paired with a parser runtime written in that same language.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import dataclasses
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from .. import parser
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
class MatcherTable:
    """Information necessary to create a document from a single node of a
    concrete parse tree as generated by the parser.

    A "document" in this case is a wadler-style document. See the
    documentation of the module for what kinds of document nodes we expect
    to generate.

    The grammar contains extra metadata about how to add line-breaks and
    whatnot, but that information was discarded during the parse. (We don't
    need it!) That means we need to recover it after the fact. It would be
    easy, except transparent rules mean that the series of tree children
    form a context-free language instead of a regular language, and so we
    actually need a full parser again to recover the structure.

    The data to drive that parse is in `table`, which is an LR parse table of
    the usual form produced by this parser generator. To build the document,
    use the actions in the parse table to drive an LR parse, maintaining a
    stack of documents as you go.

    When matching terminals, interpret symbol names as follows:

    - `token_[NAME]` symbols are token children in the tree node we're parsing.
      (The token will have the name [NAME].) These should get shifted onto the
      stack as plain-text document nodes.

    - `tree_[KIND]` symbols are tree node children in the tree node we're
      parsing. (The tree kind will be [KIND].) These should get shifted onto
      the stack as document nodes, but recursively (by matching *their* children
      with the same strategy.)

    When reducing nonterminals, first concatenate all of the documents you remove
    from the stack into a single document, then use the first character to
    determine what (if any) additional work to do to the document:

    - `i...` symbols are productions used to generate "indent" documents. The
      `indent_amounts` dict indicates how far to indent each production. The
      concatenated documents become the child of the indent.

    - `g...` symbols are productions used to generate "group" documents. The
      concatenated documents become the child of the group.

    - `n...` symbols are productions that generate newlines. A newline document
      should be created and appended to the concatenated documents. The
      `newline_replace` dict indicates what the replacement text for the newline
      document should be.

    - `p...` symbols are just like `n...` symbols, except the newline symbol
      is prepended instead of appended.

    - `f...` symbols are like `n...` symbols, except that a force-break document
      is appended instead of a newline document.

    - `d...` symbols are like `f...` symbols, except that the force-break
      document is prepended instead of appended.

    - Any other prefix should be ignored.
    """

    # Parse table to recover the node into a document
    table: parser.ParseTable
    # Mapping from the name of i_ rules to indent counts
    indent_amounts: dict[str, int]
    # Mapping from the names of n_ (postfix) and p_ (prefix) newline rules to
    # the text they flatten to
    newline_replace: dict[str, str]
|
||||||
|
|
||||||
|
|
||||||
|
def _compile_nonterminal_matcher(
    grammar: parser.Grammar,
    nonterminals: dict[str, parser.NonTerminal],
    rule: parser.NonTerminal,
) -> MatcherTable:
    """Generate a matcher table for a single nonterminal.

    See the docs for [MatcherTable] to understand the result.

    `grammar` is the grammar that owns `rule`; it is handed to each rule's
    `fn` to obtain its productions and is used to build the final parse
    table. `nonterminals` maps nonterminal names to their definitions and is
    consulted whenever a production refers to a nonterminal by name. `rule`
    is the nonterminal whose children we want to learn how to match; the
    start symbol of the generated grammar is `yyy_` followed by its name.
    """
    generated_grammar: list[tuple[str, list[str]]] = []
    visited: set[str] = set()

    # In order to generate groups, indents, and newlines we need to
    # synthesize new productions. And it happens sometimes that we get
    # duplicates, repeated synthetic productions. It's important to
    # de-duplicate productions, otherwise we'll wind up with ambiguities in
    # the parser.
    #
    # These dictionaries track the synthetic rules: the keys are production
    # and also the parameter (if any), and the values are the names of the
    # productions that produce the effect.
    #
    groups: dict[tuple[str, ...], str] = {}
    indents: dict[tuple[tuple[str, ...], int], str] = {}
    newlines: dict[tuple[tuple[str, ...], str], str] = {}
    prefix_count: int = 0

    final_newlines: dict[str, str] = {}

    def intern_rule(
        table: dict[typing.Any, str],
        key: typing.Hashable,
        prefix: str,
        children: list[str],
    ) -> str:
        """Return the name of the synthetic rule registered under `key` in
        `table`, creating it (and its production over `children`) on first
        use. New names are `{prefix}_{N}` where N is the number of rules
        already in `table`.
        """
        rule_name = table.get(key)
        if rule_name is None:
            rule_name = f"{prefix}_{len(table)}"
            table[key] = rule_name
            generated_grammar.append((rule_name, children))
        return rule_name

    def compile_nonterminal(name: str, nt: parser.NonTerminal):
        """Translate every production of `nt` into the generated grammar
        under `name`, at most once per name.
        """
        if name in visited:
            return

        # Mark before translating so that recursive transparent rules do not
        # loop forever.
        visited.add(name)
        for production in nt.fn(grammar).flatten(with_metadata=True):
            trans_prod = compile_production(production)
            generated_grammar.append((name, trans_prod))

    def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
        """Translate one (possibly metadata-wrapped) production into a list
        of symbol names in the generated grammar, synthesizing helper rules
        for formatting metadata along the way.
        """
        # Only `prefix_count` is rebound here; the dictionaries and lists
        # from the enclosing scope are merely mutated and need no
        # declaration.
        nonlocal prefix_count

        prefix_stack: list[str] = []

        result = []
        for item in production:
            if isinstance(item, str):
                nt = nonterminals[item]
                if nt.transparent:
                    # If it's transparent then we make a new set of
                    # productions that covers the contents of the
                    # transparent nonterminal.
                    name = "xxx_" + nt.name
                    compile_nonterminal(name, nt)
                    result.append(name)
                else:
                    # Otherwise it's a "token" in our input, named
                    # "tree_{whatever}".
                    result.append(f"tree_{item}")

            elif isinstance(item, parser.Terminal):
                # If it's a terminal it will appear in our input as
                # "token_{whatever}".
                result.append(f"token_{item.name}")

            else:
                meta, children = item
                tx_children = compile_production(children)

                pretty = meta.get("format")
                if isinstance(pretty, parser.FormatMeta):
                    if pretty.group:
                        # Generate a group rule.
                        rule_name = intern_rule(groups, tuple(tx_children), "g", tx_children)
                        tx_children = [rule_name]

                    if pretty.indent:
                        # Generate an indent rule.
                        rule_name = intern_rule(
                            indents,
                            (tuple(tx_children), pretty.indent),
                            "i",
                            tx_children,
                        )
                        tx_children = [rule_name]

                    if pretty.newline is not None:
                        # Generate a newline rule.
                        #
                        # Newline rules are complicated because we need to avoid
                        # having a production that has zero children. Zero-child
                        # productions generate unpredictable parse trees, even
                        # when "unambiguous".
                        #
                        # Our first hedge is: if we don't have any children for
                        # this production but we *have* already converted some
                        # stuff, then take the stuff we've already converted as
                        # our child and wrap it in a newline production. (This
                        # works when the newline is not the first element in the
                        # production.)
                        #
                        if len(tx_children) == 0:
                            tx_children = result
                            result = []

                        if len(tx_children) > 0:
                            # n == postfix newline.
                            rule_name = intern_rule(
                                newlines,
                                (tuple(tx_children), pretty.newline),
                                "n",
                                tx_children,
                            )
                            tx_children = [rule_name]

                        else:
                            # If we still have no tx_children then the newline must
                            # be the first thing in the production. Ugh. We will
                            # remember it for later, and apply it after we've
                            # finished handling everything else.
                            #
                            # p == prefix newline
                            rule_name = f"p_{prefix_count}"
                            prefix_count += 1
                            final_newlines[rule_name] = pretty.newline
                            prefix_stack.append(rule_name)

                    if pretty.forced_break:
                        # Generate a force-break rule.
                        #
                        # This follows the same strategies as newlines with
                        # respect to empty productions.
                        #
                        # NOTE(review): unlike g_/i_/n_ rules, f_ rules are not
                        # de-duplicated; confirm that repeated identical f_
                        # productions cannot introduce ambiguity.
                        if len(tx_children) == 0:
                            tx_children = result
                            result = []

                        if len(tx_children) > 0:
                            # f == postfix forced break
                            rule_name = f"f_{prefix_count}"
                            prefix_count += 1

                            generated_grammar.append((rule_name, tx_children))
                            tx_children = [rule_name]
                        else:
                            # d == prefix forced break (so-named because 'd' is
                            # to the right of 'f' on my keyboard)
                            rule_name = f"d_{prefix_count}"
                            prefix_count += 1
                            prefix_stack.append(rule_name)

                # If it turned out to have formatting meta then we will have
                # replaced or augmented the translated children appropriately.
                # Otherwise, if it's highlighting meta or something else, we
                # will have ignored it and the translated children should just
                # be inserted inline.
                result.extend(tx_children)

        # Now is the time to handle any prefix rules, by wrapping the results in
        # a new production for the prefix and replacing the results with that
        # one. The most recently recorded prefix becomes the innermost wrapper.
        while prefix_stack:
            rule_name = prefix_stack.pop()
            generated_grammar.append((rule_name, result))
            result = [rule_name]

        return result

    start_name = f"yyy_{rule.name}"
    compile_nonterminal(start_name, rule)
    gen = grammar._generator(start_name, generated_grammar)
    parse_table = gen.gen_table()

    # Postfix newline rules join the prefix ones already recorded, so a single
    # mapping describes the replacement text for every newline rule.
    for (_, replacement), rule_name in newlines.items():
        final_newlines[rule_name] = replacement

    indent_amounts = {rule_name: amount for ((_, amount), rule_name) in indents.items()}

    return MatcherTable(
        table=parse_table,
        indent_amounts=indent_amounts,
        newline_replace=final_newlines,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
class PrettyTable:
    """Information necessary to convert a parsed tree into a wadler-style
    pretty document, where it can then be formatted.

    This is basically a bunch of "MatcherTables", one for each kind of tree,
    that tell us how to recover document structure from the tree node. We also
    record:

    - The indentation string to use.
    - The trivia modes of any terminals, for use in reconstructing trivia.
    """

    # The indentation string to use
    indent: str
    # Mapping from terminal name to that terminal's trivia mode
    trivia_modes: dict[str, parser.TriviaMode]
    # Mapping from nonterminal name to the table used to match its children
    matchers: dict[str, MatcherTable]
|
||||||
|
|
||||||
|
|
||||||
|
def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) -> PrettyTable:
    """Build the [PrettyTable] that drives a pretty-printer for `grammar`.

    The indentation string is `indent` when one is supplied. Otherwise it is
    the grammar's `pretty_indent` attribute, if the grammar has one and it is
    not None, and failing that a built-in default.

    The result also carries the trivia mode of every named terminal that
    declares one, and one [MatcherTable] per nonterminal in the grammar.
    """
    by_name = {nt.name: nt for nt in grammar.non_terminals()}

    # Resolve the indentation string: explicit argument first, then the
    # grammar's own preference, then the default.
    indent_text = indent
    if indent_text is None:
        indent_text = getattr(grammar, "pretty_indent", None)
    if indent_text is None:
        indent_text = " "

    # Only named terminals whose metadata carries a real TriviaMode are
    # recorded.
    modes = {}
    for terminal in grammar.terminals():
        mode = terminal.meta.get("trivia_mode")
        if terminal.name is None or not isinstance(mode, parser.TriviaMode):
            continue
        modes[terminal.name] = mode

    tables = {
        name: _compile_nonterminal_matcher(grammar, by_name, nt)
        for name, nt in by_name.items()
    }

    return PrettyTable(indent_text, modes, tables)
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
# A prettier printer.
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
from . import parser
|
from . import builder
|
||||||
from . import runtime
|
from .. import parser
|
||||||
|
from .. import runtime
|
||||||
|
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
@ -360,240 +360,16 @@ def slice_pre_post_trivia(
|
||||||
return ([], tokens)
|
return ([], tokens)
|
||||||
|
|
||||||
|
|
||||||
############################################################################
|
|
||||||
# Data to Drive the Pretty Printer
|
|
||||||
############################################################################
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
|
||||||
class MatcherTable:
|
|
||||||
"""Information necessary to create a document from a concrete parse tree,
|
|
||||||
as generated by the parser.
|
|
||||||
|
|
||||||
(In order to do this we need to re-parse the children of the tree, in
|
|
||||||
order to recover structure added by transparent rules. That's why each
|
|
||||||
MatcherTable has an associated ParseTable!)
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Parse table to recover the node into a document
|
|
||||||
table: parser.ParseTable
|
|
||||||
# Mapping from the name of i_ rules to indent counts
|
|
||||||
indent_amounts: dict[str, int]
|
|
||||||
# Mapping from the names of n_ rules to the text they flatten to
|
|
||||||
newline_replace: dict[str, str]
|
|
||||||
|
|
||||||
|
|
||||||
def _compile_nonterminal_matcher(
|
|
||||||
grammar: parser.Grammar,
|
|
||||||
nonterminals: dict[str, parser.NonTerminal],
|
|
||||||
rule: parser.NonTerminal,
|
|
||||||
) -> MatcherTable:
|
|
||||||
generated_grammar: list[typing.Tuple[str, list[str]]] = []
|
|
||||||
visited: set[str] = set()
|
|
||||||
|
|
||||||
# In order to generate groups, indents, and newlines we need to
|
|
||||||
# synthesize new productions. And it happens sometimes that we get
|
|
||||||
# duplicates, repeated synthetic productions. It's important to
|
|
||||||
# de-duplicate productions, otherwise we'll wind up with ambiguities
|
|
||||||
# in the parser.
|
|
||||||
#
|
|
||||||
# These dictionaries track the synthetic rules: the keys are
|
|
||||||
# production and also the parameter (if any), and the values are the
|
|
||||||
# names of the productions that produce the effect.
|
|
||||||
#
|
|
||||||
groups: dict[tuple[str, ...], str] = {}
|
|
||||||
indents: dict[tuple[tuple[str, ...], int], str] = {}
|
|
||||||
newlines: dict[tuple[tuple[str, ...], str], str] = {}
|
|
||||||
prefix_count: int = 0
|
|
||||||
|
|
||||||
final_newlines: dict[str, str] = {}
|
|
||||||
|
|
||||||
def compile_nonterminal(name: str, rule: parser.NonTerminal):
|
|
||||||
if name not in visited:
|
|
||||||
visited.add(name)
|
|
||||||
for production in rule.fn(grammar).flatten(with_metadata=True):
|
|
||||||
trans_prod = compile_production(production)
|
|
||||||
generated_grammar.append((name, trans_prod))
|
|
||||||
|
|
||||||
def compile_production(production: parser.FlattenedWithMetadata) -> list[str]:
|
|
||||||
nonlocal groups
|
|
||||||
nonlocal indents
|
|
||||||
nonlocal newlines
|
|
||||||
nonlocal prefix_count
|
|
||||||
nonlocal final_newlines
|
|
||||||
|
|
||||||
prefix_stack: list[str] = []
|
|
||||||
|
|
||||||
result = []
|
|
||||||
for item in production:
|
|
||||||
if isinstance(item, str):
|
|
||||||
nt = nonterminals[item]
|
|
||||||
if nt.transparent:
|
|
||||||
# If it's transparent then we make a new set of
|
|
||||||
# productions that covers the contents of the
|
|
||||||
# transparent nonterminal.
|
|
||||||
name = "xxx_" + nt.name
|
|
||||||
compile_nonterminal(name, nt)
|
|
||||||
result.append(name)
|
|
||||||
else:
|
|
||||||
# Otherwise it's a "token" in our input, named
|
|
||||||
# "tree_{whatever}".
|
|
||||||
result.append(f"tree_{item}")
|
|
||||||
|
|
||||||
elif isinstance(item, parser.Terminal):
|
|
||||||
# If it's a terminal it will appear in our input as
|
|
||||||
# "token_{whatever}".
|
|
||||||
result.append(f"token_{item.name}")
|
|
||||||
|
|
||||||
else:
|
|
||||||
meta, children = item
|
|
||||||
tx_children = compile_production(children)
|
|
||||||
|
|
||||||
pretty = meta.get("format")
|
|
||||||
if isinstance(pretty, parser.FormatMeta):
|
|
||||||
if pretty.group:
|
|
||||||
# Make a fake rule.
|
|
||||||
child_key = tuple(tx_children)
|
|
||||||
rule_name = groups.get(child_key)
|
|
||||||
if rule_name is None:
|
|
||||||
rule_name = f"g_{len(groups)}"
|
|
||||||
groups[child_key] = rule_name
|
|
||||||
generated_grammar.append((rule_name, tx_children))
|
|
||||||
|
|
||||||
tx_children = [rule_name]
|
|
||||||
|
|
||||||
if pretty.indent:
|
|
||||||
child_key = (tuple(tx_children), pretty.indent)
|
|
||||||
rule_name = indents.get(child_key)
|
|
||||||
if rule_name is None:
|
|
||||||
rule_name = f"i_{len(indents)}"
|
|
||||||
indents[child_key] = rule_name
|
|
||||||
generated_grammar.append((rule_name, tx_children))
|
|
||||||
|
|
||||||
tx_children = [rule_name]
|
|
||||||
|
|
||||||
if pretty.newline is not None:
|
|
||||||
if len(tx_children) == 0:
|
|
||||||
tx_children = result
|
|
||||||
result = []
|
|
||||||
|
|
||||||
if len(tx_children) > 0:
|
|
||||||
# n == postfix newline
|
|
||||||
child_key = (tuple(tx_children), pretty.newline)
|
|
||||||
rule_name = newlines.get(child_key)
|
|
||||||
if rule_name is None:
|
|
||||||
rule_name = f"n_{len(newlines)}"
|
|
||||||
newlines[child_key] = rule_name
|
|
||||||
generated_grammar.append((rule_name, tx_children))
|
|
||||||
|
|
||||||
tx_children = [rule_name]
|
|
||||||
|
|
||||||
else:
|
|
||||||
# p == prefix newline
|
|
||||||
rule_name = f"p_{prefix_count}"
|
|
||||||
prefix_count += 1
|
|
||||||
final_newlines[rule_name] = pretty.newline
|
|
||||||
prefix_stack.append(rule_name)
|
|
||||||
|
|
||||||
if pretty.forced_break:
|
|
||||||
if len(tx_children) == 0:
|
|
||||||
tx_children = result
|
|
||||||
result = []
|
|
||||||
|
|
||||||
if len(tx_children) > 0:
|
|
||||||
# f == postfix forced break
|
|
||||||
rule_name = f"f_{prefix_count}"
|
|
||||||
prefix_count += 1
|
|
||||||
|
|
||||||
generated_grammar.append((rule_name, tx_children))
|
|
||||||
tx_children = [rule_name]
|
|
||||||
else:
|
|
||||||
# d == prefix forced break (to the right of 'f' on my kbd)
|
|
||||||
rule_name = f"d_{prefix_count}"
|
|
||||||
prefix_count += 1
|
|
||||||
prefix_stack.append(rule_name)
|
|
||||||
|
|
||||||
# If it turned out to have formatting meta then we will
|
|
||||||
# have replaced or augmented the translated children
|
|
||||||
# appropriately. Otherwise, if it's highlighting meta or
|
|
||||||
# something else, we'll have ignored it and the
|
|
||||||
# translated children should just be inserted inline.
|
|
||||||
result.extend(tx_children)
|
|
||||||
|
|
||||||
# OK so we might have some prefix newlines. They should contain... things.
|
|
||||||
while len(prefix_stack) > 0:
|
|
||||||
rule_name = prefix_stack.pop()
|
|
||||||
generated_grammar.append((rule_name, result))
|
|
||||||
result = [rule_name]
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
start_name = f"yyy_{rule.name}"
|
|
||||||
compile_nonterminal(start_name, rule)
|
|
||||||
gen = grammar._generator(start_name, generated_grammar)
|
|
||||||
parse_table = gen.gen_table()
|
|
||||||
|
|
||||||
for (_, replacement), rule_name in newlines.items():
|
|
||||||
final_newlines[rule_name] = replacement
|
|
||||||
|
|
||||||
indent_amounts = {rule_name: amount for ((_, amount), rule_name) in indents.items()}
|
|
||||||
|
|
||||||
return MatcherTable(
|
|
||||||
parse_table,
|
|
||||||
indent_amounts,
|
|
||||||
final_newlines,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
|
||||||
class PrettyTable:
|
|
||||||
"""Information necessary to convert a parsed tree into a wadler-style
|
|
||||||
pretty document, where it can then be formatted.
|
|
||||||
|
|
||||||
This is basically a bunch of "MatcherTables", one for each kind of tree,
|
|
||||||
that tell us how to recover document structure from the tree node.
|
|
||||||
"""
|
|
||||||
|
|
||||||
indent: str
|
|
||||||
trivia_modes: dict[str, parser.TriviaMode]
|
|
||||||
matchers: dict[str, MatcherTable]
|
|
||||||
|
|
||||||
|
|
||||||
def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) -> PrettyTable:
|
|
||||||
nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
|
|
||||||
matchers = {}
|
|
||||||
|
|
||||||
if indent is None:
|
|
||||||
indent = getattr(grammar, "pretty_indent", None)
|
|
||||||
if indent is None:
|
|
||||||
indent = " "
|
|
||||||
|
|
||||||
trivia_mode = {}
|
|
||||||
for t in grammar.terminals():
|
|
||||||
mode = t.meta.get("trivia_mode")
|
|
||||||
if t.name is not None and isinstance(mode, parser.TriviaMode):
|
|
||||||
trivia_mode[t.name] = mode
|
|
||||||
|
|
||||||
for name, rule in nonterminals.items():
|
|
||||||
matchers[name] = _compile_nonterminal_matcher(grammar, nonterminals, rule)
|
|
||||||
|
|
||||||
return PrettyTable(
|
|
||||||
indent,
|
|
||||||
trivia_mode,
|
|
||||||
matchers,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
# The Actual Pretty Printer
|
# The Actual Pretty Printer
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
||||||
|
|
||||||
class Matcher:
|
class Matcher:
|
||||||
table: MatcherTable
|
table: builder.MatcherTable
|
||||||
trivia_mode: dict[str, parser.TriviaMode]
|
trivia_mode: dict[str, parser.TriviaMode]
|
||||||
|
|
||||||
def __init__(self, table: MatcherTable, trivia_mode: dict[str, parser.TriviaMode]):
|
def __init__(self, table: builder.MatcherTable, trivia_mode: dict[str, parser.TriviaMode]):
|
||||||
self.table = table
|
self.table = table
|
||||||
self.trivia_mode = trivia_mode
|
self.trivia_mode = trivia_mode
|
||||||
|
|
||||||
|
|
@ -799,10 +575,10 @@ class Matcher:
|
||||||
|
|
||||||
|
|
||||||
class Printer:
|
class Printer:
|
||||||
table: PrettyTable
|
table: builder.PrettyTable
|
||||||
matchers: dict[str, Matcher]
|
matchers: dict[str, Matcher]
|
||||||
|
|
||||||
def __init__(self, table: PrettyTable):
|
def __init__(self, table: builder.PrettyTable):
|
||||||
self.table = table
|
self.table = table
|
||||||
self.matchers = {
|
self.matchers = {
|
||||||
name: Matcher(value, self.table.trivia_modes) for name, value in table.matchers.items()
|
name: Matcher(value, self.table.trivia_modes) for name, value in table.matchers.items()
|
||||||
|
|
@ -18,8 +18,9 @@ from parser.parser import (
|
||||||
TriviaMode,
|
TriviaMode,
|
||||||
)
|
)
|
||||||
|
|
||||||
import parser.runtime as runtime
|
import parser.runtime as parser_runtime
|
||||||
import parser.wadler as wadler
|
import parser.wadler.builder as builder
|
||||||
|
import parser.wadler.runtime as runtime
|
||||||
|
|
||||||
|
|
||||||
class JsonGrammar(Grammar):
|
class JsonGrammar(Grammar):
|
||||||
|
|
@ -110,33 +111,33 @@ class JsonGrammar(Grammar):
|
||||||
JSON = JsonGrammar()
|
JSON = JsonGrammar()
|
||||||
JSON_TABLE = JSON.build_table()
|
JSON_TABLE = JSON.build_table()
|
||||||
JSON_LEXER = JSON.compile_lexer()
|
JSON_LEXER = JSON.compile_lexer()
|
||||||
JSON_PARSER = runtime.Parser(JSON_TABLE)
|
JSON_PARSER = parser_runtime.Parser(JSON_TABLE)
|
||||||
|
|
||||||
|
|
||||||
def flatten_document(doc: wadler.Document, src: str) -> list:
|
def flatten_document(doc: runtime.Document, src: str) -> list:
|
||||||
match doc:
|
match doc:
|
||||||
case wadler.NewLine(replace):
|
case runtime.NewLine(replace):
|
||||||
return [f"<newline {repr(replace)}>"]
|
return [f"<newline {repr(replace)}>"]
|
||||||
case wadler.ForceBreak():
|
case runtime.ForceBreak():
|
||||||
return [f"<forced break silent={doc.silent}>"]
|
return [f"<forced break silent={doc.silent}>"]
|
||||||
case wadler.Indent():
|
case runtime.Indent():
|
||||||
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
|
return [[f"<indent {doc.amount}>", flatten_document(doc.doc, src)]]
|
||||||
case wadler.Literal(text):
|
case runtime.Literal(text):
|
||||||
return [text]
|
return [text]
|
||||||
case wadler.Group():
|
case runtime.Group():
|
||||||
return [flatten_document(doc.child, src)]
|
return [flatten_document(doc.child, src)]
|
||||||
case wadler.Lazy():
|
case runtime.Lazy():
|
||||||
return flatten_document(doc.resolve(), src)
|
return flatten_document(doc.resolve(), src)
|
||||||
case wadler.Cons():
|
case runtime.Cons():
|
||||||
result = []
|
result = []
|
||||||
for d in doc.docs:
|
for d in doc.docs:
|
||||||
result += flatten_document(d, src)
|
result += flatten_document(d, src)
|
||||||
return result
|
return result
|
||||||
case None:
|
case None:
|
||||||
return []
|
return []
|
||||||
case wadler.Marker():
|
case runtime.Marker():
|
||||||
return [f"<marker {repr(doc.meta)}>", flatten_document(doc.child, src)]
|
return [f"<marker {repr(doc.meta)}>", flatten_document(doc.child, src)]
|
||||||
case wadler.Trivia():
|
case runtime.Trivia():
|
||||||
return [f"<trivia>", flatten_document(doc.child, src)]
|
return [f"<trivia>", flatten_document(doc.child, src)]
|
||||||
case _:
|
case _:
|
||||||
typing.assert_never(doc)
|
typing.assert_never(doc)
|
||||||
|
|
@ -144,12 +145,12 @@ def flatten_document(doc: wadler.Document, src: str) -> list:
|
||||||
|
|
||||||
def test_convert_tree_to_document():
|
def test_convert_tree_to_document():
|
||||||
text = '{"a": true, "b":[1,2,3]}'
|
text = '{"a": true, "b":[1,2,3]}'
|
||||||
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
|
tokens = parser_runtime.GenericTokenStream(text, JSON_LEXER)
|
||||||
tree, errors = JSON_PARSER.parse(tokens)
|
tree, errors = JSON_PARSER.parse(tokens)
|
||||||
assert [] == errors
|
assert [] == errors
|
||||||
assert tree is not None
|
assert tree is not None
|
||||||
|
|
||||||
printer = wadler.Printer(wadler.compile_pretty_table(JSON))
|
printer = runtime.Printer(builder.compile_pretty_table(JSON))
|
||||||
doc = flatten_document(printer.convert_tree_to_document(tree, text), text)
|
doc = flatten_document(printer.convert_tree_to_document(tree, text), text)
|
||||||
|
|
||||||
assert doc == [
|
assert doc == [
|
||||||
|
|
@ -211,12 +212,12 @@ def _output(txt: str) -> str:
|
||||||
|
|
||||||
def test_layout_basic():
|
def test_layout_basic():
|
||||||
text = '{"a": true, "b":[1,2,3], "c":[1,2,3,4,5,6,7]}'
|
text = '{"a": true, "b":[1,2,3], "c":[1,2,3,4,5,6,7]}'
|
||||||
tokens = runtime.GenericTokenStream(text, JSON_LEXER)
|
tokens = parser_runtime.GenericTokenStream(text, JSON_LEXER)
|
||||||
tree, errors = JSON_PARSER.parse(tokens)
|
tree, errors = JSON_PARSER.parse(tokens)
|
||||||
assert [] == errors
|
assert [] == errors
|
||||||
assert tree is not None
|
assert tree is not None
|
||||||
|
|
||||||
printer = wadler.Printer(wadler.compile_pretty_table(JSON))
|
printer = runtime.Printer(builder.compile_pretty_table(JSON))
|
||||||
result = printer.format_tree(tree, text, 50).apply_to_source(text)
|
result = printer.format_tree(tree, text, 50).apply_to_source(text)
|
||||||
|
|
||||||
assert result == _output(
|
assert result == _output(
|
||||||
|
|
@ -270,15 +271,15 @@ class TG(Grammar):
|
||||||
def test_forced_break():
|
def test_forced_break():
|
||||||
g = TG()
|
g = TG()
|
||||||
g_lexer = g.compile_lexer()
|
g_lexer = g.compile_lexer()
|
||||||
g_parser = runtime.Parser(g.build_table())
|
g_parser = parser_runtime.Parser(g.build_table())
|
||||||
|
|
||||||
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
|
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
|
||||||
|
|
||||||
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
|
tree, errors = g_parser.parse(parser_runtime.GenericTokenStream(text, g_lexer))
|
||||||
assert errors == []
|
assert errors == []
|
||||||
assert tree is not None
|
assert tree is not None
|
||||||
|
|
||||||
printer = wadler.Printer(wadler.compile_pretty_table(g))
|
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||||
|
|
||||||
assert result == _output(
|
assert result == _output(
|
||||||
|
|
@ -300,7 +301,7 @@ def test_forced_break():
|
||||||
def test_maintaining_line_breaks():
|
def test_maintaining_line_breaks():
|
||||||
g = TG()
|
g = TG()
|
||||||
g_lexer = g.compile_lexer()
|
g_lexer = g.compile_lexer()
|
||||||
g_parser = runtime.Parser(g.build_table())
|
g_parser = parser_runtime.Parser(g.build_table())
|
||||||
|
|
||||||
text = """((ok ok)
|
text = """((ok ok)
|
||||||
; Don't break here.
|
; Don't break here.
|
||||||
|
|
@ -314,11 +315,11 @@ def test_maintaining_line_breaks():
|
||||||
; ^ This should only be one break.
|
; ^ This should only be one break.
|
||||||
(ok))"""
|
(ok))"""
|
||||||
|
|
||||||
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
|
tree, errors = g_parser.parse(parser_runtime.GenericTokenStream(text, g_lexer))
|
||||||
assert errors == []
|
assert errors == []
|
||||||
assert tree is not None
|
assert tree is not None
|
||||||
|
|
||||||
printer = wadler.Printer(wadler.compile_pretty_table(g))
|
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||||
|
|
||||||
assert result == _output(
|
assert result == _output(
|
||||||
|
|
@ -341,18 +342,18 @@ def test_maintaining_line_breaks():
|
||||||
def test_trailing_trivia():
|
def test_trailing_trivia():
|
||||||
g = TG()
|
g = TG()
|
||||||
g_lexer = g.compile_lexer()
|
g_lexer = g.compile_lexer()
|
||||||
g_parser = runtime.Parser(g.build_table())
|
g_parser = parser_runtime.Parser(g.build_table())
|
||||||
|
|
||||||
text = """((ok ok)); Don't lose this!
|
text = """((ok ok)); Don't lose this!
|
||||||
|
|
||||||
; Or this!
|
; Or this!
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
|
tree, errors = g_parser.parse(parser_runtime.GenericTokenStream(text, g_lexer))
|
||||||
assert errors == []
|
assert errors == []
|
||||||
assert tree is not None
|
assert tree is not None
|
||||||
|
|
||||||
printer = wadler.Printer(wadler.compile_pretty_table(g))
|
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||||
|
|
||||||
assert result == _output(
|
assert result == _output(
|
||||||
|
|
@ -367,18 +368,18 @@ def test_trailing_trivia():
|
||||||
def test_trailing_trivia_two():
|
def test_trailing_trivia_two():
|
||||||
g = TG()
|
g = TG()
|
||||||
g_lexer = g.compile_lexer()
|
g_lexer = g.compile_lexer()
|
||||||
g_parser = runtime.Parser(g.build_table())
|
g_parser = parser_runtime.Parser(g.build_table())
|
||||||
|
|
||||||
text = """((ok ok))
|
text = """((ok ok))
|
||||||
|
|
||||||
; Or this!
|
; Or this!
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
|
tree, errors = g_parser.parse(parser_runtime.GenericTokenStream(text, g_lexer))
|
||||||
assert errors == []
|
assert errors == []
|
||||||
assert tree is not None
|
assert tree is not None
|
||||||
|
|
||||||
printer = wadler.Printer(wadler.compile_pretty_table(g))
|
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||||
|
|
||||||
assert result == _output(
|
assert result == _output(
|
||||||
|
|
@ -393,19 +394,21 @@ def test_trailing_trivia_two():
|
||||||
def test_trailing_trivia_split():
|
def test_trailing_trivia_split():
|
||||||
g = TG()
|
g = TG()
|
||||||
g_lexer = g.compile_lexer()
|
g_lexer = g.compile_lexer()
|
||||||
g_parser = runtime.Parser(g.build_table())
|
g_parser = parser_runtime.Parser(g.build_table())
|
||||||
|
|
||||||
text = """((ok ok)); Don't lose this!
|
text = """((ok ok)); Don't lose this!
|
||||||
|
|
||||||
; Or this!
|
; Or this!
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tree, errors = g_parser.parse(runtime.GenericTokenStream(text, g_lexer))
|
tree, errors = g_parser.parse(parser_runtime.GenericTokenStream(text, g_lexer))
|
||||||
assert errors == []
|
assert errors == []
|
||||||
assert tree is not None
|
assert tree is not None
|
||||||
|
|
||||||
def rightmost(t: runtime.Tree | runtime.TokenValue) -> runtime.TokenValue | None:
|
def rightmost(
|
||||||
if isinstance(t, runtime.TokenValue):
|
t: parser_runtime.Tree | parser_runtime.TokenValue,
|
||||||
|
) -> parser_runtime.TokenValue | None:
|
||||||
|
if isinstance(t, parser_runtime.TokenValue):
|
||||||
return t
|
return t
|
||||||
|
|
||||||
for child in reversed(t.children):
|
for child in reversed(t.children):
|
||||||
|
|
@ -424,15 +427,15 @@ def test_trailing_trivia_split():
|
||||||
"COMMENT": TriviaMode.LineComment,
|
"COMMENT": TriviaMode.LineComment,
|
||||||
}
|
}
|
||||||
|
|
||||||
pre_trivia, post_trivia = wadler.slice_pre_post_trivia(TRIVIA_MODES, token.post_trivia)
|
pre_trivia, post_trivia = runtime.slice_pre_post_trivia(TRIVIA_MODES, token.post_trivia)
|
||||||
for mode, t in pre_trivia:
|
for mode, t in pre_trivia:
|
||||||
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
|
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
|
||||||
print("-----")
|
print("-----")
|
||||||
for mode, t in post_trivia:
|
for mode, t in post_trivia:
|
||||||
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
|
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
|
||||||
|
|
||||||
trivia_doc = wadler.Matcher(
|
trivia_doc = runtime.Matcher(
|
||||||
wadler.MatcherTable(ParseTable([], [], set()), {}, {}),
|
builder.MatcherTable(ParseTable([], [], set()), {}, {}),
|
||||||
TRIVIA_MODES,
|
TRIVIA_MODES,
|
||||||
).apply_post_trivia(
|
).apply_post_trivia(
|
||||||
token.post_trivia,
|
token.post_trivia,
|
||||||
|
|
@ -447,3 +450,6 @@ def test_trailing_trivia_split():
|
||||||
"; Or this!",
|
"; Or this!",
|
||||||
"<forced break silent=False>",
|
"<forced break silent=False>",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Test prefix breaks!
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue