[wadler] Refactor: data and runtime split

Now we convert the grammar into data for a pretty-printer, so in
theory we could write the pretty-printer in a different language.
This commit is contained in:
John Doty 2024-09-21 06:44:53 -07:00
parent e4585170d8
commit 1f84752538
2 changed files with 252 additions and 215 deletions

View file

@ -360,252 +360,34 @@ def slice_pre_post_trivia(
return ([], tokens)
############################################################################
# Data to Drive the Pretty Printer
############################################################################
@dataclasses.dataclass
class Matcher:
class MatcherTable:
"""Information necessary to create a document from a concrete parse tree,
as generated by the parser.
(In order to do this we need to re-parse the children of the tree, in
order to recover structure added by transparent rules. That's why each
MatcherTable has an associated ParseTable!)
"""
# Parse table to recover the node into a document
table: parser.ParseTable
# Mapping from the name of i_ rules to indent counts
indent_amounts: dict[str, int]
# Mapping from the names of n_ rules to the text they flatten to
newline_replace: dict[str, str]
trivia_mode: dict[str, parser.TriviaMode]
def match(
self,
printer: "Printer",
items: list[runtime.Tree | runtime.TokenValue],
src: str,
) -> Document:
stack: list[tuple[int, Document]] = [(0, None)]
table = self.table
# eof_trivia = []
# if len(items) > 0:
# item = items[-1]
# if isinstance(item, runtime.TokenValue):
# eof_trivia = item.post_trivia
input = [(child_to_name(i), i) for i in items] + [
(
"$",
runtime.TokenValue(
kind="$",
start=0,
end=0,
pre_trivia=[],
post_trivia=[],
),
)
]
input_index = 0
while True:
current_token = input[input_index]
current_state = stack[-1][0]
action = table.actions[current_state].get(current_token[0], parser.Error())
match action:
case parser.Accept():
result = stack[-1][1]
# result = cons(result, self.apply_trivia(eof_trivia))
return result
case parser.Reduce(name=name, count=size):
child: Document = None
if size > 0:
for _, c in stack[-size:]:
if c is None:
continue
child = cons(child, c)
del stack[-size:]
if name[0] == "g":
child = group(child)
elif name[0] == "i":
amount = self.indent_amounts[name]
child = Indent(amount, child)
elif name[0] == "n":
replace = self.newline_replace[name]
child = cons(child, NewLine(replace))
elif name[0] == "p":
replace = self.newline_replace[name]
child = cons(NewLine(replace), child)
elif name[0] == "f":
child = cons(child, ForceBreak(False))
elif name[0] == "d":
child = cons(ForceBreak(False), child)
else:
pass # Reducing a transparent rule probably.
goto = table.gotos[stack[-1][0]].get(name)
assert goto is not None
stack.append((goto, child))
case parser.Shift():
value = current_token[1]
if isinstance(value, runtime.Tree):
child = Lazy.from_tree(value, src, printer)
else:
child = cons(
trivia(self.apply_pre_trivia(value.pre_trivia, src)),
Literal(src[value.start : value.end]),
trivia(self.apply_post_trivia(value.post_trivia, src)),
)
stack.append((action.state, child))
input_index += 1
case parser.Error():
raise Exception("How did I get a parse error here??")
def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
pre_trivia, _ = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
# print(f"PRE:\n{pre_trivia}")
if len(pre_trivia) == 0:
return None
at_start_of_file = pre_trivia[0][1].start == 0
trivia_doc = None
new_line_count = 0
for mode, token in pre_trivia:
# print(f"PRE {mode:25} {token.kind:30} ({new_line_count})")
match mode:
case parser.TriviaMode.LineComment:
trivia_doc = cons(
trivia_doc,
Literal(src[token.start : token.end]),
ForceBreak(False),
)
new_line_count = 0 # There will be a newline after this.
at_start_of_file = False
case parser.TriviaMode.Blank:
pass
case parser.TriviaMode.NewLine:
new_line_count += 1
if new_line_count == 2 and not at_start_of_file:
trivia_doc = cons(
trivia_doc,
ForceBreak(False),
)
case _:
typing.assert_never(mode)
return trivia_doc
def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
if len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src):
return self.apply_eof_trivia(trivia_tokens, src)
_, post_trivia = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
trivia_doc = None
for mode, token in post_trivia:
# print(f"POST {mode:25} {token.kind:30}")
match mode:
case parser.TriviaMode.Blank:
pass
case parser.TriviaMode.NewLine:
# Anything after a line break is not processed as post
# trivia.
break
case parser.TriviaMode.LineComment:
# Because this is post-trivia, we know there's something
# to our left, and we can force the space.
trivia_doc = cons(
Literal(" "),
Literal(src[token.start : token.end]),
ForceBreak(True), # And the line needs to end.
)
break
case _:
typing.assert_never(mode)
return trivia_doc
def apply_eof_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
# EOF trivia has weird rules, namely, it's like pre and post joined together but.
tokens = [
(self.trivia_mode.get(token.kind, parser.TriviaMode.Blank), token)
for token in trivia_tokens
]
at_start = True
newline_count = 0
trivia_doc = None
for mode, token in tokens:
match mode:
case parser.TriviaMode.Blank:
pass
case parser.TriviaMode.NewLine:
at_start = False
newline_count += 1
if newline_count <= 2:
trivia_doc = cons(trivia_doc, ForceBreak(False))
case parser.TriviaMode.LineComment:
# Because this is post-trivia, we know there's something
# to our left, and we can force the space.
trivia_doc = cons(
trivia_doc,
Literal(" ") if at_start else None,
Literal(src[token.start : token.end]),
)
newline_count = 0
at_start = False
case _:
typing.assert_never(mode)
return trivia_doc
class Printer:
# TODO: Pre-generate the matcher tables for a grammar, to make it
# possible to do codegen in other languages.
grammar: parser.Grammar
_matchers: dict[str, Matcher]
_nonterminals: dict[str, parser.NonTerminal]
_indent: str
_trivia_mode: dict[str, parser.TriviaMode]
def __init__(self, grammar: parser.Grammar, indent: str | None = None):
self.grammar = grammar
self._nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
self._matchers = {}
if indent is None:
indent = getattr(self.grammar, "pretty_indent", None)
if indent is None:
indent = " "
self._indent = indent
trivia_mode = {}
for t in grammar.terminals():
mode = t.meta.get("trivia_mode")
if t.name is not None and isinstance(mode, parser.TriviaMode):
trivia_mode[t.name] = mode
self._trivia_mode = trivia_mode
def indent(self) -> str:
return self._indent
def lookup_nonterminal(self, name: str) -> parser.NonTerminal:
return self._nonterminals[name]
def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
def _compile_nonterminal_matcher(
grammar: parser.Grammar,
nonterminals: dict[str, parser.NonTerminal],
rule: parser.NonTerminal,
) -> MatcherTable:
generated_grammar: list[typing.Tuple[str, list[str]]] = []
visited: set[str] = set()
@ -629,7 +411,7 @@ class Printer:
def compile_nonterminal(name: str, rule: parser.NonTerminal):
if name not in visited:
visited.add(name)
for production in rule.fn(self.grammar).flatten(with_metadata=True):
for production in rule.fn(grammar).flatten(with_metadata=True):
trans_prod = compile_production(production)
generated_grammar.append((name, trans_prod))
@ -645,7 +427,7 @@ class Printer:
result = []
for item in production:
if isinstance(item, str):
nt = self._nonterminals[item]
nt = nonterminals[item]
if nt.transparent:
# If it's transparent then we make a new set of
# productions that covers the contents of the
@ -748,7 +530,7 @@ class Printer:
start_name = f"yyy_{rule.name}"
compile_nonterminal(start_name, rule)
gen = self.grammar._generator(start_name, generated_grammar)
gen = grammar._generator(start_name, generated_grammar)
parse_table = gen.gen_table()
for (_, replacement), rule_name in newlines.items():
@ -756,27 +538,284 @@ class Printer:
indent_amounts = {rule_name: amount for ((_, amount), rule_name) in indents.items()}
return Matcher(
return MatcherTable(
parse_table,
indent_amounts,
final_newlines,
self._trivia_mode,
)
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
result = self._matchers.get(rule.name)
if result is None:
result = self.compile_rule(rule)
self._matchers[rule.name] = result
@dataclasses.dataclass
class PrettyTable:
"""Information necessary to convert a parsed tree into a wadler-style
pretty document, where it can then be formatted.
This is basically a bunch of "MatcherTables", one for each kind of tree,
that tell us how to recover document structure from the tree node.
"""
# NOTE: field order is part of the interface; compile_pretty_table builds
# this positionally as PrettyTable(indent, trivia_mode, matchers).
#
# The indent string; Printer.indent() returns it and Printer.format_tree
# hands it to layout_document.
indent: str
# Terminal name -> trivia mode. Populated by compile_pretty_table from each
# terminal's "trivia_mode" metadata and shared by every runtime Matcher.
trivia_modes: dict[str, parser.TriviaMode]
# Nonterminal name -> compiled MatcherTable. The Printer looks entries up
# by the `name` of the tree node being converted.
matchers: dict[str, MatcherTable]
def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) -> PrettyTable:
# Build the complete, data-only PrettyTable for `grammar`: the indent
# string, the per-terminal trivia modes, and one MatcherTable for every
# nonterminal the grammar reports.
#
# `indent` overrides the indent string; when it is None the grammar's
# optional `pretty_indent` attribute is used, and failing that a built-in
# default.
nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
matchers = {}
if indent is None:
# getattr with a default: a grammar is not required to define
# `pretty_indent`.
indent = getattr(grammar, "pretty_indent", None)
if indent is None:
# NOTE(review): the whitespace inside this literal may have been collapsed
# in this view of the file -- confirm the intended default width.
indent = " "
# Only terminals that have a name AND whose "trivia_mode" metadata is an
# actual parser.TriviaMode are recorded; everything else is left out of
# the mapping.
trivia_mode = {}
for t in grammar.terminals():
mode = t.meta.get("trivia_mode")
if t.name is not None and isinstance(mode, parser.TriviaMode):
trivia_mode[t.name] = mode
# Every nonterminal is compiled eagerly here, so the returned table is
# self-contained and the Printer never needs the grammar again.
for name, rule in nonterminals.items():
matchers[name] = _compile_nonterminal_matcher(grammar, nonterminals, rule)
return PrettyTable(
indent,
trivia_mode,
matchers,
)
############################################################################
# The Actual Pretty Printer
############################################################################
class Matcher:
# Runtime half of a MatcherTable: runs the table's parse table over the
# children of one tree node and builds the corresponding Document,
# attaching comment/newline trivia to each token along the way.
#
# The precompiled data (parse table, indent amounts, newline replacements).
table: MatcherTable
# Terminal name -> trivia mode, used to classify trivia tokens.
trivia_mode: dict[str, parser.TriviaMode]
def __init__(self, table: MatcherTable, trivia_mode: dict[str, parser.TriviaMode]):
self.table = table
self.trivia_mode = trivia_mode
def match(
self,
printer: "Printer",
items: list[runtime.Tree | runtime.TokenValue],
src: str,
) -> Document:
# Re-parse `items` (the children of a tree node) with this matcher's
# parse table and return the Document built up by the reductions.
# `src` is the original source text that token offsets index into;
# `printer` is only passed through to Lazy.from_tree for child trees.
# Raises Exception if the table reports a parse error.
#
# Parse stack of (state, document-so-far); starts in state 0 with no
# document.
stack: list[tuple[int, Document]] = [(0, None)]
table = self.table.table
# eof_trivia = []
# if len(items) > 0:
# item = items[-1]
# if isinstance(item, runtime.TokenValue):
# eof_trivia = item.post_trivia
# The input is (symbol name, child) pairs followed by a synthetic,
# zero-width "$" token that marks the end of input.
input = [(child_to_name(i), i) for i in items] + [
(
"$",
runtime.TokenValue(
kind="$",
start=0,
end=0,
pre_trivia=[],
post_trivia=[],
),
)
]
input_index = 0
while True:
current_token = input[input_index]
current_state = stack[-1][0]
# A missing table entry is treated as an explicit parse error.
action = table.actions[current_state].get(current_token[0], parser.Error())
match action:
case parser.Accept():
result = stack[-1][1]
# result = cons(result, self.apply_trivia(eof_trivia))
return result
case parser.Reduce(name=name, count=size):
child: Document = None
# Guard on size: stack[-0:] would be the WHOLE stack, not an empty slice.
if size > 0:
# Concatenate the popped documents in order, skipping empty ones.
for _, c in stack[-size:]:
if c is None:
continue
child = cons(child, c)
del stack[-size:]
# The first letter of the reduced rule's name selects the document
# operation applied to the concatenated children.
if name[0] == "g":
# Wrap in a group.
child = group(child)
elif name[0] == "i":
# Indent by the amount recorded for this rule.
amount = self.table.indent_amounts[name]
child = Indent(amount, child)
elif name[0] == "n":
# NewLine (with its replacement text) AFTER the children.
replace = self.table.newline_replace[name]
child = cons(child, NewLine(replace))
elif name[0] == "p":
# NewLine (with its replacement text) BEFORE the children.
replace = self.table.newline_replace[name]
child = cons(NewLine(replace), child)
elif name[0] == "f":
# ForceBreak AFTER the children.
child = cons(child, ForceBreak(False))
elif name[0] == "d":
# ForceBreak BEFORE the children.
child = cons(ForceBreak(False), child)
else:
pass # Reducing a transparent rule probably.
# Standard LR goto: look up the next state from the state now exposed on
# top of the stack.
goto = table.gotos[stack[-1][0]].get(name)
assert goto is not None
stack.append((goto, child))
case parser.Shift():
value = current_token[1]
if isinstance(value, runtime.Tree):
# Child trees are handed to Lazy.from_tree rather than converted here.
child = Lazy.from_tree(value, src, printer)
else:
# Tokens become their literal source text, wrapped in whatever their
# leading and trailing trivia turn into.
child = cons(
trivia(self.apply_pre_trivia(value.pre_trivia, src)),
Literal(src[value.start : value.end]),
trivia(self.apply_post_trivia(value.post_trivia, src)),
)
stack.append((action.state, child))
input_index += 1
case parser.Error():
raise Exception("How did I get a parse error here??")
def apply_pre_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
# Build the document for the trivia in front of a token, or None when
# there is nothing to emit. Only the "pre" half returned by
# slice_pre_post_trivia is considered.
pre_trivia, _ = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
if len(pre_trivia) == 0:
return None
# True while the trivia run began at offset 0 and no comment has been seen
# yet; newline breaks are suppressed in that state.
at_start_of_file = pre_trivia[0][1].start == 0
trivia_doc = None
new_line_count = 0
for mode, token in pre_trivia:
match mode:
case parser.TriviaMode.LineComment:
# Keep the comment text verbatim and follow it with a ForceBreak.
trivia_doc = cons(
trivia_doc,
Literal(src[token.start : token.end]),
ForceBreak(False),
)
new_line_count = 0 # There will be a newline after this.
at_start_of_file = False
case parser.TriviaMode.Blank:
pass
case parser.TriviaMode.NewLine:
new_line_count += 1
# Emit one break exactly when the run reaches two newlines; further
# newlines in the same run add nothing (presumably collapsing multiple
# blank lines into one -- TODO confirm).
if new_line_count == 2 and not at_start_of_file:
trivia_doc = cons(
trivia_doc,
ForceBreak(False),
)
case _:
typing.assert_never(mode)
return trivia_doc
def apply_post_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
# Build the document for the trivia that trails a token on the same
# line, or None when there is nothing to emit.
#
# Trivia that runs up to the very end of the source is handled by the
# separate end-of-file rules instead.
if len(trivia_tokens) > 0 and trivia_tokens[-1].end == len(src):
return self.apply_eof_trivia(trivia_tokens, src)
_, post_trivia = slice_pre_post_trivia(self.trivia_mode, trivia_tokens)
trivia_doc = None
for mode, token in post_trivia:
match mode:
case parser.TriviaMode.Blank:
pass
case parser.TriviaMode.NewLine:
# Anything after a line break is not processed as post
# trivia.
break
case parser.TriviaMode.LineComment:
# Because this is post-trivia, we know there's something
# to our left, and we can force the space.
# At most one comment is emitted: the loop stops right after it.
trivia_doc = cons(
Literal(" "),
Literal(src[token.start : token.end]),
ForceBreak(True), # And the line needs to end.
)
break
case _:
typing.assert_never(mode)
return trivia_doc
def apply_eof_trivia(self, trivia_tokens: list[runtime.TokenValue], src: str) -> Document:
# EOF trivia has its own rules: the whole trivia list is processed in one
# pass (it is not split into pre/post halves), mixing the behaviors of
# pre-trivia and post-trivia.
#
# Token kinds with no registered trivia mode are treated as Blank.
tokens = [
(self.trivia_mode.get(token.kind, parser.TriviaMode.Blank), token)
for token in trivia_tokens
]
# True until the first newline or comment has been seen.
at_start = True
newline_count = 0
trivia_doc = None
for mode, token in tokens:
match mode:
case parser.TriviaMode.Blank:
pass
case parser.TriviaMode.NewLine:
at_start = False
newline_count += 1
# Only the first two newlines of a consecutive run produce a break.
if newline_count <= 2:
trivia_doc = cons(trivia_doc, ForceBreak(False))
case parser.TriviaMode.LineComment:
# A comment seen before any newline is still on the token's own
# line, so there's something to our left and we force the space;
# later comments get no leading space.
trivia_doc = cons(
trivia_doc,
Literal(" ") if at_start else None,
Literal(src[token.start : token.end]),
)
newline_count = 0
at_start = False
case _:
typing.assert_never(mode)
return trivia_doc
class Printer:
table: PrettyTable
matchers: dict[str, Matcher]
def __init__(self, table: PrettyTable):
self.table = table
self.matchers = {
name: Matcher(value, self.table.trivia_modes) for name, value in table.matchers.items()
}
def indent(self) -> str:
return self.table.indent
def convert_tree_to_document(self, tree: runtime.Tree, src: str) -> Document:
name = tree.name
assert name is not None, "Cannot format a tree if it still has transparent nodes inside"
rule = self.lookup_nonterminal(name)
matcher = self.rule_to_matcher(rule)
matcher = self.matchers[name]
m = matcher.match(self, list(tree.children), src)
if m is None:
raise ValueError(
@ -786,4 +825,4 @@ class Printer:
def format_tree(self, tree: runtime.Tree, src: str, width: int) -> DocumentLayout:
doc = self.convert_tree_to_document(tree, src)
return layout_document(doc, width, self._indent)
return layout_document(doc, width, self.table.indent)

View file

@ -149,7 +149,7 @@ def test_convert_tree_to_document():
assert [] == errors
assert tree is not None
printer = wadler.Printer(JSON)
printer = wadler.Printer(wadler.compile_pretty_table(JSON))
doc = flatten_document(printer.convert_tree_to_document(tree, text), text)
assert doc == [
@ -216,7 +216,7 @@ def test_layout_basic():
assert [] == errors
assert tree is not None
printer = wadler.Printer(JSON)
printer = wadler.Printer(wadler.compile_pretty_table(JSON))
result = printer.format_tree(tree, text, 50).apply_to_source(text)
assert result == _output(
@ -278,7 +278,7 @@ def test_forced_break():
assert errors == []
assert tree is not None
printer = wadler.Printer(g)
printer = wadler.Printer(wadler.compile_pretty_table(g))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -318,7 +318,7 @@ def test_maintaining_line_breaks():
assert errors == []
assert tree is not None
printer = wadler.Printer(g)
printer = wadler.Printer(wadler.compile_pretty_table(g))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -352,7 +352,7 @@ def test_trailing_trivia():
assert errors == []
assert tree is not None
printer = wadler.Printer(g)
printer = wadler.Printer(wadler.compile_pretty_table(g))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -378,7 +378,7 @@ def test_trailing_trivia_two():
assert errors == []
assert tree is not None
printer = wadler.Printer(g)
printer = wadler.Printer(wadler.compile_pretty_table(g))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -432,9 +432,7 @@ def test_trailing_trivia_split():
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
trivia_doc = wadler.Matcher(
ParseTable([], [], set()),
{},
{},
wadler.MatcherTable(ParseTable([], [], set()), {}, {}),
TRIVIA_MODES,
).apply_post_trivia(
token.post_trivia,