lrparsers/parser/wadler.py
John Doty 1d28c82007 Saving this for posterity, but it is doomed
Remember that tree levels are generated by context free languages, not
regular languages, and so they can only be recognized by push-down
automatons, not finite state machines.

What happened was that I failed to account for transparent rules.
Without transparent rules the children of a tree node do not have any
recursion in them (by definition!) and so therefore *are* a regular
language. But transparent rules change that: there *can be* recursion
hidden on the same tree level, and it should have been clear from a
moment's reflection that the recursion there meant that tree levels
were once again a context free language.

Fortunately we have a recognizer for context free languages lying
around, so we can just use that I guess.
2024-09-09 06:23:25 -07:00

401 lines
11 KiB
Python

# A prettier printer.
import abc
import dataclasses
import math
import typing
from . import parser
from . import runtime
@dataclasses.dataclass(frozen=True)
class Cons:
left: "Document"
right: "Document"
def cons(left: "Document", right: "Document") -> "Document":
if left and right:
return Cons(left, right)
else:
return left or right
@dataclasses.dataclass(frozen=True)
class NewLine:
pass
@dataclasses.dataclass(frozen=True)
class Indent:
amount: int
doc: "Document"
@dataclasses.dataclass(frozen=True)
class Text:
start: int
end: int
@dataclasses.dataclass(frozen=True)
class Group:
child: "Document"
@dataclasses.dataclass
class Lazy:
value: typing.Callable[[], "Document"] | "Document"
def resolve(self) -> "Document":
if callable(self.value):
self.value = self.value()
return self.value
Document = None | Text | NewLine | Cons | Indent | Group | Lazy
def resolve_document(doc: Document) -> Document:
match doc:
case Cons(left, right):
lr = resolve_document(left)
rr = resolve_document(right)
if lr is not left or rr is not right:
return cons(lr, rr)
else:
return doc
case Lazy(_):
return doc.resolve()
case _:
return doc
def layout_document(doc: Document) -> typing.Generator[str, None, None]:
del doc
raise NotImplementedError()
@dataclasses.dataclass(frozen=True)
class MatchTerminal:
name: str
@dataclasses.dataclass(frozen=True)
class MatchNonTerminal:
name: str
@dataclasses.dataclass(frozen=True)
class Accept:
pass
@dataclasses.dataclass(frozen=True)
class StartGroup:
pass
@dataclasses.dataclass(frozen=True)
class EndGroup:
pass
@dataclasses.dataclass(frozen=True)
class StartIndent:
pass
@dataclasses.dataclass(frozen=True)
class EndIndent:
amount: int
@dataclasses.dataclass(frozen=True)
class Split:
left: int
right: int
@dataclasses.dataclass(frozen=True)
class Jump:
next: int
MatchInstruction = (
MatchTerminal
| MatchNonTerminal
| Accept
| StartGroup
| EndGroup
| NewLine
| StartIndent
| EndIndent
| Split
| Jump
)
### THIS DOESN'T WORK
###
### YOU CANNOT MATCH RULES WITH TRANSPARENT CHILDREN WITH A FSM, THIS IS NOT
### A REGULAR LANGUAGE IT IS CONTEXT FREE SO WE NEED TO RUN OUR REAL PARSER
### WHICH MEANS YES WE NEED TO GENERATE TABLES AGAIN OUT OF SUB-GRAMMARS FOR
### PRODUCTIONS BUT ALSO GENERATE NEW ONES FOR META AND ALSO RUN ACTIONS
###
### CHRIST.
###
class Matcher:
code: list[MatchInstruction]
def __init__(self):
self.code = []
@dataclasses.dataclass
class ThreadState:
pc: int
position: int
count: int
results: list[Document | StartGroup | StartIndent]
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
threads: list[Matcher.ThreadState] = [
Matcher.ThreadState(pc=0, position=0, results=[], count=0)
]
while len(threads) > 0:
thread = threads.pop()
results = thread.results
while True:
thread.count += 1
if thread.count > 1000:
raise Exception("Too many steps!")
inst = self.code[thread.pc]
print(f"THREAD: {thread.pc}: {inst} ({thread.position})")
match inst:
case MatchTerminal(name):
if thread.position >= len(items):
break
item = items[thread.position]
if not isinstance(item, runtime.TokenValue):
break
if item.kind != name:
break
results.append(Text(item.start, item.end))
thread.pc += 1
thread.position += 1
case MatchNonTerminal(name):
if thread.position >= len(items):
break
item = items[thread.position]
if not isinstance(item, runtime.Tree):
break
if item.name != name:
break
def thunk(capture: runtime.Tree):
return lambda: printer.convert_tree_to_document(capture)
results.append(Lazy(thunk(item)))
thread.pc += 1
thread.position += 1
case Accept():
if thread.position != len(items):
break
result = None
for r in thread.results:
assert not isinstance(r, (StartGroup, StartIndent))
result = cons(result, r)
return result
case StartGroup():
results.append(inst)
thread.pc += 1
case EndGroup():
group_items = None
while not isinstance(results[-1], StartGroup):
item = typing.cast(Document, results.pop())
group_items = cons(item, group_items)
results.pop()
results.append(Group(group_items))
thread.pc += 1
case NewLine():
results.append(NewLine())
thread.pc += 1
case StartIndent():
results.append(inst)
thread.pc += 1
case EndIndent(amount):
indent_items = None
while not isinstance(results[-1], StartIndent):
item = typing.cast(Document, results.pop())
indent_items = cons(item, indent_items)
results.pop()
results.append(Indent(amount, indent_items))
thread.pc += 1
case Split(left, right):
new_thread = Matcher.ThreadState(
pc=right,
position=thread.position,
results=list(thread.results),
count=0,
)
threads.append(new_thread)
thread.pc = left
case Jump(where):
thread.pc = where
threads.append(thread)
case _:
typing.assert_never(inst)
return None
def format(self) -> str:
return "\n".join(self.format_lines())
def format_lines(self) -> list[str]:
lines = []
code_len = int(math.log10(len(self.code))) + 1
for i, inst in enumerate(self.code):
lines.append(f"{i: >{code_len}} {inst}")
return lines
@abc.abstractmethod
def format_into(self, lines: list[str], visited: dict["Matcher", int], indent: int = 0): ...
class PrettyMeta(parser.SyntaxMeta):
newline: bool
indent: int | None
group: bool
class Printer:
grammar: parser.Grammar
_matchers: dict[str, Matcher]
_nonterminals: dict[str, parser.NonTerminal]
def __init__(self, grammar: parser.Grammar):
self.grammar = grammar
self._nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
self._matchers = {}
def lookup_nonterminal(self, name: str) -> parser.NonTerminal:
return self._nonterminals[name]
def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
matcher = Matcher()
code = matcher.code
patcher: dict[str, int] = {}
def compile_nonterminal(rule: parser.NonTerminal):
sub_start = patcher.get(rule.name)
if sub_start is not None:
code.append(Jump(sub_start))
else:
sub_start = len(code)
patcher[rule.name] = sub_start
tails = []
subs = list(rule.fn(self.grammar).flatten(with_metadata=True))
for sub in subs[:-1]:
split_pos = len(code)
code.append(Split(0, 0))
compile_production(sub)
tails.append(len(code))
code.append(Jump(0))
code[split_pos] = Split(sub_start + 1, len(code))
sub_start = len(code)
compile_production(subs[-1])
for tail in tails:
code[tail] = Jump(len(code))
def compile_production(production: parser.FlattenedWithMetadata):
for item in production:
if isinstance(item, str):
rule = self.lookup_nonterminal(item)
if rule.transparent:
# If it's transparent then we need to inline the pattern here.
compile_nonterminal(rule)
else:
code.append(MatchNonTerminal(item))
elif isinstance(item, parser.Terminal):
name = item.name
assert name is not None
code.append(MatchTerminal(name))
else:
meta, children = item
prettier = meta.get("prettier")
if isinstance(prettier, PrettyMeta):
if prettier.indent:
code.append(StartIndent())
if prettier.group:
code.append(StartGroup())
compile_production(children)
if isinstance(prettier, PrettyMeta):
if prettier.group:
code.append(EndGroup())
if prettier.indent:
code.append(EndIndent(prettier.indent))
if prettier.newline:
code.append(NewLine())
compile_nonterminal(rule)
code.append(Accept())
return matcher
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
result = self._matchers.get(rule.name)
if result is None:
result = self.compile_rule(rule)
self._matchers[rule.name] = result
return result
def convert_tree_to_document(self, tree: runtime.Tree) -> Document:
name = tree.name
assert name is not None, "Cannot format a tree if it still has transparent nodes inside"
rule = self.lookup_nonterminal(name)
matcher = self.rule_to_matcher(rule)
print(f"--------")
print(f"Matching with:\n{matcher.format()}")
m = matcher.match(self, list(tree.children))
print(f"--------")
if m is None:
raise ValueError(
f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}\nMatcher:\n{matcher.format()}"
)
# return m
return resolve_document(m)
def format_tree(self, tree: runtime.Tree) -> str:
doc = self.convert_tree_to_document(tree)
return next(layout_document(doc))