Saving this for posterity, but it is doomed

Remember that tree levels are generated by context-free languages, not
regular languages, and so they can only be recognized by pushdown
automata, not finite state machines.

What happened was that I failed to account for transparent rules.
Without transparent rules, the children of a tree node do not have any
recursion in them (by definition!) and therefore *are* a regular
language. But transparent rules change that: there *can be* recursion
hidden on the same tree level, and it should have been clear from a
moment's reflection that the recursion there meant that tree levels
were once again a context free language.

Fortunately we have a recognizer for context free languages lying
around, so we can just use that I guess.
This commit is contained in:
John Doty 2024-09-09 06:23:25 -07:00
parent 0cbf696303
commit 1d28c82007

View file

@ -1,5 +1,7 @@
# A prettier printer.
import abc
import dataclasses
import math
import typing
from . import parser
@ -12,6 +14,13 @@ class Cons:
right: "Document"
def cons(left: "Document", right: "Document") -> "Document":
    """Concatenate two documents, skipping whichever side is empty.

    A `Cons` node is only allocated when both sides are truthy; when one
    side is falsy (e.g. `None`) the other side is returned unchanged.
    """
    if not left:
        return right
    if not right:
        return left
    return Cons(left, right)
@dataclasses.dataclass(frozen=True)
class NewLine:
pass
@ -47,144 +56,230 @@ class Lazy:
Document = None | Text | NewLine | Cons | Indent | Group | Lazy
def resolve_document(doc: Document) -> Document:
match doc:
case Cons(left, right):
lr = resolve_document(left)
rr = resolve_document(right)
if lr is not left or rr is not right:
return cons(lr, rr)
else:
return doc
case Lazy(_):
return doc.resolve()
case _:
return doc
def layout_document(doc: Document) -> typing.Generator[str, None, None]:
    """Lay out `doc` as a stream of strings (not implemented yet).

    Raises:
        NotImplementedError: always, as soon as the function is called.
    """
    # The argument is deliberately discarded; this is a placeholder.
    del doc
    raise NotImplementedError()
@dataclasses.dataclass
class Match:
doc: Document
remaining: list[runtime.Tree | runtime.TokenValue]
class Matcher:
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
raise NotImplementedError()
class NonTerminalMatcher(Matcher):
name: str
printer: "Printer"
def __init__(self, name: str, printer: "Printer"):
self.name = name
self.printer = printer
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
if len(items) == 0:
return None
item = items[0]
if isinstance(item, runtime.Tree) and item.name == self.name:
return Match(
doc=Lazy(value=lambda: self.printer.convert_tree_to_document(item)),
remaining=items[1:],
)
return None
class TerminalMatcher(Matcher):
@dataclasses.dataclass(frozen=True)
class MatchTerminal:
name: str
def __init__(self, name: str):
self.name = name
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
if len(items) == 0:
return None
item = items[0]
if isinstance(item, runtime.TokenValue) and item.kind == self.name:
return Match(
doc=Text(start=item.start, end=item.end),
remaining=items[1:],
)
return None
@dataclasses.dataclass(frozen=True)
class MatchNonTerminal:
name: str
class IndentMatcher(Matcher):
@dataclasses.dataclass(frozen=True)
class Accept:
pass
@dataclasses.dataclass(frozen=True)
class StartGroup:
pass
@dataclasses.dataclass(frozen=True)
class EndGroup:
pass
@dataclasses.dataclass(frozen=True)
class StartIndent:
pass
@dataclasses.dataclass(frozen=True)
class EndIndent:
amount: int
child: Matcher
def __init__(self, amount: int, child: Matcher):
self.amount = amount
self.child = child
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
result = self.child.match(items)
if result is not None:
result.doc = Indent(amount=self.amount, doc=result.doc)
return result
class NewLineMatcher(Matcher):
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
return Match(
doc=NewLine(),
remaining=items,
)
@dataclasses.dataclass(frozen=True)
class Split:
left: int
right: int
class GroupMatcher(Matcher):
child: Matcher
def __init__(self, child: Matcher):
self.child = child
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
result = self.child.match(items)
if result is not None:
result.doc = Group(result.doc)
return result
@dataclasses.dataclass(frozen=True)
class Jump:
next: int
class CompleteMatcher(Matcher):
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
if len(items) == 0:
return Match(doc=None, remaining=[])
else:
return None
MatchInstruction = (
MatchTerminal
| MatchNonTerminal
| Accept
| StartGroup
| EndGroup
| NewLine
| StartIndent
| EndIndent
| Split
| Jump
)
class AlternativeMatcher(Matcher):
children: list[Matcher]
### THIS DOESN'T WORK
###
### YOU CANNOT MATCH RULES WITH TRANSPARENT CHILDREN WITH A FSM, THIS IS NOT
### A REGULAR LANGUAGE IT IS CONTEXT FREE SO WE NEED TO RUN OUR REAL PARSER
### WHICH MEANS YES WE NEED TO GENERATE TABLES AGAIN OUT OF SUB-GRAMMARS FOR
### PRODUCTIONS BUT ALSO GENERATE NEW ONES FOR META AND ALSO RUN ACTIONS
###
### CHRIST.
###
class Matcher:
code: list[MatchInstruction]
def __init__(self, children: list[Matcher] | None = None):
self.children = children or []
def __init__(self):
self.code = []
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
for child in self.children:
m = child.match(items)
if m is not None:
return m
@dataclasses.dataclass
class ThreadState:
    """Mutable state of one backtracking thread run by `match`."""

    # Index into `code` of the next instruction to execute.
    pc: int
    # Index into the item list of the next item to be matched.
    position: int
    # Number of instructions this thread has executed; `match` raises once
    # it exceeds 1000.
    count: int
    # Documents produced so far, interleaved with the StartGroup/StartIndent
    # markers that the matching End* instruction pops back to.
    results: list[Document | StartGroup | StartIndent]
def match(self, printer: "Printer", items: list[runtime.Tree | runtime.TokenValue]) -> Document:
threads: list[Matcher.ThreadState] = [
Matcher.ThreadState(pc=0, position=0, results=[], count=0)
]
while len(threads) > 0:
thread = threads.pop()
results = thread.results
while True:
thread.count += 1
if thread.count > 1000:
raise Exception("Too many steps!")
inst = self.code[thread.pc]
print(f"THREAD: {thread.pc}: {inst} ({thread.position})")
match inst:
case MatchTerminal(name):
if thread.position >= len(items):
break
item = items[thread.position]
if not isinstance(item, runtime.TokenValue):
break
if item.kind != name:
break
results.append(Text(item.start, item.end))
thread.pc += 1
thread.position += 1
case MatchNonTerminal(name):
if thread.position >= len(items):
break
item = items[thread.position]
if not isinstance(item, runtime.Tree):
break
if item.name != name:
break
def thunk(capture: runtime.Tree):
return lambda: printer.convert_tree_to_document(capture)
results.append(Lazy(thunk(item)))
thread.pc += 1
thread.position += 1
case Accept():
if thread.position != len(items):
break
result = None
for r in thread.results:
assert not isinstance(r, (StartGroup, StartIndent))
result = cons(result, r)
return result
case StartGroup():
results.append(inst)
thread.pc += 1
case EndGroup():
group_items = None
while not isinstance(results[-1], StartGroup):
item = typing.cast(Document, results.pop())
group_items = cons(item, group_items)
results.pop()
results.append(Group(group_items))
thread.pc += 1
case NewLine():
results.append(NewLine())
thread.pc += 1
case StartIndent():
results.append(inst)
thread.pc += 1
case EndIndent(amount):
indent_items = None
while not isinstance(results[-1], StartIndent):
item = typing.cast(Document, results.pop())
indent_items = cons(item, indent_items)
results.pop()
results.append(Indent(amount, indent_items))
thread.pc += 1
case Split(left, right):
new_thread = Matcher.ThreadState(
pc=right,
position=thread.position,
results=list(thread.results),
count=0,
)
threads.append(new_thread)
thread.pc = left
case Jump(where):
thread.pc = where
threads.append(thread)
case _:
typing.assert_never(inst)
return None
def format(self) -> str:
    """Return the program listing from `format_lines` as a single string,
    one instruction per line."""
    listing = self.format_lines()
    return "\n".join(listing)
class SequenceMatcher(Matcher):
children: list[Matcher]
def format_lines(self) -> list[str]:
    """Return one line per instruction in `self.code`.

    Each line is the instruction index, right-aligned to a common width,
    followed by a space and the instruction itself. An empty program
    yields an empty list.
    """
    # Width of the index column: the number of decimal digits in the
    # instruction count. Counting digits of the string form avoids
    # `math.log10(0)` raising ValueError when there is no code at all.
    code_len = len(str(len(self.code)))
    return [f"{i: >{code_len}} {inst}" for i, inst in enumerate(self.code)]
def __init__(self, children: list[Matcher] | None = None):
self.children = children or []
def match(self, items: list[runtime.Tree | runtime.TokenValue]) -> Match | None:
doc = None
for child in self.children:
m = child.match(items)
if m is None:
return None
items = m.remaining
doc = Cons(doc, m.doc)
return Match(
doc=doc,
remaining=items,
)
@abc.abstractmethod
def format_into(self, lines: list[str], visited: dict["Matcher", int], indent: int = 0): ...
class PrettyMeta(parser.SyntaxMeta):
@ -195,68 +290,92 @@ class PrettyMeta(parser.SyntaxMeta):
class Printer:
grammar: parser.Grammar
matchers: dict[str, Matcher]
_matchers: dict[str, Matcher]
_nonterminals: dict[str, parser.NonTerminal]
def __init__(self, grammar: parser.Grammar):
self.grammar = grammar
self._nonterminals = {nt.name: nt for nt in grammar.non_terminals()}
self._matchers = {}
def lookup_nonterminal(self, name: str) -> parser.NonTerminal:
raise NotImplementedError()
return self._nonterminals[name]
def production_to_matcher(self, production: parser.FlattenedWithMetadata) -> Matcher:
results = []
for item in production:
if isinstance(item, str):
rule = self.lookup_nonterminal(item)
if rule.transparent:
# If it's transparent then we don't actually match a
# nonterminal here, we need to match against the contents
# of the rule, so we recurse.
results.append(self.rule_to_matcher(rule))
else:
results.append(NonTerminalMatcher(item, self))
elif isinstance(item, parser.Terminal):
name = item.name
assert name is not None
results.append(TerminalMatcher(name))
def compile_rule(self, rule: parser.NonTerminal) -> Matcher:
matcher = Matcher()
code = matcher.code
patcher: dict[str, int] = {}
def compile_nonterminal(rule: parser.NonTerminal):
sub_start = patcher.get(rule.name)
if sub_start is not None:
code.append(Jump(sub_start))
else:
meta, children = item
sub_start = len(code)
patcher[rule.name] = sub_start
tails = []
subs = list(rule.fn(self.grammar).flatten(with_metadata=True))
for sub in subs[:-1]:
split_pos = len(code)
code.append(Split(0, 0))
child = self.production_to_matcher(children)
compile_production(sub)
prettier = meta.get("prettier")
if isinstance(prettier, PrettyMeta):
if prettier.indent:
child = IndentMatcher(prettier.indent, child)
tails.append(len(code))
code.append(Jump(0))
if prettier.group:
child = GroupMatcher(child)
code[split_pos] = Split(sub_start + 1, len(code))
sub_start = len(code)
results.append(child)
compile_production(subs[-1])
if prettier.newline:
results.append(NewLineMatcher())
for tail in tails:
code[tail] = Jump(len(code))
def compile_production(production: parser.FlattenedWithMetadata):
for item in production:
if isinstance(item, str):
rule = self.lookup_nonterminal(item)
if rule.transparent:
# If it's transparent then we need to inline the pattern here.
compile_nonterminal(rule)
else:
code.append(MatchNonTerminal(item))
elif isinstance(item, parser.Terminal):
name = item.name
assert name is not None
code.append(MatchTerminal(name))
else:
results.append(child)
meta, children = item
return SequenceMatcher(results)
prettier = meta.get("prettier")
if isinstance(prettier, PrettyMeta):
if prettier.indent:
code.append(StartIndent())
if prettier.group:
code.append(StartGroup())
compile_production(children)
if isinstance(prettier, PrettyMeta):
if prettier.group:
code.append(EndGroup())
if prettier.indent:
code.append(EndIndent(prettier.indent))
if prettier.newline:
code.append(NewLine())
compile_nonterminal(rule)
code.append(Accept())
return matcher
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher:
result = self.matchers.get(rule.name)
result = self._matchers.get(rule.name)
if result is None:
# Create the empty alternative, be sure to set up the
alts = AlternativeMatcher()
if rule.transparent:
result = alts
else:
result = SequenceMatcher(children=[alts, CompleteMatcher()])
self.matchers[rule.name] = result
for production in rule.fn(self.grammar).flatten(with_metadata=True):
alts.children.append(self.production_to_matcher(production))
result = self.compile_rule(rule)
self._matchers[rule.name] = result
return result
@ -266,11 +385,16 @@ class Printer:
rule = self.lookup_nonterminal(name)
matcher = self.rule_to_matcher(rule)
m = matcher.match(list(tree.children))
assert m is not None, "Could not match a valid tree" # TODO: Exception rather I think
return m.doc
print(f"--------")
print(f"Matching with:\n{matcher.format()}")
m = matcher.match(self, list(tree.children))
print(f"--------")
if m is None:
raise ValueError(
f"Could not match a valid tree for {tree.name} with {len(tree.children)} children:\n{tree.format()}\nMatcher:\n{matcher.format()}"
)
# return m
return resolve_document(m)
def format_tree(self, tree: runtime.Tree) -> str:
doc = self.convert_tree_to_document(tree)