Move formatting meta around, actually mark stuff up

This commit is contained in:
John Doty 2024-09-10 11:47:22 -07:00
parent 7edf5e06bf
commit 443bf8bd33
3 changed files with 61 additions and 57 deletions

View file

@ -1740,6 +1740,12 @@ class NothingRule(Rule):
Nothing = NothingRule()
class SyntaxMeta:
    """Optional base class for annotations attached to a rule.

    The class declares no attributes or behavior of its own; it only gives
    annotation types a common ancestor to inherit from.
    """
class MetadataRule(Rule):
def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
self.rule = rule
@ -1785,18 +1791,6 @@ def mark(rule: Rule, **kwargs) -> Rule:
return MetadataRule(rule, kwargs)
def group(*rules: Rule) -> Rule:
    """Combine ``rules`` into a single rule.

    This is a plain pass-through to ``seq``; no metadata is attached.
    """
    combined = seq(*rules)
    return combined
def indent(*rules: Rule) -> Rule:
    """Combine ``rules`` into a single rule.

    This is a plain pass-through to ``seq``; no indent amount is recorded.
    """
    combined = seq(*rules)
    return combined
def newline() -> Rule:
    """Return the ``Nothing`` rule unchanged; no metadata is attached."""
    placeholder = Nothing
    return placeholder
@typing.overload
def rule(f: typing.Callable, /) -> Rule: ...
@ -2366,22 +2360,9 @@ def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"):
f.write("}\n")
# NOTE: We have a rich metadata system, man, wow, how cool are we?
#
# The whole point of this stuff here is to allow automatic
# generation/maintenance of syntax coloring for editors. And maybe some
# other stuff? This is *extremely provisional*, I'm not even sure it
# makes sense yet. Tree sitter works differently, for example, and it's
# not clear at all what we want to generate for any particular editor.
#
# This here might be enough to produce extremely basic TextMate
# grammars but anything more complicated will want tree patterns
# anyway, and we can only do tree patterns by influencing the grammar.
#
# Here's the info on TextMate grammars:
# https://macromates.com/manual/en/language_grammars
class SyntaxMeta:
    """Base class for syntax metadata types such as ``HighlightMeta``.

    It declares no attributes or behavior of its own.
    """
###############################################################################
# Highlighting metadata support
###############################################################################
class HighlightMeta(SyntaxMeta):
@ -2690,6 +2671,33 @@ class _Highlight:
highlight = _Highlight()
###############################################################################
# Pretty-printing metadata support
###############################################################################
@dataclasses.dataclass
class FormatMeta(SyntaxMeta):
    """Formatting metadata stored under the ``format`` key of a marked rule.

    Every field has a default, so a helper only sets the one it needs.
    """

    # Set to True by ``newline()``.
    newline: bool = False

    # Indent amount set by ``indent()``; stays None otherwise.
    indent: int | None = None

    # Set to True by ``group()``.
    group: bool = False
def group(*rules: Rule) -> Rule:
    """Sequence ``rules`` and mark the result as a formatting group.

    The sequence is wrapped by ``mark`` with ``format`` metadata set to a
    ``FormatMeta`` whose ``group`` flag is True.
    """
    sequence = seq(*rules)
    group_meta = FormatMeta(group=True)
    return mark(sequence, format=group_meta)
def indent(*rules: Rule, amount: int | None = None) -> Rule:
    """Sequence ``rules`` and mark the result with an indent amount.

    ``amount`` is stored on the ``FormatMeta`` attached under the ``format``
    key; when it is omitted (``None``) the amount falls back to 4.
    """
    width = 4 if amount is None else amount
    return mark(seq(*rules), format=FormatMeta(indent=width))
def newline() -> Rule:
    """Return ``Nothing`` marked with ``FormatMeta(newline=True)``."""
    line_break = FormatMeta(newline=True)
    return mark(Nothing, format=line_break)
###############################################################################
# Finally, the base class for grammars
###############################################################################

View file

@ -134,7 +134,7 @@ def apply_precedence(js: str, name: str, grammar: parser.Grammar) -> str:
return js
def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str | None:
method = getattr(rule, "convert_to_tree_sitter", None)
if method is not None:
return method(grammar)
@ -146,7 +146,7 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
return f"$['{target_name}']"
elif isinstance(rule, parser.AlternativeRule):
final = []
final: list[str] = []
queue = []
has_nothing = False
queue.append(rule)
@ -155,15 +155,17 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
if isinstance(part, parser.AlternativeRule):
queue.append(part.right)
queue.append(part.left)
elif isinstance(part, parser.NothingRule):
has_nothing = True
else:
final.append(part)
converted = convert_to_tree_sitter(part, grammar)
if converted is None:
has_nothing = True
else:
final.append(converted)
if len(final) == 0:
raise Exception("Unsupported rule: empty alternative")
result = ", ".join([convert_to_tree_sitter(r, grammar) for r in final])
result = ", ".join(final)
if len(final) > 1:
result = f"choice({result})"
if has_nothing:
@ -172,6 +174,7 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
elif isinstance(rule, parser.SequenceRule):
final = []
pieces = []
queue = []
queue.append(rule)
while len(queue) > 0:
@ -179,10 +182,11 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
if isinstance(part, parser.SequenceRule):
queue.append(part.second)
queue.append(part.first)
elif isinstance(part, parser.NothingRule):
pass
else:
final.append(part)
piece = convert_to_tree_sitter(part, grammar)
if piece is not None:
pieces.append(piece)
final.append(part)
if len(final) == 0:
raise Exception("Unsupported rule: empty sequence")
@ -196,8 +200,6 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
#
# https://github.com/tree-sitter/tree-sitter/issues/372
#
pieces = [convert_to_tree_sitter(r, grammar) for r in final]
def make_seq(pieces: list[str]):
if len(pieces) == 1:
return pieces[0]
@ -223,11 +225,17 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
elif isinstance(rule, parser.MetadataRule):
result = convert_to_tree_sitter(rule.rule, grammar)
if result is None:
return None
field = rule.metadata.get("field")
if field is not None:
result = f"field('{field}', {result})"
return result
elif isinstance(rule, parser.NothingRule):
return None
else:
raise ValueError(f"Rule {rule} not supported for tree-sitter")
@ -257,6 +265,8 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
body = rule.fn(grammar)
rule_definition = convert_to_tree_sitter(body, grammar)
if rule_definition is None:
raise Exception(f"Tree-sitter does not support the empty rule {rule_name}")
rule_definition = apply_precedence(rule_definition, rule.name, grammar)
f.write(f" '{rule_name}': $ => {rule_definition},")

View file

@ -109,14 +109,6 @@ class Matcher:
current_state = stack[-1][0]
action = table.actions[current_state].get(current_token[0], parser.Error())
# print(
# "{stack: <30} {input: <15} {action: <5}".format(
# stack=repr([s[0] for s in stack[-5:]]),
# input=current_token[0],
# action=repr(action),
# )
# )
match action:
case parser.Accept():
return stack[-1][1]
@ -164,13 +156,9 @@ class Matcher:
raise Exception("How did I get a parse error here??")
class PrettyMeta(parser.SyntaxMeta):
    """Pretty-printing annotations for a rule.

    Only the attribute annotations are declared here; no default values
    are assigned.
    """

    # Newline flag.
    newline: bool

    # Indent amount, or None.
    indent: int | None

    # Group flag.
    group: bool
class Printer:
# TODO: Pre-generate the matcher tables for a grammar, to make it
# possible to do codegen in other languages.
grammar: parser.Grammar
_matchers: dict[str, Matcher]
_nonterminals: dict[str, parser.NonTerminal]
@ -227,8 +215,8 @@ class Printer:
meta, children = item
tx_children = compile_production(children)
pretty = meta.get("prettier")
if isinstance(pretty, PrettyMeta):
pretty = meta.get("format")
if isinstance(pretty, parser.FormatMeta):
if pretty.group:
# Make a fake rule.
rule_name = f"g_{group_count}"
@ -261,8 +249,6 @@ class Printer:
gen = self.grammar._generator(rule.name, generated_grammar)
parse_table = gen.gen_table()
# print(parse_table.format())
return Matcher(parse_table, indent_amounts)
def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: