diff --git a/parser/parser.py b/parser/parser.py index c69fe65..23e1935 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -1740,6 +1740,12 @@ class NothingRule(Rule): Nothing = NothingRule() +class SyntaxMeta: + """A maybe base class for annotations to a rule.""" + + pass + + class MetadataRule(Rule): def __init__(self, rule: Rule, metadata: dict[str, typing.Any]): self.rule = rule @@ -1785,18 +1791,6 @@ def mark(rule: Rule, **kwargs) -> Rule: return MetadataRule(rule, kwargs) -def group(*rules: Rule) -> Rule: - return seq(*rules) - - -def indent(*rules: Rule) -> Rule: - return seq(*rules) - - -def newline() -> Rule: - return Nothing - - @typing.overload def rule(f: typing.Callable, /) -> Rule: ... @@ -2366,22 +2360,9 @@ def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"): f.write("}\n") -# NOTE: We have rich metadata system man, wow, how cool are we? -# -# The whole point of this stuff here is to allow automatic -# generation/maintenance of syntax coloring for editors. And maybe some -# other stuff? This is *extremely provisional*, I'm not even sure it -# makes sense yet. Tree sitter works differently, for example, and it's -# not clear at all what we want to generate for any particular editor. -# -# This here might be enough to produce extremely basic TextMate -# grammars but anything more complicated will want tree patterns -# anyway, and we can only do tree patterns by influencing the grammar. -# -# Here's the info on textmate grammars: -# https://macromates.com/manual/en/language_grammars -class SyntaxMeta: - pass +############################################################################### +# Highlighting metadata support +############################################################################### class HighlightMeta(SyntaxMeta): @@ -2690,6 +2671,33 @@ class _Highlight: highlight = _Highlight() + +############################################################################### +# Pretty-printing metadata support +############################################################################### + + +@dataclasses.dataclass +class FormatMeta(SyntaxMeta): + newline: bool = False + indent: int | None = None + group: bool = False + + +def group(*rules: Rule) -> Rule: + return mark(seq(*rules), format=FormatMeta(group=True)) + + +def indent(*rules: Rule, amount: int | None = None) -> Rule: + if amount is None: + amount = 4 + return mark(seq(*rules), format=FormatMeta(indent=amount)) + + +def newline() -> Rule: + return mark(Nothing, format=FormatMeta(newline=True)) + + ############################################################################### # Finally, the base class for grammars ############################################################################### diff --git a/parser/tree_sitter.py b/parser/tree_sitter.py index 55c1516..7f9d231 100644 --- a/parser/tree_sitter.py +++ b/parser/tree_sitter.py @@ -134,7 +134,7 @@ def apply_precedence(js: str, name: str, grammar: parser.Grammar) -> str: return js -def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: +def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str | None: method = getattr(rule, "convert_to_tree_sitter", None) if method is not None: return method(grammar) @@ -146,7 +146,7 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: return f"$['{target_name}']" elif isinstance(rule, parser.AlternativeRule): - final = [] + final: list[str] = [] queue = [] has_nothing = False queue.append(rule) @@ -155,15 +155,17 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: if isinstance(part, parser.AlternativeRule): queue.append(part.right) queue.append(part.left) - elif isinstance(part, parser.NothingRule): - has_nothing = True else: - final.append(part) + converted = convert_to_tree_sitter(part, grammar) + if converted is None: + has_nothing = True + else: + final.append(converted) if len(final) == 0: raise Exception("Unsupported rule: empty alternative") - result = ", ".join([convert_to_tree_sitter(r, grammar) for r in final]) + result = ", ".join(final) if len(final) > 1: result = f"choice({result})" if has_nothing: @@ -172,6 +174,7 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: elif isinstance(rule, parser.SequenceRule): final = [] + pieces = [] queue = [] queue.append(rule) while len(queue) > 0: @@ -179,10 +182,11 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: if isinstance(part, parser.SequenceRule): queue.append(part.second) queue.append(part.first) - elif isinstance(part, parser.NothingRule): - pass else: - final.append(part) + piece = convert_to_tree_sitter(part, grammar) + if piece is not None: + pieces.append(piece) + final.append(part) if len(final) == 0: raise Exception("Unsupported rule: empty sequence") @@ -196,8 +200,6 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: # # https://github.com/tree-sitter/tree-sitter/issues/372 # - pieces = [convert_to_tree_sitter(r, grammar) for r in final] - def make_seq(pieces: list[str]): if len(pieces) == 1: return pieces[0] @@ -223,11 +225,17 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: elif isinstance(rule, parser.MetadataRule): result = convert_to_tree_sitter(rule.rule, grammar) + if result is None: + return None + field = rule.metadata.get("field") if field is not None: result = f"field('{field}', {result})" return result + elif isinstance(rule, parser.NothingRule): + return None + else: raise ValueError(f"Rule {rule} not supported for tree-sitter") @@ -257,6 +265,8 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str): body = rule.fn(grammar) rule_definition = convert_to_tree_sitter(body, grammar) + if rule_definition is None: + raise Exception(f"Tree-sitter does not support the empty rule {rule_name}") rule_definition = apply_precedence(rule_definition, rule.name, grammar) f.write(f" '{rule_name}': $ => {rule_definition},") diff --git a/parser/wadler.py b/parser/wadler.py index bc712da..98c249f 100644 --- a/parser/wadler.py +++ b/parser/wadler.py @@ -109,14 +109,6 @@ class Matcher: current_state = stack[-1][0] action = table.actions[current_state].get(current_token[0], parser.Error()) - # print( - # "{stack: <30} {input: <15} {action: <5}".format( - # stack=repr([s[0] for s in stack[-5:]]), - # input=current_token[0], - # action=repr(action), - # ) - # ) - match action: case parser.Accept(): return stack[-1][1] @@ -164,13 +156,9 @@ class Matcher: raise Exception("How did I get a parse error here??") -class PrettyMeta(parser.SyntaxMeta): - newline: bool - indent: int | None - group: bool - - class Printer: + # TODO: Pre-generate the matcher tables for a grammar, to make it + # possible to do codegen in other languages. grammar: parser.Grammar _matchers: dict[str, Matcher] _nonterminals: dict[str, parser.NonTerminal] @@ -227,8 +215,8 @@ class Printer: meta, children = item tx_children = compile_production(children) - pretty = meta.get("prettier") - if isinstance(pretty, PrettyMeta): + pretty = meta.get("format") + if isinstance(pretty, parser.FormatMeta): if pretty.group: # Make a fake rule. rule_name = f"g_{group_count}" @@ -261,8 +249,6 @@ class Printer: gen = self.grammar._generator(rule.name, generated_grammar) parse_table = gen.gen_table() - # print(parse_table.format()) - return Matcher(parse_table, indent_amounts) def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: