From 3012df4ac63bad2576a72cc2d2f0eba0d9b237f5 Mon Sep 17 00:00:00 2001
From: John Doty
Date: Sat, 31 Aug 2024 07:22:49 -0700
Subject: [PATCH] Precedence but it doesn't work

Tree-sitter doesn't let me do token-based precedence?

I don't like tree-sitter's "make it inline but give it a number" system;
it seems like a bug farm to me.
---
 grammar.py            | 15 ++++++++++-----
 parser/parser.py      |  7 +++----
 parser/tree_sitter.py | 27 ++++++++++++++++++++++-----
 3 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/grammar.py b/grammar.py
index 7a4ae75..622c75e 100644
--- a/grammar.py
+++ b/grammar.py
@@ -1,5 +1,5 @@
 # This is an example grammar.
-from parser import Assoc, Grammar, Nothing, rule, seq, Rule, Terminal, Re, Highlight, mark, opt
+from parser import Assoc, Grammar, rule, seq, Rule, Terminal, Re, Highlight, mark, opt
 
 
 class FineGrammar(Grammar):
@@ -96,7 +96,11 @@ class FineGrammar(Grammar):
 
     @rule
     def export_list(self) -> Rule:
-        return Nothing | self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, self.export_list)
+        return (
+            self.IDENTIFIER
+            | seq(self.IDENTIFIER, self.COMMA)
+            | seq(self.IDENTIFIER, self.COMMA, self.export_list)
+        )
 
     # Functions
     @rule("FunctionDecl")
@@ -114,8 +118,9 @@ class FineGrammar(Grammar):
         return seq(
             self.LPAREN,
             opt(
-                self._first_parameter,
-                opt(self.COMMA, self._parameter_list),
+                self._first_parameter
+                | seq(self._first_parameter, self.COMMA)
+                | seq(self._first_parameter, self.COMMA, self._parameter_list)
             ),
             self.RPAREN,
         )
@@ -126,7 +131,7 @@ class FineGrammar(Grammar):
 
     @rule
     def _parameter_list(self) -> Rule:
-        return Nothing | self.parameter | seq(self.parameter, self.COMMA, self._parameter_list)
+        return self.parameter | seq(self.parameter, self.COMMA, self._parameter_list)
 
     @rule("Parameter")
     def parameter(self) -> Rule:
diff --git a/parser/parser.py b/parser/parser.py
index 5d47160..75b8f4e 100644
--- a/parser/parser.py
+++ b/parser/parser.py
@@ -2524,13 +2524,12 @@ class Grammar:
     def terminals(self) -> list[Terminal]:
         return self._terminals
 
-    @property
-    def resolved_trivia(self) -> list[Terminal]:
-        return self._trivia
-
     def non_terminals(self) -> list[NonTerminal]:
         return [nt for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal))]
 
+    def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
+        return self._precedence.get(name)
+
     def generate_nonterminal_dict(
         self, start: str | None = None
     ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
diff --git a/parser/tree_sitter.py b/parser/tree_sitter.py
index 87c7757..6cd7303 100644
--- a/parser/tree_sitter.py
+++ b/parser/tree_sitter.py
@@ -99,9 +99,21 @@ def to_javascript_regex(re: parser.Re) -> str:
     raise Exception(f"Regex node {re} not supported for tree-sitter")
 
 
-def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
-    # TODO: Precedence?
+def apply_precedence(js: str, name: str, grammar: parser.Grammar) -> str:
+    prec = grammar.get_precedence(name)
+    if prec is not None:
+        assoc, level = prec
+        if assoc == parser.Assoc.LEFT:
+            js = f"prec.left({level}, {js})"
+        elif assoc == parser.Assoc.RIGHT:
+            js = f"prec.right({level}, {js})"
+        else:
+            js = f"prec({level}, {js})"
 
+    return js
+
+
+def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
     method = getattr(rule, "convert_to_tree_sitter", None)
     if method is not None:
         return method(grammar)
@@ -109,10 +121,14 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
     if isinstance(rule, parser.Terminal):
         if isinstance(rule.pattern, parser.Re):
             regex = to_javascript_regex(rule.pattern)
-            return f"/{regex}/"
+            result = f"/{regex}/"
         else:
             string = to_js_string(rule.pattern)
-            return f'"{string}"'
+            result = f'"{string}"'
+
+        if rule.name is not None:
+            result = apply_precedence(result, rule.name, grammar)
+        return result
 
     elif isinstance(rule, parser.AlternativeRule):
         final = []
@@ -176,7 +192,6 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
 
 # https://tree-sitter.github.io/tree-sitter/creating-parsers
 def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
-    # TODO: PRECEDENCE
     path = pathlib.Path(path) / "grammar.js"
     with open(path, "w", encoding="utf-8") as f:
         f.write('/// \n')
@@ -195,6 +210,8 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
             body = rule.fn(grammar)
             rule_definition = convert_to_tree_sitter(body, grammar)
 
+            rule_definition = apply_precedence(rule_definition, rule.name, grammar)
+
             f.write(f" '{rule_name}': $ => {rule_definition},")
 
         f.write(" }\n")