diff --git a/parser/tree_sitter.py b/parser/tree_sitter.py index daf4940..d2f02fc 100644 --- a/parser/tree_sitter.py +++ b/parser/tree_sitter.py @@ -28,9 +28,9 @@ def to_javascript_regex(re: parser.Re) -> str: else: final.append(part) - s = "".join([to_javascript_regex(p) for p in final]) + s = ", ".join([to_javascript_regex(p) for p in final]) if len(final) > 1: - s = f"({s})" + s = f"seq({s})" return s elif isinstance(re, parser.ReAlt): @@ -45,22 +45,22 @@ def to_javascript_regex(re: parser.Re) -> str: else: final.append(part) - s = "|".join([to_javascript_regex(p) for p in final]) + s = ", ".join([to_javascript_regex(p) for p in final]) if len(final) > 1: - s = f"({s})" + s = f"choice({s})" return s elif isinstance(re, parser.ReQuestion): s = to_javascript_regex(re.child) - return f"{s}?" + return f"optional({s})" elif isinstance(re, parser.RePlus): s = to_javascript_regex(re.child) - return f"{s}+" + return f"repeat1({s})" elif isinstance(re, parser.ReStar): s = to_javascript_regex(re.child) - return f"{s}*" + return f"repeat({s})" elif isinstance(re, parser.ReSet): if ( @@ -68,7 +68,7 @@ def to_javascript_regex(re: parser.Re) -> str: and re.values[0].lower == 0 and re.values[0].upper == parser.UNICODE_MAX_CP ): - return "." + return "/./" inverted = re.inversion if inverted: @@ -93,21 +93,31 @@ def to_javascript_regex(re: parser.Re) -> str: # The only time this isn't a "set" is if this is a set of one # range that is one character long, in which case it's better # represented as a literal. 
- s = f"[{s}]" + s = f"/[{s}]/" + else: + s = s.replace("'", "\\'") + s = f"'{s}'" return s raise Exception(f"Regex node {re} not supported for tree-sitter") +def terminal_name(t: parser.Terminal) -> str: + terminal_name = t.name + if terminal_name is None: + raise Exception(f"The terminal was not assigned a name: {t}") + return terminal_name + + def terminal_to_tree_sitter(rule: parser.Terminal) -> str: if isinstance(rule.pattern, parser.Re): - regex = to_javascript_regex(rule.pattern) - regex = regex.replace("/", "\\/") - result = f"/{regex}/" + result = to_javascript_regex(rule.pattern) + # regex = regex.replace("/", "\\/") + # result = f"/{regex}/" else: string = to_js_string(rule.pattern) result = f'"{string}"' - return result + return f"token({result})" def apply_precedence(js: str, name: str, grammar: parser.Grammar) -> str: @@ -130,7 +140,10 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str: return method(grammar) if isinstance(rule, parser.Terminal): - return terminal_to_tree_sitter(rule) + # NOTE: We used to just inline these but now we explicitly have names + # for the tokens. 
+ target_name = terminal_name(rule) + return f"$['{target_name}']" elif isinstance(rule, parser.AlternativeRule): final = [] @@ -230,7 +243,7 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str): f.write("module.exports = grammar({\n") f.write(f" name: '{grammar.name}',\n") - extras = ", ".join([terminal_to_tree_sitter(t) for t in grammar.trivia_terminals()]) + extras = ", ".join([f"$['{terminal_name(t)}']" for t in grammar.trivia_terminals()]) f.write(f" extras: $ => [{extras}],\n") f.write(" rules: {\n") @@ -248,5 +261,12 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str): f.write(f" '{rule_name}': $ => {rule_definition},") + f.write("\n") + for rule in grammar.terminals(): + f.write("\n") + + definition = terminal_to_tree_sitter(rule) + f.write(f" '{rule.name}': $ => {definition},") + f.write("\n }\n") f.write("});")