diff --git a/grammar.py b/grammar.py index 3aac841..7d23e87 100644 --- a/grammar.py +++ b/grammar.py @@ -83,7 +83,7 @@ class FineGrammar(Grammar): @rule("TypeIdentifier") def type_identifier(self) -> Rule: - return self.IDENTIFIER + return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type) @rule def export_statement(self) -> Rule: @@ -419,7 +419,7 @@ class FineGrammar(Grammar): if __name__ == "__main__": from pathlib import Path from parser.parser import dump_lexer_table - from parser.tree_sitter import emit_tree_sitter_grammar + from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries grammar = FineGrammar() grammar.build_table() @@ -428,3 +428,4 @@ if __name__ == "__main__": dump_lexer_table(lexer) emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine") + emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine") diff --git a/parser/parser.py b/parser/parser.py index 86e4e15..ed67456 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -1567,6 +1567,9 @@ class GenerateLALR(GenerateLR1): return result +FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"] + + ############################################################################### # Sugar for constructing grammars ############################################################################### @@ -1584,7 +1587,9 @@ class Rule: return SequenceRule(self, other) @abc.abstractmethod - def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]: + def flatten( + self, with_metadata: bool = False + ) -> typing.Generator[FlattenedWithMetadata, None, None]: """Convert this potentially nested and branching set of rules into a series of nice, flat symbol lists. @@ -1618,8 +1623,11 @@ class Terminal(Rule): self.meta = kwargs self.regex = isinstance(pattern, Re) - def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]: + def flatten( + self, with_metadata: bool = False + ) -> typing.Generator[FlattenedWithMetadata, None, None]: # We are just ourselves when flattened. + del with_metadata yield [self] def __repr__(self) -> str: @@ -1660,14 +1668,24 @@ class NonTerminal(Rule): We do this by first calling the associated function in order to get a Rule, and then flattening the Rule into the associated set of - productions. + productions. We strip the metadata from the flattened result to make + life a little easier for the caller. """ - return [rule for rule in self.fn(grammar).flatten()] - def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: + def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]: + for item in result: + assert not isinstance(item, tuple) + return typing.cast(list[str | Terminal], result) + + return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)] + + def flatten( + self, with_metadata: bool = False + ) -> typing.Generator[FlattenedWithMetadata, None, None]: # Although we contain multitudes, when flattened we're being asked in # the context of some other production. Yield ourselves, and trust that # in time we will be asked to generate our body. + del with_metadata yield [self.name] @@ -1678,11 +1696,13 @@ class AlternativeRule(Rule): self.left = left self.right = right - def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: + def flatten( + self, with_metadata: bool = False + ) -> typing.Generator[FlattenedWithMetadata, None, None]: # All the things from the left of the alternative, then all the things # from the right, never intermingled. - yield from self.left.flatten() - yield from self.right.flatten() + yield from self.left.flatten(with_metadata) + yield from self.right.flatten(with_metadata) class SequenceRule(Rule): @@ -1694,11 +1714,13 @@ class SequenceRule(Rule): self.first = first self.second = second - def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: + def flatten( + self, with_metadata: bool = False + ) -> typing.Generator[FlattenedWithMetadata, None, None]: # All the things in the prefix.... - for first in self.first.flatten(): + for first in self.first.flatten(with_metadata): # ...potentially followed by all the things in the suffix. - for second in self.second.flatten(): + for second in self.second.flatten(with_metadata): yield first + second @@ -1707,14 +1729,32 @@ class NothingRule(Rule): these, you're probably better off just using the singleton `Nothing`. """ - def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: + def flatten( + self, with_metadata: bool = False + ) -> typing.Generator[FlattenedWithMetadata, None, None]: # It's quiet in here. + del with_metadata yield [] Nothing = NothingRule() +class MetadataRule(Rule): + def __init__(self, rule: Rule, metadata: dict[str, typing.Any]): + self.rule = rule + self.metadata = metadata + + def flatten( + self, with_metadata: bool = False + ) -> typing.Generator[FlattenedWithMetadata, None, None]: + if with_metadata: + for result in self.rule.flatten(with_metadata=True): + yield [(self.metadata, result)] + else: + yield from self.rule.flatten(with_metadata=False) + + def alt(*args: Rule) -> Rule: """A rule that matches one of a series of alternatives. @@ -1741,15 +1781,6 @@ def opt(*args: Rule) -> Rule: return AlternativeRule(seq(*args), Nothing) -class MetadataRule(Rule): - def __init__(self, rule: Rule, metadata: dict[str, typing.Any]): - self.rule = rule - self.metadata = metadata - - def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: - yield from self.rule.flatten() - - def mark(rule: Rule, **kwargs) -> Rule: return MetadataRule(rule, kwargs) diff --git a/parser/tree_sitter.py b/parser/tree_sitter.py index d2f02fc..cd170ae 100644 --- a/parser/tree_sitter.py +++ b/parser/tree_sitter.py @@ -270,3 +270,57 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str): f.write("\n }\n") f.write("});") + + +def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str): + nts = {nt.name: nt for nt in grammar.non_terminals()} + + def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]: + parts = [] + for item in input: + if isinstance(item, tuple): + meta, sub = item + parts.extend(scoop(sub, visited)) + + highlight = meta.get("highlight") + if isinstance(highlight, parser.HighlightMeta): + field_name = meta.get("field") + if not isinstance(field_name, str): + raise Exception("Highlight must come with a field name") # TODO + parts.append(f"{field_name}: _ @{highlight.scope}") + + elif isinstance(item, str): + nt = nts[item] + if nt.transparent: + if nt.name in visited: + continue + visited.add(nt.name) + body = nt.fn(grammar) + for production in body.flatten(with_metadata=True): + parts.extend(scoop(production, visited)) + + return parts + + queries = [] + for rule in grammar.non_terminals(): + if rule.transparent: + continue + + body = rule.fn(grammar) + patterns = set() + for production in body.flatten(with_metadata=True): + # Scoop up the meta... + patterns = patterns | set(scoop(production, set())) + + if len(patterns) > 0: + pattern_str = "\n ".join(patterns) + queries.append(f"({rule.name}\n {pattern_str})") + + for rule in grammar.terminals(): + highlight = rule.meta.get("highlight") + if isinstance(highlight, parser.HighlightMeta): + queries.append(f"({terminal_name(rule)} @{highlight.scope})") + + path = pathlib.Path(path) / "highlight.scm" + with open(path, "w", encoding="utf-8") as f: + f.write("\n\n".join(queries))