diff --git a/grammar.py b/grammar.py index 7d23e87..3aac841 100644 --- a/grammar.py +++ b/grammar.py @@ -83,7 +83,7 @@ class FineGrammar(Grammar): @rule("TypeIdentifier") def type_identifier(self) -> Rule: - return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type) + return self.IDENTIFIER @rule def export_statement(self) -> Rule: @@ -419,7 +419,7 @@ class FineGrammar(Grammar): if __name__ == "__main__": from pathlib import Path from parser.parser import dump_lexer_table - from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries + from parser.tree_sitter import emit_tree_sitter_grammar grammar = FineGrammar() grammar.build_table() @@ -428,4 +428,3 @@ if __name__ == "__main__": dump_lexer_table(lexer) emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine") - emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine") diff --git a/parser/parser.py b/parser/parser.py index ed67456..86e4e15 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -1567,9 +1567,6 @@ class GenerateLALR(GenerateLR1): return result -FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"] - - ############################################################################### # Sugar for constructing grammars ############################################################################### @@ -1587,9 +1584,7 @@ class Rule: return SequenceRule(self, other) @abc.abstractmethod - def flatten( - self, with_metadata: bool = False - ) -> typing.Generator[FlattenedWithMetadata, None, None]: + def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]: """Convert this potentially nested and branching set of rules into a series of nice, flat symbol lists. @@ -1623,11 +1618,8 @@ class Terminal(Rule): self.meta = kwargs self.regex = isinstance(pattern, Re) - def flatten( - self, with_metadata: bool = False - ) -> typing.Generator[FlattenedWithMetadata, None, None]: + def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]: # We are just ourselves when flattened. - del with_metadata yield [self] def __repr__(self) -> str: @@ -1668,24 +1660,14 @@ class NonTerminal(Rule): We do this by first calling the associated function in order to get a Rule, and then flattening the Rule into the associated set of - productions. We strip the metadata from the flattened result to make - life a little easier for the caller. + productions. """ + return [rule for rule in self.fn(grammar).flatten()] - def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]: - for item in result: - assert not isinstance(item, tuple) - return typing.cast(list[str | Terminal], result) - - return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)] - - def flatten( - self, with_metadata: bool = False - ) -> typing.Generator[FlattenedWithMetadata, None, None]: + def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: # Although we contain multitudes, when flattened we're being asked in # the context of some other production. Yield ourselves, and trust that # in time we will be asked to generate our body. - del with_metadata yield [self.name] @@ -1696,13 +1678,11 @@ class AlternativeRule(Rule): self.left = left self.right = right - def flatten( - self, with_metadata: bool = False - ) -> typing.Generator[FlattenedWithMetadata, None, None]: + def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: # All the things from the left of the alternative, then all the things # from the right, never intermingled. - yield from self.left.flatten(with_metadata) - yield from self.right.flatten(with_metadata) + yield from self.left.flatten() + yield from self.right.flatten() class SequenceRule(Rule): @@ -1714,13 +1694,11 @@ class SequenceRule(Rule): self.first = first self.second = second - def flatten( - self, with_metadata: bool = False - ) -> typing.Generator[FlattenedWithMetadata, None, None]: + def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: # All the things in the prefix.... - for first in self.first.flatten(with_metadata): + for first in self.first.flatten(): # ...potentially followed by all the things in the suffix. - for second in self.second.flatten(with_metadata): + for second in self.second.flatten(): yield first + second @@ -1729,32 +1707,14 @@ class NothingRule(Rule): these, you're probably better off just using the singleton `Nothing`. """ - def flatten( - self, with_metadata: bool = False - ) -> typing.Generator[FlattenedWithMetadata, None, None]: + def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: # It's quiet in here. - del with_metadata yield [] Nothing = NothingRule() -class MetadataRule(Rule): - def __init__(self, rule: Rule, metadata: dict[str, typing.Any]): - self.rule = rule - self.metadata = metadata - - def flatten( - self, with_metadata: bool = False - ) -> typing.Generator[FlattenedWithMetadata, None, None]: - if with_metadata: - for result in self.rule.flatten(with_metadata=True): - yield [(self.metadata, result)] - else: - yield from self.rule.flatten(with_metadata=False) - - def alt(*args: Rule) -> Rule: """A rule that matches one of a series of alternatives. @@ -1781,6 +1741,15 @@ def opt(*args: Rule) -> Rule: return AlternativeRule(seq(*args), Nothing) +class MetadataRule(Rule): + def __init__(self, rule: Rule, metadata: dict[str, typing.Any]): + self.rule = rule + self.metadata = metadata + + def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: + yield from self.rule.flatten() + + def mark(rule: Rule, **kwargs) -> Rule: return MetadataRule(rule, kwargs) diff --git a/parser/tree_sitter.py b/parser/tree_sitter.py index cd170ae..d2f02fc 100644 --- a/parser/tree_sitter.py +++ b/parser/tree_sitter.py @@ -270,57 +270,3 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str): f.write("\n }\n") f.write("});") - - -def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str): - nts = {nt.name: nt for nt in grammar.non_terminals()} - - def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]: - parts = [] - for item in input: - if isinstance(item, tuple): - meta, sub = item - parts.extend(scoop(sub, visited)) - - highlight = meta.get("highlight") - if isinstance(highlight, parser.HighlightMeta): - field_name = meta.get("field") - if not isinstance(field_name, str): - raise Exception("Highlight must come with a field name") # TODO - parts.append(f"{field_name}: _ @{highlight.scope}") - - elif isinstance(item, str): - nt = nts[item] - if nt.transparent: - if nt.name in visited: - continue - visited.add(nt.name) - body = nt.fn(grammar) - for production in body.flatten(with_metadata=True): - parts.extend(scoop(production, visited)) - - return parts - - queries = [] - for rule in grammar.non_terminals(): - if rule.transparent: - continue - - body = rule.fn(grammar) - patterns = set() - for production in body.flatten(with_metadata=True): - # Scoop up the meta... - patterns = patterns | set(scoop(production, set())) - - if len(patterns) > 0: - pattern_str = "\n ".join(patterns) - queries.append(f"({rule.name}\n {pattern_str})") - - for rule in grammar.terminals(): - highlight = rule.meta.get("highlight") - if isinstance(highlight, parser.HighlightMeta): - queries.append(f"({terminal_name(rule)} @{highlight.scope})") - - path = pathlib.Path(path) / "highlight.scm" - with open(path, "w", encoding="utf-8") as f: - f.write("\n\n".join(queries)) diff --git a/pyproject.toml b/pyproject.toml index 16be6b2..c7721e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,6 @@ exclude = ["historical", ".venv"] venvPath = "." venv = ".venv" -[tool.pytest.ini_options] -norecursedirs = "tree-sitter-fine" - [build-system] requires = ["pdm-backend"] build-backend = "pdm.backend"