Emit highlight queries for tree-sitter

Now we're starting to get somewhere!
This commit is contained in:
John Doty 2024-09-05 14:52:35 -07:00
parent d466f5d173
commit 51c4f14c26
3 changed files with 109 additions and 23 deletions

View file

@ -83,7 +83,7 @@ class FineGrammar(Grammar):
@rule("TypeIdentifier") @rule("TypeIdentifier")
def type_identifier(self) -> Rule: def type_identifier(self) -> Rule:
return self.IDENTIFIER return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)
@rule @rule
def export_statement(self) -> Rule: def export_statement(self) -> Rule:
@ -419,7 +419,7 @@ class FineGrammar(Grammar):
if __name__ == "__main__": if __name__ == "__main__":
from pathlib import Path from pathlib import Path
from parser.parser import dump_lexer_table from parser.parser import dump_lexer_table
from parser.tree_sitter import emit_tree_sitter_grammar from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
grammar = FineGrammar() grammar = FineGrammar()
grammar.build_table() grammar.build_table()
@ -428,3 +428,4 @@ if __name__ == "__main__":
dump_lexer_table(lexer) dump_lexer_table(lexer)
emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine") emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine")
emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine")

View file

@ -1567,6 +1567,9 @@ class GenerateLALR(GenerateLR1):
return result return result
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
############################################################################### ###############################################################################
# Sugar for constructing grammars # Sugar for constructing grammars
############################################################################### ###############################################################################
@ -1584,7 +1587,9 @@ class Rule:
return SequenceRule(self, other) return SequenceRule(self, other)
@abc.abstractmethod @abc.abstractmethod
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]: def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
"""Convert this potentially nested and branching set of rules into a """Convert this potentially nested and branching set of rules into a
series of nice, flat symbol lists. series of nice, flat symbol lists.
@ -1618,8 +1623,11 @@ class Terminal(Rule):
self.meta = kwargs self.meta = kwargs
self.regex = isinstance(pattern, Re) self.regex = isinstance(pattern, Re)
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]: def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# We are just ourselves when flattened. # We are just ourselves when flattened.
del with_metadata
yield [self] yield [self]
def __repr__(self) -> str: def __repr__(self) -> str:
@ -1660,14 +1668,24 @@ class NonTerminal(Rule):
We do this by first calling the associated function in order to get a We do this by first calling the associated function in order to get a
Rule, and then flattening the Rule into the associated set of Rule, and then flattening the Rule into the associated set of
productions. productions. We strip the metadata from the flattened result to make
life a little easier for the caller.
""" """
return [rule for rule in self.fn(grammar).flatten()]
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]:
for item in result:
assert not isinstance(item, tuple)
return typing.cast(list[str | Terminal], result)
return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)]
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# Although we contain multitudes, when flattened we're being asked in # Although we contain multitudes, when flattened we're being asked in
# the context of some other production. Yield ourselves, and trust that # the context of some other production. Yield ourselves, and trust that
# in time we will be asked to generate our body. # in time we will be asked to generate our body.
del with_metadata
yield [self.name] yield [self.name]
@ -1678,11 +1696,13 @@ class AlternativeRule(Rule):
self.left = left self.left = left
self.right = right self.right = right
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# All the things from the left of the alternative, then all the things # All the things from the left of the alternative, then all the things
# from the right, never intermingled. # from the right, never intermingled.
yield from self.left.flatten() yield from self.left.flatten(with_metadata)
yield from self.right.flatten() yield from self.right.flatten(with_metadata)
class SequenceRule(Rule): class SequenceRule(Rule):
@ -1694,11 +1714,13 @@ class SequenceRule(Rule):
self.first = first self.first = first
self.second = second self.second = second
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# All the things in the prefix.... # All the things in the prefix....
for first in self.first.flatten(): for first in self.first.flatten(with_metadata):
# ...potentially followed by all the things in the suffix. # ...potentially followed by all the things in the suffix.
for second in self.second.flatten(): for second in self.second.flatten(with_metadata):
yield first + second yield first + second
@ -1707,14 +1729,32 @@ class NothingRule(Rule):
these, you're probably better off just using the singleton `Nothing`. these, you're probably better off just using the singleton `Nothing`.
""" """
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]: def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# It's quiet in here. # It's quiet in here.
del with_metadata
yield [] yield []
Nothing = NothingRule() Nothing = NothingRule()
class MetadataRule(Rule):
    """Wrap another rule and tag its flattened productions with metadata.

    Parsing-wise this rule is transparent: flattened without metadata it
    behaves exactly like the rule it wraps. Only when callers ask for
    metadata (`with_metadata=True`) does each underlying production get
    wrapped in a `(metadata, production)` tuple.
    """

    def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
        self.rule = rule
        self.metadata = metadata

    def flatten(
        self, with_metadata: bool = False
    ) -> typing.Generator[FlattenedWithMetadata, None, None]:
        if not with_metadata:
            # Invisible when metadata is not requested: defer entirely to
            # the wrapped rule.
            yield from self.rule.flatten(with_metadata=False)
            return

        # Tag every production of the wrapped rule with our metadata dict.
        for production in self.rule.flatten(with_metadata=True):
            yield [(self.metadata, production)]
def alt(*args: Rule) -> Rule: def alt(*args: Rule) -> Rule:
"""A rule that matches one of a series of alternatives. """A rule that matches one of a series of alternatives.
@ -1741,15 +1781,6 @@ def opt(*args: Rule) -> Rule:
return AlternativeRule(seq(*args), Nothing) return AlternativeRule(seq(*args), Nothing)
class MetadataRule(Rule):
def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
self.rule = rule
self.metadata = metadata
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
yield from self.rule.flatten()
def mark(rule: Rule, **kwargs) -> Rule: def mark(rule: Rule, **kwargs) -> Rule:
return MetadataRule(rule, kwargs) return MetadataRule(rule, kwargs)

View file

@ -270,3 +270,57 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
f.write("\n }\n") f.write("\n }\n")
f.write("});") f.write("});")
def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
    """Write a tree-sitter highlight query file (`highlight.scm`) for `grammar`.

    For every non-transparent nonterminal, flatten its productions with
    metadata and collect `field: _ @scope` capture patterns from `highlight`
    annotations (attached via `mark(...)`), chasing transparent nonterminals
    inline. Highlighted terminals each get their own `(name @scope)` query.

    Args:
        grammar: the grammar whose highlight metadata should be emitted.
        path: directory in which `highlight.scm` will be created.

    Raises:
        ValueError: if a `highlight` annotation has no accompanying string
            `field` name — tree-sitter queries address children by field.
    """
    nts = {nt.name: nt for nt in grammar.non_terminals()}

    def scoop(flat: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]:
        # Recursively gather capture patterns from one flattened production.
        parts = []
        for item in flat:
            if isinstance(item, tuple):
                meta, sub = item
                parts.extend(scoop(sub, visited))

                scope = meta.get("highlight")
                if isinstance(scope, parser.HighlightMeta):
                    field_name = meta.get("field")
                    if not isinstance(field_name, str):
                        raise ValueError("Highlight must come with a field name")
                    parts.append(f"{field_name}: _ @{scope.scope}")

            elif isinstance(item, str):
                nt = nts[item]
                if nt.transparent:
                    # `visited` guards against infinite recursion through
                    # mutually-recursive transparent rules.
                    if nt.name in visited:
                        continue
                    visited.add(nt.name)
                    body = nt.fn(grammar)
                    for production in body.flatten(with_metadata=True):
                        parts.extend(scoop(production, visited))

        return parts

    queries = []
    for rule in grammar.non_terminals():
        if rule.transparent:
            continue

        body = rule.fn(grammar)
        patterns = set()
        for production in body.flatten(with_metadata=True):
            # Scoop up the meta...
            patterns.update(scoop(production, set()))

        if patterns:
            # Sort for deterministic output: `patterns` is a set, and
            # emitting it unsorted would make the generated file churn
            # between otherwise-identical runs.
            pattern_str = "\n ".join(sorted(patterns))
            queries.append(f"({rule.name}\n {pattern_str})")

    for rule in grammar.terminals():
        scope = rule.meta.get("highlight")
        if isinstance(scope, parser.HighlightMeta):
            queries.append(f"({terminal_name(rule)} @{scope.scope})")

    path = pathlib.Path(path) / "highlight.scm"
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n\n".join(queries))