Emit highlight queries for tree-sitter

Now we're starting to get somewhere!
This commit is contained in:
John Doty 2024-09-05 14:52:35 -07:00
parent d466f5d173
commit 51c4f14c26
3 changed files with 109 additions and 23 deletions

View file

@ -83,7 +83,7 @@ class FineGrammar(Grammar):
@rule("TypeIdentifier")
def type_identifier(self) -> Rule:
return self.IDENTIFIER
return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)
@rule
def export_statement(self) -> Rule:
@ -419,7 +419,7 @@ class FineGrammar(Grammar):
if __name__ == "__main__":
from pathlib import Path
from parser.parser import dump_lexer_table
from parser.tree_sitter import emit_tree_sitter_grammar
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
grammar = FineGrammar()
grammar.build_table()
@ -428,3 +428,4 @@ if __name__ == "__main__":
dump_lexer_table(lexer)
emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine")
emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine")

View file

@ -1567,6 +1567,9 @@ class GenerateLALR(GenerateLR1):
return result
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
###############################################################################
# Sugar for constructing grammars
###############################################################################
@ -1584,7 +1587,9 @@ class Rule:
return SequenceRule(self, other)
@abc.abstractmethod
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
"""Convert this potentially nested and branching set of rules into a
series of nice, flat symbol lists.
@ -1618,8 +1623,11 @@ class Terminal(Rule):
self.meta = kwargs
self.regex = isinstance(pattern, Re)
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# We are just ourselves when flattened.
del with_metadata
yield [self]
def __repr__(self) -> str:
@ -1660,14 +1668,24 @@ class NonTerminal(Rule):
We do this by first calling the associated function in order to get a
Rule, and then flattening the Rule into the associated set of
productions.
productions. We strip the metadata from the flattened result to make
life a little easier for the caller.
"""
return [rule for rule in self.fn(grammar).flatten()]
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]:
for item in result:
assert not isinstance(item, tuple)
return typing.cast(list[str | Terminal], result)
return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)]
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# Although we contain multitudes, when flattened we're being asked in
# the context of some other production. Yield ourselves, and trust that
# in time we will be asked to generate our body.
del with_metadata
yield [self.name]
@ -1678,11 +1696,13 @@ class AlternativeRule(Rule):
self.left = left
self.right = right
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# All the things from the left of the alternative, then all the things
# from the right, never intermingled.
yield from self.left.flatten()
yield from self.right.flatten()
yield from self.left.flatten(with_metadata)
yield from self.right.flatten(with_metadata)
class SequenceRule(Rule):
@ -1694,11 +1714,13 @@ class SequenceRule(Rule):
self.first = first
self.second = second
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# All the things in the prefix....
for first in self.first.flatten():
for first in self.first.flatten(with_metadata):
# ...potentially followed by all the things in the suffix.
for second in self.second.flatten():
for second in self.second.flatten(with_metadata):
yield first + second
@ -1707,14 +1729,32 @@ class NothingRule(Rule):
these, you're probably better off just using the singleton `Nothing`.
"""
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
# It's quiet in here.
del with_metadata
yield []
Nothing = NothingRule()
class MetadataRule(Rule):
    """Wrap another rule and attach arbitrary metadata to its productions.

    The metadata (e.g. ``field=...``, ``highlight=...``) only surfaces when
    callers flatten with ``with_metadata=True``; otherwise this rule is
    completely transparent.
    """

    def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
        self.rule = rule
        self.metadata = metadata

    def flatten(
        self, with_metadata: bool = False
    ) -> typing.Generator[FlattenedWithMetadata, None, None]:
        # Without metadata we are invisible: just defer to the wrapped rule.
        if not with_metadata:
            yield from self.rule.flatten(with_metadata=False)
            return
        # With metadata, tag every flattened production of the wrapped rule
        # with our metadata dict so downstream consumers can see it.
        for production in self.rule.flatten(with_metadata=True):
            yield [(self.metadata, production)]
def alt(*args: Rule) -> Rule:
"""A rule that matches one of a series of alternatives.
@ -1741,15 +1781,6 @@ def opt(*args: Rule) -> Rule:
return AlternativeRule(seq(*args), Nothing)
class MetadataRule(Rule):
    """Attach arbitrary metadata to a rule.

    Flattening passes straight through to the wrapped rule; the metadata is
    carried on the instance for other consumers to inspect.
    """

    def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
        self.rule = rule
        self.metadata = metadata

    def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
        # Metadata does not affect the productions themselves.
        yield from self.rule.flatten()
def mark(rule: Rule, **kwargs: typing.Any) -> Rule:
    """Attach keyword metadata (e.g. ``field=...``, ``highlight=...``) to *rule*."""
    return MetadataRule(rule, kwargs)

View file

@ -270,3 +270,57 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
f.write("\n }\n")
f.write("});")
def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
    """Emit a tree-sitter highlight query file for *grammar*.

    Walks every non-transparent nonterminal's productions (flattened with
    metadata) and turns ``highlight``/``field`` metadata into tree-sitter
    field captures; terminals carrying ``highlight`` metadata get node-level
    captures. The result is written to ``<path>/highlight.scm``.

    :param grammar: the grammar to generate queries for.
    :param path: directory in which to write ``highlight.scm``.
    :raises ValueError: if a ``highlight`` annotation has no ``field`` name.
    """
    nts = {nt.name: nt for nt in grammar.non_terminals()}

    def scoop(production: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]:
        # Collect "<field>: _ @<scope>" capture patterns from one flattened
        # production, recursing through transparent nonterminals (whose
        # children appear inline in the final parse tree).
        parts = []
        for item in production:
            if isinstance(item, tuple):
                meta, sub = item
                parts.extend(scoop(sub, visited))
                highlight = meta.get("highlight")
                if isinstance(highlight, parser.HighlightMeta):
                    field_name = meta.get("field")
                    if not isinstance(field_name, str):
                        # A highlight with no field cannot be addressed in a
                        # query pattern.
                        raise ValueError("Highlight must come with a field name")
                    parts.append(f"{field_name}: _ @{highlight.scope}")
            elif isinstance(item, str):
                nt = nts[item]
                if nt.transparent:
                    # `visited` guards against infinite recursion through
                    # mutually-recursive transparent rules.
                    if nt.name in visited:
                        continue
                    visited.add(nt.name)
                    body = nt.fn(grammar)
                    for sub_production in body.flatten(with_metadata=True):
                        parts.extend(scoop(sub_production, visited))
        return parts

    queries = []
    for rule in grammar.non_terminals():
        if rule.transparent:
            continue
        body = rule.fn(grammar)
        patterns: set[str] = set()
        for production in body.flatten(with_metadata=True):
            # Scoop up the meta...
            patterns.update(scoop(production, set()))
        if patterns:
            # Sort so the emitted file is deterministic: iterating a set
            # directly varies run-to-run under hash randomization.
            pattern_str = "\n ".join(sorted(patterns))
            queries.append(f"({rule.name}\n {pattern_str})")

    for rule in grammar.terminals():
        highlight = rule.meta.get("highlight")
        if isinstance(highlight, parser.HighlightMeta):
            queries.append(f"({terminal_name(rule)} @{highlight.scope})")

    path = pathlib.Path(path) / "highlight.scm"
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n\n".join(queries))