Emit highlight queries for tree-sitter
Now we're starting to get somewhere!
This commit is contained in:
parent
d466f5d173
commit
51c4f14c26
3 changed files with 109 additions and 23 deletions
|
|
@ -83,7 +83,7 @@ class FineGrammar(Grammar):
|
|||
|
||||
@rule("TypeIdentifier")
|
||||
def type_identifier(self) -> Rule:
|
||||
return self.IDENTIFIER
|
||||
return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)
|
||||
|
||||
@rule
|
||||
def export_statement(self) -> Rule:
|
||||
|
|
@ -419,7 +419,7 @@ class FineGrammar(Grammar):
|
|||
if __name__ == "__main__":
|
||||
from pathlib import Path
|
||||
from parser.parser import dump_lexer_table
|
||||
from parser.tree_sitter import emit_tree_sitter_grammar
|
||||
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
|
||||
|
||||
grammar = FineGrammar()
|
||||
grammar.build_table()
|
||||
|
|
@ -428,3 +428,4 @@ if __name__ == "__main__":
|
|||
dump_lexer_table(lexer)
|
||||
|
||||
emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine")
|
||||
emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine")
|
||||
|
|
|
|||
|
|
@ -1567,6 +1567,9 @@ class GenerateLALR(GenerateLR1):
|
|||
return result
|
||||
|
||||
|
||||
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Sugar for constructing grammars
|
||||
###############################################################################
|
||||
|
|
@ -1584,7 +1587,9 @@ class Rule:
|
|||
return SequenceRule(self, other)
|
||||
|
||||
@abc.abstractmethod
|
||||
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
"""Convert this potentially nested and branching set of rules into a
|
||||
series of nice, flat symbol lists.
|
||||
|
||||
|
|
@ -1618,8 +1623,11 @@ class Terminal(Rule):
|
|||
self.meta = kwargs
|
||||
self.regex = isinstance(pattern, Re)
|
||||
|
||||
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
# We are just ourselves when flattened.
|
||||
del with_metadata
|
||||
yield [self]
|
||||
|
||||
def __repr__(self) -> str:
|
||||
|
|
@ -1660,14 +1668,24 @@ class NonTerminal(Rule):
|
|||
|
||||
We do this by first calling the associated function in order to get a
|
||||
Rule, and then flattening the Rule into the associated set of
|
||||
productions.
|
||||
productions. We strip the metadata from the flattened result to make
|
||||
life a little easier for the caller.
|
||||
"""
|
||||
return [rule for rule in self.fn(grammar).flatten()]
|
||||
|
||||
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||
def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]:
|
||||
for item in result:
|
||||
assert not isinstance(item, tuple)
|
||||
return typing.cast(list[str | Terminal], result)
|
||||
|
||||
return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)]
|
||||
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
# Although we contain multitudes, when flattened we're being asked in
|
||||
# the context of some other production. Yield ourselves, and trust that
|
||||
# in time we will be asked to generate our body.
|
||||
del with_metadata
|
||||
yield [self.name]
|
||||
|
||||
|
||||
|
|
@ -1678,11 +1696,13 @@ class AlternativeRule(Rule):
|
|||
self.left = left
|
||||
self.right = right
|
||||
|
||||
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
# All the things from the left of the alternative, then all the things
|
||||
# from the right, never intermingled.
|
||||
yield from self.left.flatten()
|
||||
yield from self.right.flatten()
|
||||
yield from self.left.flatten(with_metadata)
|
||||
yield from self.right.flatten(with_metadata)
|
||||
|
||||
|
||||
class SequenceRule(Rule):
|
||||
|
|
@ -1694,11 +1714,13 @@ class SequenceRule(Rule):
|
|||
self.first = first
|
||||
self.second = second
|
||||
|
||||
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
# All the things in the prefix....
|
||||
for first in self.first.flatten():
|
||||
for first in self.first.flatten(with_metadata):
|
||||
# ...potentially followed by all the things in the suffix.
|
||||
for second in self.second.flatten():
|
||||
for second in self.second.flatten(with_metadata):
|
||||
yield first + second
|
||||
|
||||
|
||||
|
|
@ -1707,14 +1729,32 @@ class NothingRule(Rule):
|
|||
these, you're probably better off just using the singleton `Nothing`.
|
||||
"""
|
||||
|
||||
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
# It's quiet in here.
|
||||
del with_metadata
|
||||
yield []
|
||||
|
||||
|
||||
Nothing = NothingRule()
|
||||
|
||||
|
||||
class MetadataRule(Rule):
|
||||
def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
|
||||
self.rule = rule
|
||||
self.metadata = metadata
|
||||
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
if with_metadata:
|
||||
for result in self.rule.flatten(with_metadata=True):
|
||||
yield [(self.metadata, result)]
|
||||
else:
|
||||
yield from self.rule.flatten(with_metadata=False)
|
||||
|
||||
|
||||
def alt(*args: Rule) -> Rule:
|
||||
"""A rule that matches one of a series of alternatives.
|
||||
|
||||
|
|
@ -1741,15 +1781,6 @@ def opt(*args: Rule) -> Rule:
|
|||
return AlternativeRule(seq(*args), Nothing)
|
||||
|
||||
|
||||
class MetadataRule(Rule):
|
||||
def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
|
||||
self.rule = rule
|
||||
self.metadata = metadata
|
||||
|
||||
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||
yield from self.rule.flatten()
|
||||
|
||||
|
||||
def mark(rule: Rule, **kwargs) -> Rule:
|
||||
return MetadataRule(rule, kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -270,3 +270,57 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
|
|||
|
||||
f.write("\n }\n")
|
||||
f.write("});")
|
||||
|
||||
|
||||
def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
|
||||
nts = {nt.name: nt for nt in grammar.non_terminals()}
|
||||
|
||||
def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]:
|
||||
parts = []
|
||||
for item in input:
|
||||
if isinstance(item, tuple):
|
||||
meta, sub = item
|
||||
parts.extend(scoop(sub, visited))
|
||||
|
||||
highlight = meta.get("highlight")
|
||||
if isinstance(highlight, parser.HighlightMeta):
|
||||
field_name = meta.get("field")
|
||||
if not isinstance(field_name, str):
|
||||
raise Exception("Highlight must come with a field name") # TODO
|
||||
parts.append(f"{field_name}: _ @{highlight.scope}")
|
||||
|
||||
elif isinstance(item, str):
|
||||
nt = nts[item]
|
||||
if nt.transparent:
|
||||
if nt.name in visited:
|
||||
continue
|
||||
visited.add(nt.name)
|
||||
body = nt.fn(grammar)
|
||||
for production in body.flatten(with_metadata=True):
|
||||
parts.extend(scoop(production, visited))
|
||||
|
||||
return parts
|
||||
|
||||
queries = []
|
||||
for rule in grammar.non_terminals():
|
||||
if rule.transparent:
|
||||
continue
|
||||
|
||||
body = rule.fn(grammar)
|
||||
patterns = set()
|
||||
for production in body.flatten(with_metadata=True):
|
||||
# Scoop up the meta...
|
||||
patterns = patterns | set(scoop(production, set()))
|
||||
|
||||
if len(patterns) > 0:
|
||||
pattern_str = "\n ".join(patterns)
|
||||
queries.append(f"({rule.name}\n {pattern_str})")
|
||||
|
||||
for rule in grammar.terminals():
|
||||
highlight = rule.meta.get("highlight")
|
||||
if isinstance(highlight, parser.HighlightMeta):
|
||||
queries.append(f"({terminal_name(rule)} @{highlight.scope})")
|
||||
|
||||
path = pathlib.Path(path) / "highlight.scm"
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
f.write("\n\n".join(queries))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue