Move formatting meta around, actually mark stuff up

2024-09-10 11:47:22 -07:00 · 2024-09-10 11:47:22 -07:00 · 443bf8bd33
commit 443bf8bd33
parent 7edf5e06bf
3 changed files with 61 additions and 57 deletions
--- a/parser/parser.py
+++ b/parser/parser.py
@ -1740,6 +1740,12 @@ class NothingRule(Rule):
 Nothing = NothingRule()
 class SyntaxMeta:
    """Optional common base class for annotations attached to a rule.

    It defines no behavior of its own; HighlightMeta and FormatMeta
    subclass it.
    """
    pass
 class MetadataRule(Rule):
    def __init__(self, rule: Rule, metadata: dict[str, typing.Any]):
        self.rule = rule
@ -1785,18 +1791,6 @@ def mark(rule: Rule, **kwargs) -> Rule:
    return MetadataRule(rule, kwargs)
 def group(*rules: Rule) -> Rule:
    """Return seq(*rules); no additional metadata is attached."""
    sequence = seq(*rules)
    return sequence
 def indent(*rules: Rule) -> Rule:
    """Return seq(*rules); no additional metadata is attached."""
    sequence = seq(*rules)
    return sequence
 def newline() -> Rule:
    """Return the shared Nothing rule; no metadata is attached."""
    placeholder = Nothing
    return placeholder
@typing.overload
 def rule(f: typing.Callable, /) -> Rule: ...
@ -2366,22 +2360,9 @@ def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"):
        f.write("}\n")
-# NOTE: We have rich metadata system man, wow, how cool are we?
+###############################################################################
-#
+# Highlighting metadata support
-#       The whole point of this stuff here is to allow automatic
+###############################################################################
 #       generation/maintenance of syntax coloring for editors. And maybe some
 #       other stuff? This is *extremely provisional*, I'm not even sure it
 #       makes sense yet. Tree sitter works differently, for example, and it's
 #       not clear at all what we want to generate for any particular editor.
 #
 #       This here might be enough to produce extremely basic TextMate
 #       grammars but anything more complicated will want tree patterns
 #       anyway, and we can only do tree patterns by influencing the grammar.
 #
 #       Here's the info on textmate grammars:
 #           https://macromates.com/manual/en/language_grammars
 class SyntaxMeta:
    """Empty base class for syntax metadata; HighlightMeta derives from it."""
    pass
 class HighlightMeta(SyntaxMeta):
@ -2690,6 +2671,33 @@ class _Highlight:
 highlight = _Highlight()
 ###############################################################################
 # Pretty-printing metadata support
 ###############################################################################
@dataclasses.dataclass
 class FormatMeta(SyntaxMeta):
    """Pretty-printing annotation passed to mark() under the "format" key.

    Instances are built by the group(), indent() and newline() helpers.
    """
    # True when built by newline(); False by default.
    newline: bool = False
    # Indent amount supplied by indent(); None by default.
    indent: int | None = None
    # True when built by group(); False by default.
    group: bool = False
 def group(*rules: Rule) -> Rule:
    """Sequence the rules and mark the result with FormatMeta(group=True)."""
    sequence = seq(*rules)
    meta = FormatMeta(group=True)
    return mark(sequence, format=meta)
 def indent(*rules: Rule, amount: int | None = None) -> Rule:
    """Sequence the rules and mark the result with an indent annotation.

    When `amount` is None the indent defaults to 4.
    """
    width = 4 if amount is None else amount
    body = seq(*rules)
    return mark(body, format=FormatMeta(indent=width))
 def newline() -> Rule:
    """Return Nothing marked with FormatMeta(newline=True)."""
    target = Nothing
    meta = FormatMeta(newline=True)
    return mark(target, format=meta)
 ###############################################################################
 # Finally, the base class for grammars
 ###############################################################################
--- a/parser/tree_sitter.py
+++ b/parser/tree_sitter.py
@ -134,7 +134,7 @@ def apply_precedence(js: str, name: str, grammar: parser.Grammar) -> str:
    return js
-def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
+def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str | None:
    method = getattr(rule, "convert_to_tree_sitter", None)
    if method is not None:
        return method(grammar)
@ -146,7 +146,7 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
        return f"$['{target_name}']"
    elif isinstance(rule, parser.AlternativeRule):
-        final = []
+        final: list[str] = []
        queue = []
        has_nothing = False
        queue.append(rule)
@ -155,15 +155,17 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
            if isinstance(part, parser.AlternativeRule):
                queue.append(part.right)
                queue.append(part.left)
            elif isinstance(part, parser.NothingRule):
                has_nothing = True
            else:
-                final.append(part)
+                converted = convert_to_tree_sitter(part, grammar)
                if converted is None:
                    has_nothing = True
                else:
                    final.append(converted)
        if len(final) == 0:
            raise Exception("Unsupported rule: empty alternative")
-        result = ", ".join([convert_to_tree_sitter(r, grammar) for r in final])
+        result = ", ".join(final)
        if len(final) > 1:
            result = f"choice({result})"
        if has_nothing:
@ -172,6 +174,7 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
    elif isinstance(rule, parser.SequenceRule):
        final = []
        pieces = []
        queue = []
        queue.append(rule)
        while len(queue) > 0:
@ -179,10 +182,11 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
            if isinstance(part, parser.SequenceRule):
                queue.append(part.second)
                queue.append(part.first)
            elif isinstance(part, parser.NothingRule):
                pass
            else:
-                final.append(part)
+                piece = convert_to_tree_sitter(part, grammar)
                if piece is not None:
                    pieces.append(piece)
                    final.append(part)
        if len(final) == 0:
            raise Exception("Unsupported rule: empty sequence")
@ -196,8 +200,6 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
        #
        #   https://github.com/tree-sitter/tree-sitter/issues/372
        #
        pieces = [convert_to_tree_sitter(r, grammar) for r in final]
        def make_seq(pieces: list[str]):
            if len(pieces) == 1:
                return pieces[0]
@ -223,11 +225,17 @@ def convert_to_tree_sitter(rule: parser.Rule, grammar: parser.Grammar) -> str:
    elif isinstance(rule, parser.MetadataRule):
        result = convert_to_tree_sitter(rule.rule, grammar)
        if result is None:
            return None
        field = rule.metadata.get("field")
        if field is not None:
            result = f"field('{field}', {result})"
        return result
    elif isinstance(rule, parser.NothingRule):
        return None
    else:
        raise ValueError(f"Rule {rule} not supported for tree-sitter")
@ -257,6 +265,8 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
            body = rule.fn(grammar)
            rule_definition = convert_to_tree_sitter(body, grammar)
            if rule_definition is None:
                raise Exception(f"Tree-sitter does not support the empty rule {rule_name}")
            rule_definition = apply_precedence(rule_definition, rule.name, grammar)
            f.write(f"    '{rule_name}': $ => {rule_definition},")
--- a/parser/wadler.py
+++ b/parser/wadler.py
@ -109,14 +109,6 @@ class Matcher:
            current_state = stack[-1][0]
            action = table.actions[current_state].get(current_token[0], parser.Error())
            # print(
            #     "{stack: <30} {input: <15} {action: <5}".format(
            #         stack=repr([s[0] for s in stack[-5:]]),
            #         input=current_token[0],
            #         action=repr(action),
            #     )
            # )
            match action:
                case parser.Accept():
                    return stack[-1][1]
@ -164,13 +156,9 @@ class Matcher:
                    raise Exception("How did I get a parse error here??")
 class PrettyMeta(parser.SyntaxMeta):
    """Pretty-printing annotation carrying newline, indent and group settings."""
    # Annotations only: no defaults or values are assigned in this class.
    newline: bool
    indent: int | None
    group: bool
 class Printer:
    # TODO: Pre-generate the matcher tables for a grammar, to make it
    #       possible to do codegen in other languages.
    grammar: parser.Grammar
    _matchers: dict[str, Matcher]
    _nonterminals: dict[str, parser.NonTerminal]
@ -227,8 +215,8 @@ class Printer:
                    meta, children = item
                    tx_children = compile_production(children)
-                    pretty = meta.get("prettier")
+                    pretty = meta.get("format")
-                    if isinstance(pretty, PrettyMeta):
+                    if isinstance(pretty, parser.FormatMeta):
                        if pretty.group:
                            # Make a fake rule.
                            rule_name = f"g_{group_count}"
@ -261,8 +249,6 @@ class Printer:
        gen = self.grammar._generator(rule.name, generated_grammar)
        parse_table = gen.gen_table()
        # print(parse_table.format())
        return Matcher(parse_table, indent_amounts)
    def rule_to_matcher(self, rule: parser.NonTerminal) -> Matcher: