diff --git a/grammar.py b/grammar.py index 7d23e87..51c2766 100644 --- a/grammar.py +++ b/grammar.py @@ -53,12 +53,14 @@ class FineGrammar(Grammar): return seq( self.CLASS, mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type), - mark(self._class_body, field="body"), + self.LCURLY, + mark(opt(self.class_body), field="body"), + self.RCURLY, ) - @rule - def _class_body(self) -> Rule: - return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self._class_members, self.RCURLY) + @rule("ClassBody") + def class_body(self) -> Rule: + return self._class_members @rule def _class_members(self) -> Rule: @@ -140,11 +142,17 @@ class FineGrammar(Grammar): # Block @rule("Block") def block(self) -> Rule: - return ( - seq(self.LCURLY, self.RCURLY) - | seq(self.LCURLY, self.expression, self.RCURLY) - | seq(self.LCURLY, self._statement_list, self.RCURLY) - | seq(self.LCURLY, self._statement_list, self.expression, self.RCURLY) + return alt( + seq(self.LCURLY, self.RCURLY), + seq(self.LCURLY, self.block_body, self.RCURLY), + ) + + @rule("BlockBody") + def block_body(self) -> Rule: + return alt( + self.expression, + self._statement_list, + seq(self._statement_list, self.expression), ) @rule @@ -419,6 +427,7 @@ class FineGrammar(Grammar): if __name__ == "__main__": from pathlib import Path from parser.parser import dump_lexer_table + from parser.emacs import emit_emacs_major_mode from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries grammar = FineGrammar() @@ -427,5 +436,7 @@ if __name__ == "__main__": lexer = grammar.compile_lexer() dump_lexer_table(lexer) - emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine") - emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine") + ts_path = Path(__file__).parent / "tree-sitter-fine" + emit_tree_sitter_grammar(grammar, ts_path) + emit_tree_sitter_queries(grammar, ts_path) + emit_emacs_major_mode(grammar, ts_path / "fine.el") diff --git a/parser/emacs.py 
# https://www.masteringemacs.org/article/lets-write-a-treesitter-major-mode
"""Generate an Emacs tree-sitter major mode (an ``.el`` file) from a grammar.

The font-lock rules are derived from the ``highlight`` metadata attached to
the grammar's rules and terminals; the rest of the file is a fixed template
with marked manual sections that survive regeneration.
"""
import dataclasses
import itertools
import pathlib
import textwrap

from parser.tree_sitter import terminal_name
from parser.generated_source import (
    begin_manual_section,
    end_manual_section,
    merge_existing,
    sign_generated_source,
    signature_token,
)

from . import parser


@dataclasses.dataclass(frozen=True, order=True)
class FaceQuery:
    """One font-lock capture: highlight ``node`` (or one of its fields) with ``face``."""

    feature: str  # Important to be first! Queries are sorted and grouped by it.
    face: str
    node: str
    field: str | None  # None means the capture applies to the whole node.


def gather_faces(grammar: parser.Grammar):
    """Build the ``(defvar <name>-font-lock-rules ...)`` form for ``grammar``.

    Walks every non-transparent non-terminal and every terminal, collects a
    ``FaceQuery`` for each ``HighlightMeta`` that carries both a font-lock
    feature and a font-lock face, de-duplicates them, and renders them grouped
    by feature.

    Returns:
        The elisp ``defvar`` form as a string.

    Raises:
        ValueError: if a highlight inside a production has no string ``field``
            metadata to anchor the capture to.
    """
    non_terminals = {nt.name: nt for nt in grammar.non_terminals()}

    def scoop(node: str, production: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]:
        # Collect the queries contributed by one flattened production of `node`.
        found: list[FaceQuery] = []
        for item in production:
            if isinstance(item, tuple):
                # A (metadata, sub-production) pair: recurse first, then look
                # at the metadata attached at this level.
                meta, sub = item
                found.extend(scoop(node, sub, visited))

                highlight = meta.get("highlight")
                if not isinstance(highlight, parser.HighlightMeta):
                    continue

                field_name = meta.get("field")
                if not isinstance(field_name, str):
                    raise ValueError("Highlight must come with a field name")  # TODO

                feature = highlight.font_lock_feature
                face = highlight.font_lock_face
                if feature and face:
                    found.append(
                        FaceQuery(
                            node=node,
                            field=field_name,
                            feature=feature,
                            face=face,
                        )
                    )

            elif isinstance(item, str):
                # A reference to another non-terminal. Only transparent ones
                # are expanded in place, and each at most once per `visited`
                # set so recursive rules terminate.
                nt = non_terminals[item]
                if not nt.transparent or nt.name in visited:
                    continue
                visited.add(nt.name)
                for inner in nt.fn(grammar).flatten(with_metadata=True):
                    found.extend(scoop(node, inner, visited))

        return found

    queries: list[FaceQuery] = []
    for rule in grammar.non_terminals():
        if rule.transparent:
            continue

        for production in rule.fn(grammar).flatten(with_metadata=True):
            queries.extend(scoop(rule.name, production, set()))

    for terminal in grammar.terminals():
        highlight = terminal.meta.get("highlight")
        if not isinstance(highlight, parser.HighlightMeta):
            continue
        feature = highlight.font_lock_feature
        face = highlight.font_lock_face
        if feature and face:
            queries.append(
                FaceQuery(
                    node=terminal_name(terminal),
                    field=None,
                    feature=feature,
                    face=face,
                )
            )

    # Remove duplicates, which happen. Sort with an explicit key rather than
    # the dataclass ordering: `field` may be None, and None cannot be compared
    # with str when two queries tie on feature/face/node.
    queries = sorted(
        set(queries),
        key=lambda q: (q.feature, q.face, q.node, q.field is not None, q.field or ""),
    )

    # Group by feature (groupby relies on the sort above).
    features = []
    for feature, group in itertools.groupby(queries, key=lambda q: q.feature):
        face_queries = [
            f"({q.node} {q.field}: _ @{q.face})" if q.field else f"({q.node}) @{q.face}"
            for q in group
        ]
        face_queries_str = "\n ".join(face_queries)
        features.append(
            f":language {grammar.name}\n:override t\n:feature {feature}\n"
            f"({face_queries_str})\n"
        )

    feature_string = "\n".join(features)
    feature_string = textwrap.indent(feature_string, " ")
    feature_string = feature_string.strip()

    feature_string = f"""
(defvar {grammar.name}-font-lock-rules
  '({feature_string})
  "Tree-sitter font lock rules for {grammar.name}.")
    """.strip()

    return feature_string


def emit_emacs_major_mode(grammar: parser.Grammar, file_path: pathlib.Path | str) -> None:
    """Write (or regenerate) the Emacs major-mode file for ``grammar``.

    The generated text is signed, then merged with the manual sections of any
    file already at ``file_path`` so hand edits inside marked sections are
    preserved. Parent directories are created as needed.

    Args:
        grammar: The grammar to generate a mode for; its ``name`` is used for
            every elisp symbol in the output.
        file_path: Destination of the ``.el`` file.

    Raises:
        OSError / UnicodeDecodeError: if an existing file is present but cannot
            be read. The file is deliberately NOT overwritten in that case,
            because doing so would silently discard its manual sections.

    Note:
        The last manual section is spelled ``eplogue``. The misspelling is kept
        because section names are the merge key: renaming it would drop
        whatever users already wrote in that section of existing files.
    """
    file_path = pathlib.Path(file_path)

    face_var = gather_faces(grammar)

    contents = f"""
;;; {file_path.name} --- Major mode for editing {grammar.name} --- -*- lexical-binding: t -*-

;; NOTE: This file is partially generated.
;; Only modify marked sections, or your modifications will be lost!
;; {signature_token()}

;; {begin_manual_section('commentary')}

;; This is free and unencumbered software released into the public domain.
;; Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
;; software, either in source code form or as a compiled binary, for any purpose,
;; commercial or non-commercial, and by any means.
;;
;; In jurisdictions that recognize copyright laws, the author or authors of this
;; software dedicate any and all copyright interest in the software to the public
;; domain. We make this dedication for the benefit of the public at large and to
;; the detriment of our heirs and successors. We intend this dedication to be an
;; overt act of relinquishment in perpetuity of all present and future rights to
;; this software under copyright law.
;;
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;; AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
;; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

;;; Commentary:
;; (Nobody has written anything about the major mode yet.)

;; {end_manual_section()}

;;; Code:
(require 'treesit)

;; {begin_manual_section('prologue')}

;; {end_manual_section()}

{face_var}

(defun {grammar.name}-ts-setup ()
  "Setup for {grammar.name}-mode."

  ;; {begin_manual_section('setup_prologue')}
  ;; {end_manual_section()}

  ;; Set up the font-lock rules.
  (setq-local treesit-font-lock-settings
              (apply #'treesit-font-lock-rules
                     {grammar.name}-font-lock-rules))

  ;; {begin_manual_section('feature_list')}
  ;; NOTE: This list is just to get you started; these are some of the standard
  ;; features and somewhat standard positions in the feature list. You can
  ;; edit this to more closely match your grammar's output. (The info page
  ;; for treesit-font-lock-feature-list describes what it does nicely.)
  (setq-local treesit-font-lock-feature-list
              '((comment definition)
                (keyword string)
                (assignment attribute builtin constant escape-sequence number type)
                (bracket delimiter error function operator property variable)))
  ;; {end_manual_section()}

  ;; {begin_manual_section('setup_epilogue')}
  ;; If you want to set up more do it here.
  ;; {end_manual_section()}

  (treesit-major-mode-setup))

;;;###autoload
(define-derived-mode {grammar.name}-mode prog-mode "{grammar.name}"
  "Major mode for editing {grammar.name} files."

  (setq-local font-lock-defaults nil)
  (when (treesit-ready-p '{grammar.name})
    (treesit-parser-create '{grammar.name})
    ({grammar.name}-ts-setup)))


;; {begin_manual_section('eplogue')}

;; {end_manual_section()}
;;; {file_path.name} ends here
""".lstrip()

    # Sign the contents to give folks a way to check that they haven't been
    # messed with. Signing happens before the merge; the digest only covers
    # generated sections, so merging manual sections back in keeps it valid.
    contents = sign_generated_source(contents)

    # Try to pull existing file contents out and merge them with the
    # generated code. This preserves hand-editing in approved areas. Only a
    # missing file is treated as "nothing to merge"; any other read failure
    # propagates so we never clobber a file whose manual edits we could not
    # recover.
    try:
        existing_contents = file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        existing_contents = None

    if existing_contents is not None:
        contents = merge_existing(existing_contents, contents)

    # Ensure that parent directories are created as necessary for the output.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    # And write the file!
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(contents)
+ with open(file_path, "w", encoding="utf-8") as file: + file.write(contents) diff --git a/parser/generated_source.py b/parser/generated_source.py new file mode 100644 index 0000000..0351181 --- /dev/null +++ b/parser/generated_source.py @@ -0,0 +1,107 @@ +import hashlib +import re +import typing + +_SIGNING_SLUG = "!*RVCugYltjOsekrgCXTlKuqIrfy4-ScohO22mEDCr2ts" +_SIGNING_PREFIX = "generated source" + +_BEGIN_PATTERN = re.compile("BEGIN MANUAL SECTION ([^ ]+)") +_END_PATTERN = re.compile("END MANUAL SECTION") +_SIGNATURE_PATTERN = re.compile(_SIGNING_PREFIX + " Signed<<([0-9a-f]+)>>") + + +def signature_token() -> str: + return _SIGNING_PREFIX + " " + _SIGNING_SLUG + + +def begin_manual_section(name: str) -> str: + return f"BEGIN MANUAL SECTION {name}" + + +def end_manual_section() -> str: + return f"END MANUAL SECTION" + + +def _compute_digest(source: str) -> str: + m = hashlib.sha256() + for section, lines in _iterate_sections(source): + if section is None: + for line in lines: + m.update(line.encode("utf-8")) + return m.hexdigest() + + +def sign_generated_source(source: str) -> str: + # Only compute the hash over the automatically generated sections of the + # source file. 
+ digest = _compute_digest(source) + signed = source.replace(_SIGNING_SLUG, f"Signed<<{digest}>>") + if signed == source: + raise ValueError("Source did not contain a signature token to replace") + return signed + + +def is_signed(source: str) -> bool: + return _SIGNATURE_PATTERN.search(source) is not None + + +def validate_signature(source: str) -> bool: + signatures = [m.group(1) for m in _SIGNATURE_PATTERN.finditer(source)] + if len(signatures) > 1: + raise ValueError("Multiple signatures found in source") + if len(signatures) == 0: + raise ValueError("Source does not appear to be signed") + signature: str = signatures[0] + + unsigned = source.replace(f"Signed<<{signature}>>", _SIGNING_SLUG) + actual = _compute_digest(unsigned) + + return signature == actual + + +def merge_existing(existing: str, generated: str) -> str: + manual_sections = _extract_manual_sections(existing) + + result_lines = [] + for section, lines in _iterate_sections(generated): + if section is not None: + lines = manual_sections.get(section, lines) + result_lines.extend(lines) + + return "".join(result_lines) + + +def _extract_manual_sections(code: str) -> dict[str, list[str]]: + result = {} + for section, lines in _iterate_sections(code): + if section is not None: + existing = result.get(section) + if existing is not None: + existing.extend(lines) + else: + result[section] = lines + return result + + +def _iterate_sections(code: str) -> typing.Generator[tuple[str | None, list[str]], None, None]: + current_section: str | None = None + current_lines = [] + for line in code.splitlines(keepends=True): + if current_section is None: + current_lines.append(line) + match = _BEGIN_PATTERN.search(line) + if match is None: + continue + + yield (None, current_lines) + current_lines = [] + current_section = match.group(1) + else: + if _END_PATTERN.search(line): + yield (current_section, current_lines) + current_lines = [] + current_section = None + + current_lines.append(line) + + yield 
(current_section, current_lines) diff --git a/parser/parser.py b/parser/parser.py index ed67456..42ff058 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -2374,14 +2374,20 @@ class SyntaxMeta: class HighlightMeta(SyntaxMeta): scope: str + font_lock_face: str | None + font_lock_feature: str | None def __init__(self, *scope: str): self.scope = ".".join(scope) + self.font_lock_face = None + self.font_lock_feature = None class CommentHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("comment", *scope) + self.font_lock_face = "font-lock-comment-face" + self.font_lock_feature = "comment" class BlockCommentHighlight(CommentHighlight): @@ -2397,6 +2403,8 @@ class LineCommentHighlight(CommentHighlight): class ConstantHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("constant", *scope) + self.font_lock_face = "font-lock-constant-face" + self.font_lock_feature = "constant" class LanguageConstantHighlight(ConstantHighlight): @@ -2407,6 +2415,8 @@ class LanguageConstantHighlight(ConstantHighlight): class NumericConstantHighlight(ConstantHighlight): def __init__(self, *scope: str): super().__init__("numeric", *scope) + self.font_lock_feature = "number" + self.font_lock_face = "font-lock-number-face" class EntityHighlight(HighlightMeta): @@ -2417,21 +2427,27 @@ class EntityHighlight(HighlightMeta): class NameEntityHighlight(EntityHighlight): def __init__(self, *scope: str): super().__init__("name", *scope) + self.font_lock_face = "font-lock-variable-name-face" + self.font_lock_feature = "definition" class FunctionNameEntityHighlight(NameEntityHighlight): def __init__(self, *scope: str): super().__init__("function", *scope) + self.font_lock_face = "font-lock-function-name-face" class TypeNameEntityHighlight(NameEntityHighlight): def __init__(self, *scope: str): super().__init__("type", *scope) + self.font_lock_feature = "type" class KeywordHighlight(HighlightMeta): def __init__(self, *scope: str): 
super().__init__("keyword", *scope) + self.font_lock_feature = "keyword" + self.font_lock_face = "font-lock-keyword-face" class ControlKeywordHighlight(KeywordHighlight): @@ -2447,6 +2463,8 @@ class ConditionalControlKeywordHighlight(ControlKeywordHighlight): class OperatorKeywordHighlight(KeywordHighlight): def __init__(self, *scope: str): super().__init__("operator", *scope) + self.font_lock_feature = "operator" + self.font_lock_face = "font-lock-operator-face" class ExpressionOperatorKeywordHighlight(OperatorKeywordHighlight): @@ -2462,6 +2480,8 @@ class OtherKeywordHighlight(KeywordHighlight): class PunctuationHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("punctuation", *scope) + self.font_lock_feature = "delimiter" + self.font_lock_face = "font-lock-punctuation-face" class SeparatorPunctuationHighlight(PunctuationHighlight): @@ -2472,6 +2492,8 @@ class SeparatorPunctuationHighlight(PunctuationHighlight): class ParenthesisPunctuationHighlight(PunctuationHighlight): def __init__(self, *scope: str): super().__init__("parenthesis", *scope) + self.font_lock_feature = "bracket" + self.font_lock_face = "font-lock-bracket-face" class OpenParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight): @@ -2487,6 +2509,8 @@ class CloseParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight): class CurlyBracePunctuationHighlight(PunctuationHighlight): def __init__(self, *scope: str): super().__init__("curlybrace", *scope) + self.font_lock_feature = "bracket" + self.font_lock_face = "font-lock-bracket-face" class OpenCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight): @@ -2502,6 +2526,8 @@ class CloseCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight): class SquareBracketPunctuationHighlight(PunctuationHighlight): def __init__(self, *scope: str): super().__init__("squarebracket", *scope) + self.font_lock_feature = "bracket" + self.font_lock_face = "font-lock-bracket-face" class 
OpenSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight): @@ -2517,6 +2543,8 @@ class CloseSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight): class StorageHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("storage", *scope) + self.font_lock_feature = "keyword" + self.font_lock_face = "font-lock-keyword-face" class TypeStorageHighlight(StorageHighlight): @@ -2542,6 +2570,8 @@ class StructTypeStorageHighlight(TypeStorageHighlight): class StringHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("string", *scope) + self.font_lock_feature = "string" + self.font_lock_face = "font-lock-string-face" class QuotedStringHighlight(StringHighlight): @@ -2562,11 +2592,15 @@ class DoubleQuotedStringHighlight(QuotedStringHighlight): class VariableHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("variable", *scope) + self.font_lock_feature = "variable" + self.font_lock_face = "font-lock-variable-use-face" class LanguageVariableHighlight(VariableHighlight): def __init__(self, *scope: str): super().__init__("language", *scope) + self.font_lock_feature = "builtin" + self.font_lock_face = "font-lock-builtin-face" class _Highlight: diff --git a/tests/test_generated_source.py b/tests/test_generated_source.py new file mode 100644 index 0000000..d399474 --- /dev/null +++ b/tests/test_generated_source.py @@ -0,0 +1,110 @@ +import parser.generated_source as generated_source + + +def test_signature(): + input_source = f""" +This is a random thing. + +Put your slug here: {generated_source.signature_token()} + +Here are some more things: + + - Machine Generated + - More Machine Gnerated +{generated_source.begin_manual_section('foo')} + - You can edit here! +{generated_source.end_manual_section()} + - But not here. +{generated_source.begin_manual_section('bar')} + - You can edit here too! +{generated_source.end_manual_section()} + - Also not here. 
"""Tests for signing and merging partially generated source files."""
import parser.generated_source as generated_source


def _document(*lines: str) -> str:
    """Join ``lines`` into a fixture that starts and ends with a newline."""
    return "\n" + "\n".join(lines) + "\n"


def _preamble() -> list[str]:
    """Return the generated header shared by the signature fixtures."""
    return [
        "This is a random thing.",
        "",
        f"Put your slug here: {generated_source.signature_token()}",
        "",
        "Here are some more things:",
        "",
        " - Machine Generated",
        " - More Machine Gnerated",
    ]


def test_signature():
    # A freshly signed document is recognised as signed and validates.
    source = _document(
        *_preamble(),
        generated_source.begin_manual_section("foo"),
        " - You can edit here!",
        generated_source.end_manual_section(),
        " - But not here.",
        generated_source.begin_manual_section("bar"),
        " - You can edit here too!",
        generated_source.end_manual_section(),
        " - Also not here.",
    )
    signed = generated_source.sign_generated_source(source)
    assert signed != source
    assert generated_source.is_signed(signed)
    assert generated_source.validate_signature(signed)


def test_manual_changes():
    # Editing inside a manual section must not invalidate the signature.
    source = _document(
        *_preamble(),
        generated_source.begin_manual_section("foo"),
        " - XXXXX",
        generated_source.end_manual_section(),
        " - But not here.",
    )
    signed = generated_source.sign_generated_source(source)
    edited = signed.replace("XXXXX", "YYYYY")
    assert edited != signed

    assert generated_source.is_signed(edited)
    assert generated_source.validate_signature(edited)


def test_bad_changes():
    # Editing generated text outside any manual section breaks the signature.
    source = _document(
        *_preamble(),
        generated_source.begin_manual_section("foo"),
        " - XXXXX",
        generated_source.end_manual_section(),
        " - ZZZZZ",
    )
    signed = generated_source.sign_generated_source(source)
    edited = signed.replace("ZZZZZ", "YYYYY")
    assert edited != signed

    assert generated_source.is_signed(edited)
    assert not generated_source.validate_signature(edited)


def test_merge_changes():
    # Manual sections follow their name, not their position, and generated
    # text always comes from the new source.
    begin_foo = f"// {generated_source.begin_manual_section('foo')}"
    begin_bar = f"// {generated_source.begin_manual_section('bar')}"
    end = f"// {generated_source.end_manual_section()}"

    original_source = _document("A", begin_foo, "B", end, "C", begin_bar, "D", end)
    new_source = _document("E", begin_bar, "F", end, begin_foo, "G", end, "H")
    expected = _document("E", begin_bar, "D", end, begin_foo, "B", end, "H")

    merged = generated_source.merge_existing(original_source, new_source)
    assert merged == expected