diff --git a/grammar.py b/grammar.py index 51c2766..7d23e87 100644 --- a/grammar.py +++ b/grammar.py @@ -53,14 +53,12 @@ class FineGrammar(Grammar): return seq( self.CLASS, mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type), - self.LCURLY, - mark(opt(self.class_body), field="body"), - self.RCURLY, + mark(self._class_body, field="body"), ) - @rule("ClassBody") - def class_body(self) -> Rule: - return self._class_members + @rule + def _class_body(self) -> Rule: + return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self._class_members, self.RCURLY) @rule def _class_members(self) -> Rule: @@ -142,17 +140,11 @@ class FineGrammar(Grammar): # Block @rule("Block") def block(self) -> Rule: - return alt( - seq(self.LCURLY, self.RCURLY), - seq(self.LCURLY, self.block_body, self.RCURLY), - ) - - @rule("BlockBody") - def block_body(self) -> Rule: - return alt( - self.expression, - self._statement_list, - seq(self._statement_list, self.expression), + return ( + seq(self.LCURLY, self.RCURLY) + | seq(self.LCURLY, self.expression, self.RCURLY) + | seq(self.LCURLY, self._statement_list, self.RCURLY) + | seq(self.LCURLY, self._statement_list, self.expression, self.RCURLY) ) @rule @@ -427,7 +419,6 @@ class FineGrammar(Grammar): if __name__ == "__main__": from pathlib import Path from parser.parser import dump_lexer_table - from parser.emacs import emit_emacs_major_mode from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries grammar = FineGrammar() @@ -436,7 +427,5 @@ if __name__ == "__main__": lexer = grammar.compile_lexer() dump_lexer_table(lexer) - ts_path = Path(__file__).parent / "tree-sitter-fine" - emit_tree_sitter_grammar(grammar, ts_path) - emit_tree_sitter_queries(grammar, ts_path) - emit_emacs_major_mode(grammar, ts_path / "fine.el") + emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine") + emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine") diff --git a/parser/emacs.py b/parser/emacs.py deleted file mode 100644 index 1a73d88..0000000 --- a/parser/emacs.py +++ /dev/null @@ -1,238 +0,0 @@ -# https://www.masteringemacs.org/article/lets-write-a-treesitter-major-mode -import dataclasses -import itertools -import pathlib -import textwrap - -from parser.tree_sitter import terminal_name -from parser.generated_source import ( - begin_manual_section, - end_manual_section, - merge_existing, - sign_generated_source, - signature_token, -) - -from . import parser - - -@dataclasses.dataclass(frozen=True, order=True) -class FaceQuery: - feature: str # Important to be first! - face: str - node: str - field: str | None - - -def gather_faces(grammar: parser.Grammar): - nts = {nt.name: nt for nt in grammar.non_terminals()} - - def scoop(node: str, input: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]: - parts = [] - for item in input: - if isinstance(item, tuple): - meta, sub = item - parts.extend(scoop(node, sub, visited)) - - highlight = meta.get("highlight") - if isinstance(highlight, parser.HighlightMeta): - field_name = meta.get("field") - if not isinstance(field_name, str): - raise Exception("Highlight must come with a field name") # TODO - - feature = highlight.font_lock_feature - face = highlight.font_lock_face - if feature and face: - parts.append( - FaceQuery( - node=node, - field=field_name, - feature=feature, - face=face, - ) - ) - - elif isinstance(item, str): - nt = nts[item] - if nt.transparent: - if nt.name in visited: - continue - visited.add(nt.name) - body = nt.fn(grammar) - for production in body.flatten(with_metadata=True): - parts.extend(scoop(node, production, visited)) - - return parts - - queries: list[FaceQuery] = [] - for rule in grammar.non_terminals(): - if rule.transparent: - continue - - body = rule.fn(grammar) - for production in body.flatten(with_metadata=True): - queries.extend(scoop(rule.name, production, set())) - - for rule in grammar.terminals(): - highlight = rule.meta.get("highlight") - if isinstance(highlight, parser.HighlightMeta): - feature = highlight.font_lock_feature - face = highlight.font_lock_face - if feature and face: - queries.append( - FaceQuery( - node=terminal_name(rule), - field=None, - feature=feature, - face=face, - ) - ) - - # Remove duplicates, which happen. - queries = list(set(queries)) - queries.sort() - - # Group by feature. - features = [] - for feature, qs in itertools.groupby(queries, key=lambda x: x.feature): - feature_group = f":language {grammar.name}\n:override t\n:feature {feature}\n" - - face_queries = [] - for query in qs: - if query.field: - fq = f"({query.node} {query.field}: _ @{query.face})" - else: - fq = f"({query.node}) @{query.face}" - face_queries.append(fq) - - face_queries_str = "\n ".join(face_queries) - feature_group += f"({face_queries_str})\n" - - features.append(feature_group) - - feature_string = "\n".join(features) - feature_string = textwrap.indent(feature_string, " ") - feature_string = feature_string.strip() - - feature_string = f""" -(defvar {grammar.name}-font-lock-rules - '({feature_string}) - "Tree-sitter font lock rules for {grammar.name}.") - """.strip() - - return feature_string - - -def emit_emacs_major_mode(grammar: parser.Grammar, file_path: pathlib.Path | str): - if isinstance(file_path, str): - file_path = pathlib.Path(file_path) - - face_var = gather_faces(grammar) - - contents = f""" -;;; {file_path.name} --- Major mode for editing {grammar.name} --- -*- lexical-binding: t -*- - -;; NOTE: This file is partially generated. -;; Only modify marked sections, or your modifications will be lost! -;; {signature_token()} - -;; {begin_manual_section('commentary')} - -;; This is free and unencumbered software released into the public domain. -;; Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -;; software, either in source code form or as a compiled binary, for any purpose, -;; commercial or non-commercial, and by any means. -;; -;; In jurisdictions that recognize copyright laws, the author or authors of this -;; software dedicate any and all copyright interest in the software to the public -;; domain. We make this dedication for the benefit of the public at large and to -;; the detriment of our heirs and successors. We intend this dedication to be an -;; overt act of relinquishment in perpetuity of all present and future rights to -;; this software under copyright law. -;; -;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -;; AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -;; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -;;; Commentary: -;; (Nobody has written anything about the major mode yet.) - -;; {end_manual_section()} - -;;; Code: -(require 'treesit) - -;; {begin_manual_section('prologue')} - -;; {end_manual_section()} - -{face_var} - -(defun {grammar.name}-ts-setup () - "Setup for {grammar.name}-mode." - - ;; {begin_manual_section('setup_prologue')} - ;; {end_manual_section()} - - ;; Set up the font-lock rules. - (setq-local treesit-font-lock-settings - (apply #'treesit-font-lock-rules - {grammar.name}-font-lock-rules)) - - ;; {begin_manual_section('feature_list')} - ;; NOTE: This list is just to get you started; these are some of the standard - ;; features and somewhat standard positions in the feature list. You can - ;; edit this to more closely match your grammar's output. (The info page - ;; for treesit-font-lock-feature-list describes what it does nicely.) - (setq-local treesit-font-lock-feature-list - '((comment definition) - (keyword string) - (assignment attribute builtin constant escape-sequence number type) - (bracket delimiter error function operator property variable))) - ;; {end_manual_section()} - - ;; {begin_manual_section('setup_epilogue')} - ;; If you want to set up more do it here. - ;; {end_manual_section()} - - (treesit-major-mode-setup)) - -;;;###autoload -(define-derived-mode {grammar.name}-mode prog-mode "{grammar.name}" - "Major mode for editing {grammar.name} files." - - (setq-local font-lock-defaults nil) - (when (treesit-ready-p '{grammar.name}) - (treesit-parser-create '{grammar.name}) - ({grammar.name}-ts-setup))) - - -;; {begin_manual_section('eplogue')} - -;; {end_manual_section()} -;;; {file_path.name} ends here -""".lstrip() - - # Sign the contents to give folks a way to check that they haven't been - # messed with. - contents = sign_generated_source(contents) - - # Try to pull existing file contents out and merge them with the - # generated code. This preserves hand-editing in approved areas. - try: - with open(file_path, "r", encoding="utf-8") as file: - existing_contents = file.read() - contents = merge_existing(existing_contents, contents) - except Exception: - pass - - # Ensure that parent directories are created as necessary for the output. - if not file_path.parent.exists(): - file_path.parent.mkdir(parents=True, exist_ok=True) - - # And write the file! - with open(file_path, "w", encoding="utf-8") as file: - file.write(contents) diff --git a/parser/generated_source.py b/parser/generated_source.py deleted file mode 100644 index 0351181..0000000 --- a/parser/generated_source.py +++ /dev/null @@ -1,107 +0,0 @@ -import hashlib -import re -import typing - -_SIGNING_SLUG = "!*RVCugYltjOsekrgCXTlKuqIrfy4-ScohO22mEDCr2ts" -_SIGNING_PREFIX = "generated source" - -_BEGIN_PATTERN = re.compile("BEGIN MANUAL SECTION ([^ ]+)") -_END_PATTERN = re.compile("END MANUAL SECTION") -_SIGNATURE_PATTERN = re.compile(_SIGNING_PREFIX + " Signed<<([0-9a-f]+)>>") - - -def signature_token() -> str: - return _SIGNING_PREFIX + " " + _SIGNING_SLUG - - -def begin_manual_section(name: str) -> str: - return f"BEGIN MANUAL SECTION {name}" - - -def end_manual_section() -> str: - return f"END MANUAL SECTION" - - -def _compute_digest(source: str) -> str: - m = hashlib.sha256() - for section, lines in _iterate_sections(source): - if section is None: - for line in lines: - m.update(line.encode("utf-8")) - return m.hexdigest() - - -def sign_generated_source(source: str) -> str: - # Only compute the hash over the automatically generated sections of the - # source file. - digest = _compute_digest(source) - signed = source.replace(_SIGNING_SLUG, f"Signed<<{digest}>>") - if signed == source: - raise ValueError("Source did not contain a signature token to replace") - return signed - - -def is_signed(source: str) -> bool: - return _SIGNATURE_PATTERN.search(source) is not None - - -def validate_signature(source: str) -> bool: - signatures = [m.group(1) for m in _SIGNATURE_PATTERN.finditer(source)] - if len(signatures) > 1: - raise ValueError("Multiple signatures found in source") - if len(signatures) == 0: - raise ValueError("Source does not appear to be signed") - signature: str = signatures[0] - - unsigned = source.replace(f"Signed<<{signature}>>", _SIGNING_SLUG) - actual = _compute_digest(unsigned) - - return signature == actual - - -def merge_existing(existing: str, generated: str) -> str: - manual_sections = _extract_manual_sections(existing) - - result_lines = [] - for section, lines in _iterate_sections(generated): - if section is not None: - lines = manual_sections.get(section, lines) - result_lines.extend(lines) - - return "".join(result_lines) - - -def _extract_manual_sections(code: str) -> dict[str, list[str]]: - result = {} - for section, lines in _iterate_sections(code): - if section is not None: - existing = result.get(section) - if existing is not None: - existing.extend(lines) - else: - result[section] = lines - return result - - -def _iterate_sections(code: str) -> typing.Generator[tuple[str | None, list[str]], None, None]: - current_section: str | None = None - current_lines = [] - for line in code.splitlines(keepends=True): - if current_section is None: - current_lines.append(line) - match = _BEGIN_PATTERN.search(line) - if match is None: - continue - - yield (None, current_lines) - current_lines = [] - current_section = match.group(1) - else: - if _END_PATTERN.search(line): - yield (current_section, current_lines) - current_lines = [] - current_section = None - - current_lines.append(line) - - yield (current_section, current_lines) diff --git a/parser/parser.py b/parser/parser.py index 42ff058..ed67456 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -2374,20 +2374,14 @@ class SyntaxMeta: class HighlightMeta(SyntaxMeta): scope: str - font_lock_face: str | None - font_lock_feature: str | None def __init__(self, *scope: str): self.scope = ".".join(scope) - self.font_lock_face = None - self.font_lock_feature = None class CommentHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("comment", *scope) - self.font_lock_face = "font-lock-comment-face" - self.font_lock_feature = "comment" class BlockCommentHighlight(CommentHighlight): @@ -2403,8 +2397,6 @@ class LineCommentHighlight(CommentHighlight): class ConstantHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("constant", *scope) - self.font_lock_face = "font-lock-constant-face" - self.font_lock_feature = "constant" class LanguageConstantHighlight(ConstantHighlight): @@ -2415,8 +2407,6 @@ class LanguageConstantHighlight(ConstantHighlight): class NumericConstantHighlight(ConstantHighlight): def __init__(self, *scope: str): super().__init__("numeric", *scope) - self.font_lock_feature = "number" - self.font_lock_face = "font-lock-number-face" class EntityHighlight(HighlightMeta): @@ -2427,27 +2417,21 @@ class EntityHighlight(HighlightMeta): class NameEntityHighlight(EntityHighlight): def __init__(self, *scope: str): super().__init__("name", *scope) - self.font_lock_face = "font-lock-variable-name-face" - self.font_lock_feature = "definition" class FunctionNameEntityHighlight(NameEntityHighlight): def __init__(self, *scope: str): super().__init__("function", *scope) - self.font_lock_face = "font-lock-function-name-face" class TypeNameEntityHighlight(NameEntityHighlight): def __init__(self, *scope: str): super().__init__("type", *scope) - self.font_lock_feature = "type" class KeywordHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("keyword", *scope) - self.font_lock_feature = "keyword" - self.font_lock_face = "font-lock-keyword-face" class ControlKeywordHighlight(KeywordHighlight): @@ -2463,8 +2447,6 @@ class ConditionalControlKeywordHighlight(ControlKeywordHighlight): class OperatorKeywordHighlight(KeywordHighlight): def __init__(self, *scope: str): super().__init__("operator", *scope) - self.font_lock_feature = "operator" - self.font_lock_face = "font-lock-operator-face" class ExpressionOperatorKeywordHighlight(OperatorKeywordHighlight): @@ -2480,8 +2462,6 @@ class OtherKeywordHighlight(KeywordHighlight): class PunctuationHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("punctuation", *scope) - self.font_lock_feature = "delimiter" - self.font_lock_face = "font-lock-punctuation-face" class SeparatorPunctuationHighlight(PunctuationHighlight): @@ -2492,8 +2472,6 @@ class SeparatorPunctuationHighlight(PunctuationHighlight): class ParenthesisPunctuationHighlight(PunctuationHighlight): def __init__(self, *scope: str): super().__init__("parenthesis", *scope) - self.font_lock_feature = "bracket" - self.font_lock_face = "font-lock-bracket-face" class OpenParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight): @@ -2509,8 +2487,6 @@ class CloseParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight): class CurlyBracePunctuationHighlight(PunctuationHighlight): def __init__(self, *scope: str): super().__init__("curlybrace", *scope) - self.font_lock_feature = "bracket" - self.font_lock_face = "font-lock-bracket-face" class OpenCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight): @@ -2526,8 +2502,6 @@ class CloseCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight): class SquareBracketPunctuationHighlight(PunctuationHighlight): def __init__(self, *scope: str): super().__init__("squarebracket", *scope) - self.font_lock_feature = "bracket" - self.font_lock_face = "font-lock-bracket-face" class OpenSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight): @@ -2543,8 +2517,6 @@ class CloseSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight): class StorageHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("storage", *scope) - self.font_lock_feature = "keyword" - self.font_lock_face = "font-lock-keyword-face" class TypeStorageHighlight(StorageHighlight): @@ -2570,8 +2542,6 @@ class StructTypeStorageHighlight(TypeStorageHighlight): class StringHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("string", *scope) - self.font_lock_feature = "string" - self.font_lock_face = "font-lock-string-face" class QuotedStringHighlight(StringHighlight): @@ -2592,15 +2562,11 @@ class DoubleQuotedStringHighlight(QuotedStringHighlight): class VariableHighlight(HighlightMeta): def __init__(self, *scope: str): super().__init__("variable", *scope) - self.font_lock_feature = "variable" - self.font_lock_face = "font-lock-variable-use-face" class LanguageVariableHighlight(VariableHighlight): def __init__(self, *scope: str): super().__init__("language", *scope) - self.font_lock_feature = "builtin" - self.font_lock_face = "font-lock-builtin-face" class _Highlight: diff --git a/tests/test_generated_source.py b/tests/test_generated_source.py deleted file mode 100644 index d399474..0000000 --- a/tests/test_generated_source.py +++ /dev/null @@ -1,110 +0,0 @@ -import parser.generated_source as generated_source - - -def test_signature(): - input_source = f""" -This is a random thing. - -Put your slug here: {generated_source.signature_token()} - -Here are some more things: - - - Machine Generated - - More Machine Gnerated -{generated_source.begin_manual_section('foo')} - - You can edit here! -{generated_source.end_manual_section()} - - But not here. -{generated_source.begin_manual_section('bar')} - - You can edit here too! -{generated_source.end_manual_section()} - - Also not here. -""" - signed = generated_source.sign_generated_source(input_source) - assert signed != input_source - assert generated_source.is_signed(signed) - assert generated_source.validate_signature(signed) - - -def test_manual_changes(): - input_source = f""" -This is a random thing. - -Put your slug here: {generated_source.signature_token()} - -Here are some more things: - - - Machine Generated - - More Machine Gnerated -{generated_source.begin_manual_section('foo')} - - XXXXX -{generated_source.end_manual_section()} - - But not here. -""" - signed = generated_source.sign_generated_source(input_source) - modified = signed.replace("XXXXX", "YYYYY") - assert modified != signed - - assert generated_source.is_signed(modified) - assert generated_source.validate_signature(modified) - - -def test_bad_changes(): - input_source = f""" -This is a random thing. - -Put your slug here: {generated_source.signature_token()} - -Here are some more things: - - - Machine Generated - - More Machine Gnerated -{generated_source.begin_manual_section('foo')} - - XXXXX -{generated_source.end_manual_section()} - - ZZZZZ -""" - signed = generated_source.sign_generated_source(input_source) - modified = signed.replace("ZZZZZ", "YYYYY") - assert modified != signed - - assert generated_source.is_signed(modified) - assert not generated_source.validate_signature(modified) - - -def test_merge_changes(): - original_source = f""" -A -// {generated_source.begin_manual_section('foo')} -B -// {generated_source.end_manual_section()} -C -// {generated_source.begin_manual_section('bar')} -D -// {generated_source.end_manual_section()} -""" - new_source = f""" -E -// {generated_source.begin_manual_section('bar')} -F -// {generated_source.end_manual_section()} -// {generated_source.begin_manual_section('foo')} -G -// {generated_source.end_manual_section()} -H -""" - - merged = generated_source.merge_existing(original_source, new_source) - assert ( - merged - == f""" -E -// {generated_source.begin_manual_section('bar')} -D -// {generated_source.end_manual_section()} -// {generated_source.begin_manual_section('foo')} -B -// {generated_source.end_manual_section()} -H -""" - )