Compare commits

..

No commits in common. "d7dfd556ec18622077d9dbd834ee9cdb364a2c4c" and "676ddedbaf7e628759fe5e299f2d370728f41982" have entirely different histories.

5 changed files with 11 additions and 511 deletions

View file

@ -53,14 +53,12 @@ class FineGrammar(Grammar):
return seq(
self.CLASS,
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
self.LCURLY,
mark(opt(self.class_body), field="body"),
self.RCURLY,
mark(self._class_body, field="body"),
)
@rule("ClassBody")
def class_body(self) -> Rule:
return self._class_members
@rule
def _class_body(self) -> Rule:
return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self._class_members, self.RCURLY)
@rule
def _class_members(self) -> Rule:
@ -142,17 +140,11 @@ class FineGrammar(Grammar):
# Block
@rule("Block")
def block(self) -> Rule:
return alt(
seq(self.LCURLY, self.RCURLY),
seq(self.LCURLY, self.block_body, self.RCURLY),
)
@rule("BlockBody")
def block_body(self) -> Rule:
return alt(
self.expression,
self._statement_list,
seq(self._statement_list, self.expression),
return (
seq(self.LCURLY, self.RCURLY)
| seq(self.LCURLY, self.expression, self.RCURLY)
| seq(self.LCURLY, self._statement_list, self.RCURLY)
| seq(self.LCURLY, self._statement_list, self.expression, self.RCURLY)
)
@rule
@ -427,7 +419,6 @@ class FineGrammar(Grammar):
if __name__ == "__main__":
from pathlib import Path
from parser.parser import dump_lexer_table
from parser.emacs import emit_emacs_major_mode
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
grammar = FineGrammar()
@ -436,7 +427,5 @@ if __name__ == "__main__":
lexer = grammar.compile_lexer()
dump_lexer_table(lexer)
ts_path = Path(__file__).parent / "tree-sitter-fine"
emit_tree_sitter_grammar(grammar, ts_path)
emit_tree_sitter_queries(grammar, ts_path)
emit_emacs_major_mode(grammar, ts_path / "fine.el")
emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine")
emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine")

View file

@ -1,238 +0,0 @@
# https://www.masteringemacs.org/article/lets-write-a-treesitter-major-mode
import dataclasses
import itertools
import pathlib
import textwrap
from parser.tree_sitter import terminal_name
from parser.generated_source import (
begin_manual_section,
end_manual_section,
merge_existing,
sign_generated_source,
signature_token,
)
from . import parser
@dataclasses.dataclass(frozen=True, order=True)
class FaceQuery:
    """One font-lock capture: a node (optionally one of its fields) and its face.

    The dataclass is frozen, so instances are hashable and duplicates can be
    dropped by putting them in a set. It is also ordered; comparison follows
    the declaration order of the fields below, which is why ``feature`` must
    stay first: sorting then places queries of the same feature next to each
    other.
    """

    feature: str  # Important to be first!
    face: str
    node: str
    # Name of the field on ``node`` to capture; None captures the node itself.
    field: str | None
def gather_faces(grammar: parser.Grammar) -> str:
    """Build the elisp ``defvar`` holding the font-lock rules for ``grammar``.

    Every highlight annotation found in the grammar's non-terminal
    productions and on its terminals is turned into a ``FaceQuery``. The
    queries are de-duplicated, sorted, grouped by font-lock feature and
    rendered as the argument list expected by ``treesit-font-lock-rules``.

    Args:
        grammar: The grammar whose rules and terminals are inspected.

    Returns:
        The text of a ``(defvar <name>-font-lock-rules ...)`` form.

    Raises:
        ValueError: If a highlight annotation inside a production is not
            accompanied by a string ``field`` name.
    """
    nts = {nt.name: nt for nt in grammar.non_terminals()}

    def scoop(
        node: str,
        production: parser.FlattenedWithMetadata,
        visited: set[str],
    ) -> list[FaceQuery]:
        """Collect the face queries that ``production`` contributes to ``node``."""
        found: list[FaceQuery] = []
        for item in production:
            if isinstance(item, tuple):
                meta, sub = item
                found.extend(scoop(node, sub, visited))

                highlight = meta.get("highlight")
                if not isinstance(highlight, parser.HighlightMeta):
                    continue

                field_name = meta.get("field")
                if not isinstance(field_name, str):
                    raise ValueError("Highlight must come with a field name")

                feature = highlight.font_lock_feature
                face = highlight.font_lock_face
                if feature and face:
                    found.append(
                        FaceQuery(
                            node=node,
                            field=field_name,
                            feature=feature,
                            face=face,
                        )
                    )

            elif isinstance(item, str):
                nt = nts[item]
                # Only transparent rules are expanded in place, and each one
                # at most once per top-level production (guards recursion).
                if not nt.transparent or nt.name in visited:
                    continue
                visited.add(nt.name)
                for inner in nt.fn(grammar).flatten(with_metadata=True):
                    found.extend(scoop(node, inner, visited))

        return found

    queries: list[FaceQuery] = []
    for rule in grammar.non_terminals():
        if rule.transparent:
            continue
        for production in rule.fn(grammar).flatten(with_metadata=True):
            queries.extend(scoop(rule.name, production, set()))

    for rule in grammar.terminals():
        highlight = rule.meta.get("highlight")
        if isinstance(highlight, parser.HighlightMeta):
            feature = highlight.font_lock_feature
            face = highlight.font_lock_face
            if feature and face:
                queries.append(
                    FaceQuery(
                        node=terminal_name(rule),
                        field=None,
                        feature=feature,
                        face=face,
                    )
                )

    # Remove duplicates, which happen, and sort so that queries sharing a
    # feature are adjacent for groupby. The explicit key mirrors the
    # dataclass field order but maps a missing field to "" so that a
    # field-less query can be compared with a fielded one without TypeError.
    queries = sorted(
        set(queries),
        key=lambda q: (q.feature, q.face, q.node, q.field or ""),
    )

    # Group by feature.
    features = []
    for feature, group in itertools.groupby(queries, key=lambda q: q.feature):
        face_queries = [
            f"({query.node} {query.field}: _ @{query.face})"
            if query.field
            else f"({query.node}) @{query.face}"
            for query in group
        ]
        face_queries_str = "\n ".join(face_queries)
        features.append(
            f":language {grammar.name}\n:override t\n:feature {feature}\n"
            f"({face_queries_str})\n"
        )

    feature_string = "\n".join(features)
    feature_string = textwrap.indent(feature_string, " ")
    feature_string = feature_string.strip()

    feature_string = f"""
(defvar {grammar.name}-font-lock-rules
'({feature_string})
"Tree-sitter font lock rules for {grammar.name}.")
""".strip()

    return feature_string
def emit_emacs_major_mode(grammar: parser.Grammar, file_path: pathlib.Path | str) -> None:
    """Write (or refresh) an Emacs tree-sitter major mode for ``grammar``.

    The generated elisp is signed with ``sign_generated_source``. If a file
    already exists at ``file_path``, the contents of its manual sections are
    carried over into the new output via ``merge_existing`` before the file
    is rewritten. Missing parent directories are created.

    Args:
        grammar: The grammar to generate the mode for.
        file_path: Destination of the ``.el`` file.
    """
    file_path = pathlib.Path(file_path)

    face_var = gather_faces(grammar)

    # NOTE: the 'eplogue' section name near the end of the template is
    # misspelled, but section names are the keys used when merging with a
    # previously generated file, so it is intentionally left unchanged.
    contents = f"""
;;; {file_path.name} --- Major mode for editing {grammar.name} --- -*- lexical-binding: t -*-
;; NOTE: This file is partially generated.
;; Only modify marked sections, or your modifications will be lost!
;; {signature_token()}
;; {begin_manual_section('commentary')}
;; This is free and unencumbered software released into the public domain.
;; Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
;; software, either in source code form or as a compiled binary, for any purpose,
;; commercial or non-commercial, and by any means.
;;
;; In jurisdictions that recognize copyright laws, the author or authors of this
;; software dedicate any and all copyright interest in the software to the public
;; domain. We make this dedication for the benefit of the public at large and to
;; the detriment of our heirs and successors. We intend this dedication to be an
;; overt act of relinquishment in perpetuity of all present and future rights to
;; this software under copyright law.
;;
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;; AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
;; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
;;; Commentary:
;; (Nobody has written anything about the major mode yet.)
;; {end_manual_section()}
;;; Code:
(require 'treesit)
;; {begin_manual_section('prologue')}
;; {end_manual_section()}
{face_var}
(defun {grammar.name}-ts-setup ()
"Setup for {grammar.name}-mode."
;; {begin_manual_section('setup_prologue')}
;; {end_manual_section()}
;; Set up the font-lock rules.
(setq-local treesit-font-lock-settings
(apply #'treesit-font-lock-rules
{grammar.name}-font-lock-rules))
;; {begin_manual_section('feature_list')}
;; NOTE: This list is just to get you started; these are some of the standard
;; features and somewhat standard positions in the feature list. You can
;; edit this to more closely match your grammar's output. (The info page
;; for treesit-font-lock-feature-list describes what it does nicely.)
(setq-local treesit-font-lock-feature-list
'((comment definition)
(keyword string)
(assignment attribute builtin constant escape-sequence number type)
(bracket delimiter error function operator property variable)))
;; {end_manual_section()}
;; {begin_manual_section('setup_epilogue')}
;; If you want to set up more do it here.
;; {end_manual_section()}
(treesit-major-mode-setup))
;;;###autoload
(define-derived-mode {grammar.name}-mode prog-mode "{grammar.name}"
"Major mode for editing {grammar.name} files."
(setq-local font-lock-defaults nil)
(when (treesit-ready-p '{grammar.name})
(treesit-parser-create '{grammar.name})
({grammar.name}-ts-setup)))
;; {begin_manual_section('eplogue')}
;; {end_manual_section()}
;;; {file_path.name} ends here
""".lstrip()

    # Sign the contents to give folks a way to check that they haven't been
    # messed with.
    contents = sign_generated_source(contents)

    # Try to pull existing file contents out and merge them with the
    # generated code. This preserves hand-editing in approved areas.
    # Reading is best-effort: with no readable previous file we simply emit
    # the fresh output. Only I/O and decoding failures are tolerated, so a
    # genuine bug in the merge step is no longer silently hidden.
    try:
        existing_contents = file_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        pass
    else:
        contents = merge_existing(existing_contents, contents)

    # Ensure that parent directories are created as necessary for the output.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    # And write the file!
    file_path.write_text(contents, encoding="utf-8")

View file

@ -1,107 +0,0 @@
import hashlib
import re
import typing
# Placeholder that freshly generated text carries until it is signed; signing
# replaces it with "Signed<<digest>>".
_SIGNING_SLUG = "!*RVCugYltjOsekrgCXTlKuqIrfy4-ScohO22mEDCr2ts"
_SIGNING_PREFIX = "generated source"

# The section name is the run of non-whitespace characters after the marker.
# Using \S (rather than "anything but a space") keeps the line terminator out
# of the captured name when lines are scanned with their endings attached.
_BEGIN_PATTERN = re.compile(r"BEGIN MANUAL SECTION (\S+)")
_END_PATTERN = re.compile(r"END MANUAL SECTION")
_SIGNATURE_PATTERN = re.compile(re.escape(_SIGNING_PREFIX) + r" Signed<<([0-9a-f]+)>>")
def signature_token() -> str:
    """Return the unsigned placeholder text to embed in generated source.

    The result is the signing prefix, a space, and the signing slug; signing
    later swaps the slug for the real signature.
    """
    return f"{_SIGNING_PREFIX} {_SIGNING_SLUG}"
def begin_manual_section(name: str) -> str:
    """Return the marker text that opens the manual section called ``name``."""
    return "BEGIN MANUAL SECTION {}".format(name)
def end_manual_section() -> str:
    """Return the marker text that closes the current manual section."""
    # A plain literal: the original f-string had no placeholders.
    return "END MANUAL SECTION"
def _compute_digest(source: str) -> str:
    """Return the SHA-256 hex digest of the generated parts of ``source``.

    Lines that belong to a named manual section are left out of the hash, so
    editing them does not change the digest.
    """
    hasher = hashlib.sha256()
    for name, chunk in _iterate_sections(source):
        if name is not None:
            # Manual section: deliberately excluded from the digest.
            continue
        hasher.update("".join(chunk).encode("utf-8"))
    return hasher.hexdigest()
def sign_generated_source(source: str) -> str:
    """Return ``source`` with its signing slug replaced by a signature.

    The signature is the digest of the automatically generated sections
    only, computed while the slug is still in place.

    Raises:
        ValueError: If ``source`` does not contain the signing slug.
    """
    signature = "Signed<<{}>>".format(_compute_digest(source))
    if _SIGNING_SLUG not in source:
        raise ValueError("Source did not contain a signature token to replace")
    return source.replace(_SIGNING_SLUG, signature)
def is_signed(source: str) -> bool:
    """Report whether ``source`` contains something shaped like a signature.

    This only looks for the signature text; it does not check that the
    signature matches the content.
    """
    found = _SIGNATURE_PATTERN.search(source)
    return found is not None
def validate_signature(source: str) -> bool:
    """Check that the signature embedded in ``source`` matches its content.

    The signature is swapped back for the signing slug and the digest is
    recomputed; the result is True when it equals the recorded signature.

    Raises:
        ValueError: If ``source`` holds no signature, or more than one.
    """
    found = _SIGNATURE_PATTERN.findall(source)
    if not found:
        raise ValueError("Source does not appear to be signed")
    if len(found) > 1:
        raise ValueError("Multiple signatures found in source")

    (recorded,) = found
    restored = source.replace(f"Signed<<{recorded}>>", _SIGNING_SLUG)
    return _compute_digest(restored) == recorded
def merge_existing(existing: str, generated: str) -> str:
    """Return ``generated`` with manual sections taken from ``existing``.

    The layout of ``generated`` is kept. For each named manual section, the
    lines recorded under the same name in ``existing`` are used when present;
    otherwise the generated lines for that section are kept.
    """
    preserved = _extract_manual_sections(existing)

    pieces: list[str] = []
    for name, chunk in _iterate_sections(generated):
        if name is None:
            pieces.extend(chunk)
        else:
            pieces.extend(preserved.get(name, chunk))

    return "".join(pieces)
def _extract_manual_sections(code: str) -> dict[str, list[str]]:
    """Map each manual section name in ``code`` to its lines.

    When a name occurs more than once, the lines of later occurrences are
    appended to those of the first.
    """
    sections: dict[str, list[str]] = {}
    for name, chunk in _iterate_sections(code):
        if name is None:
            continue
        sections.setdefault(name, []).extend(chunk)
    return sections
def _iterate_sections(code: str) -> typing.Generator[tuple[str | None, list[str]], None, None]:
    """Split ``code`` into alternating generated and manual chunks.

    Yields ``(name, lines)`` pairs in file order. ``name`` is None for
    generated text and the captured section name for manual text. The BEGIN
    and END marker lines themselves are part of the generated chunks. A
    final chunk is always yielded for whatever is pending at end of input.
    """
    section: str | None = None
    pending: list[str] = []

    for line in code.splitlines(keepends=True):
        if section is not None:
            # Inside a manual section: an END marker closes it, and the
            # marker line itself starts the next generated chunk.
            if _END_PATTERN.search(line):
                yield (section, pending)
                pending = []
                section = None
            pending.append(line)
            continue

        # Generated text: the BEGIN marker line ends the current chunk.
        pending.append(line)
        begin = _BEGIN_PATTERN.search(line)
        if begin is not None:
            yield (None, pending)
            pending = []
            section = begin.group(1)

    yield (section, pending)

View file

@ -2374,20 +2374,14 @@ class SyntaxMeta:
class HighlightMeta(SyntaxMeta):
    """Highlight metadata: a dotted scope plus optional Emacs font-lock hints."""

    scope: str
    # Both font-lock attributes start as None; subclasses assign them.
    font_lock_face: str | None
    font_lock_feature: str | None

    def __init__(self, *scope: str):
        """Join the ``scope`` parts with dots and clear the font-lock hints."""
        self.font_lock_feature = None
        self.font_lock_face = None
        self.scope = ".".join(scope)
class CommentHighlight(HighlightMeta):
    """Highlight under the "comment" scope, using the font-lock comment face."""

    def __init__(self, *scope: str):
        super().__init__("comment", *scope)
        self.font_lock_feature = "comment"
        self.font_lock_face = "font-lock-comment-face"
class BlockCommentHighlight(CommentHighlight):
@ -2403,8 +2397,6 @@ class LineCommentHighlight(CommentHighlight):
class ConstantHighlight(HighlightMeta):
    """Highlight under the "constant" scope, using the font-lock constant face."""

    def __init__(self, *scope: str):
        super().__init__("constant", *scope)
        self.font_lock_feature = "constant"
        self.font_lock_face = "font-lock-constant-face"
class LanguageConstantHighlight(ConstantHighlight):
@ -2415,8 +2407,6 @@ class LanguageConstantHighlight(ConstantHighlight):
class NumericConstantHighlight(ConstantHighlight):
    """Highlight under the "numeric" sub-scope, using the font-lock number face."""

    def __init__(self, *scope: str):
        super().__init__("numeric", *scope)
        # Replace the values set by the parent initializer.
        self.font_lock_face = "font-lock-number-face"
        self.font_lock_feature = "number"
class EntityHighlight(HighlightMeta):
@ -2427,27 +2417,21 @@ class EntityHighlight(HighlightMeta):
class NameEntityHighlight(EntityHighlight):
    """Highlight under the "name" sub-scope, filed under the "definition" feature."""

    def __init__(self, *scope: str):
        super().__init__("name", *scope)
        self.font_lock_feature = "definition"
        self.font_lock_face = "font-lock-variable-name-face"
class FunctionNameEntityHighlight(NameEntityHighlight):
    """Highlight under the "function" sub-scope, using the function-name face."""

    def __init__(self, *scope: str):
        super().__init__("function", *scope)
        # Only the face is replaced here; the feature is whatever the parent
        # initializer assigned.
        self.font_lock_face = "font-lock-function-name-face"
class TypeNameEntityHighlight(NameEntityHighlight):
    """Highlight under the "type" sub-scope, filed under the "type" feature."""

    def __init__(self, *scope: str):
        super().__init__("type", *scope)
        self.font_lock_feature = "type"
        # The parent initializer leaves the variable-name face in place;
        # type names should be shown with Emacs' dedicated type face.
        self.font_lock_face = "font-lock-type-face"
class KeywordHighlight(HighlightMeta):
    """Highlight under the "keyword" scope, using the font-lock keyword face."""

    def __init__(self, *scope: str):
        super().__init__("keyword", *scope)
        self.font_lock_face = "font-lock-keyword-face"
        self.font_lock_feature = "keyword"
class ControlKeywordHighlight(KeywordHighlight):
@ -2463,8 +2447,6 @@ class ConditionalControlKeywordHighlight(ControlKeywordHighlight):
class OperatorKeywordHighlight(KeywordHighlight):
    """Highlight under the "operator" sub-scope, using the font-lock operator face."""

    def __init__(self, *scope: str):
        super().__init__("operator", *scope)
        # Replace the values set by the parent initializer.
        self.font_lock_face = "font-lock-operator-face"
        self.font_lock_feature = "operator"
class ExpressionOperatorKeywordHighlight(OperatorKeywordHighlight):
@ -2480,8 +2462,6 @@ class OtherKeywordHighlight(KeywordHighlight):
class PunctuationHighlight(HighlightMeta):
    """Highlight under the "punctuation" scope, filed under the "delimiter" feature."""

    def __init__(self, *scope: str):
        super().__init__("punctuation", *scope)
        self.font_lock_face = "font-lock-punctuation-face"
        self.font_lock_feature = "delimiter"
class SeparatorPunctuationHighlight(PunctuationHighlight):
@ -2492,8 +2472,6 @@ class SeparatorPunctuationHighlight(PunctuationHighlight):
class ParenthesisPunctuationHighlight(PunctuationHighlight):
    """Highlight under the "parenthesis" sub-scope, using the font-lock bracket face."""

    def __init__(self, *scope: str):
        super().__init__("parenthesis", *scope)
        # Replace the values set by the parent initializer.
        self.font_lock_face = "font-lock-bracket-face"
        self.font_lock_feature = "bracket"
class OpenParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight):
@ -2509,8 +2487,6 @@ class CloseParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight):
class CurlyBracePunctuationHighlight(PunctuationHighlight):
    """Highlight under the "curlybrace" sub-scope, using the font-lock bracket face."""

    def __init__(self, *scope: str):
        super().__init__("curlybrace", *scope)
        # Replace the values set by the parent initializer.
        self.font_lock_face = "font-lock-bracket-face"
        self.font_lock_feature = "bracket"
class OpenCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight):
@ -2526,8 +2502,6 @@ class CloseCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight):
class SquareBracketPunctuationHighlight(PunctuationHighlight):
    """Highlight under the "squarebracket" sub-scope, using the font-lock bracket face."""

    def __init__(self, *scope: str):
        super().__init__("squarebracket", *scope)
        # Replace the values set by the parent initializer.
        self.font_lock_face = "font-lock-bracket-face"
        self.font_lock_feature = "bracket"
class OpenSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight):
@ -2543,8 +2517,6 @@ class CloseSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight):
class StorageHighlight(HighlightMeta):
    """Highlight under the "storage" scope, using the font-lock keyword face."""

    def __init__(self, *scope: str):
        super().__init__("storage", *scope)
        self.font_lock_face = "font-lock-keyword-face"
        self.font_lock_feature = "keyword"
class TypeStorageHighlight(StorageHighlight):
@ -2570,8 +2542,6 @@ class StructTypeStorageHighlight(TypeStorageHighlight):
class StringHighlight(HighlightMeta):
    """Highlight under the "string" scope, using the font-lock string face."""

    def __init__(self, *scope: str):
        super().__init__("string", *scope)
        self.font_lock_face = "font-lock-string-face"
        self.font_lock_feature = "string"
class QuotedStringHighlight(StringHighlight):
@ -2592,15 +2562,11 @@ class DoubleQuotedStringHighlight(QuotedStringHighlight):
class VariableHighlight(HighlightMeta):
    """Highlight under the "variable" scope, using the font-lock variable-use face."""

    def __init__(self, *scope: str):
        super().__init__("variable", *scope)
        self.font_lock_face = "font-lock-variable-use-face"
        self.font_lock_feature = "variable"
class LanguageVariableHighlight(VariableHighlight):
    """Highlight under the "language" sub-scope, using the font-lock builtin face."""

    def __init__(self, *scope: str):
        super().__init__("language", *scope)
        # Replace the values set by the parent initializer.
        self.font_lock_face = "font-lock-builtin-face"
        self.font_lock_feature = "builtin"
class _Highlight:

View file

@ -1,110 +0,0 @@
import parser.generated_source as generated_source
def test_signature():
    """Signing swaps the token for a signature that is detected and validates."""
    unsigned = f"""
This is a random thing.
Put your slug here: {generated_source.signature_token()}
Here are some more things:
- Machine Generated
- More Machine Gnerated
{generated_source.begin_manual_section('foo')}
- You can edit here!
{generated_source.end_manual_section()}
- But not here.
{generated_source.begin_manual_section('bar')}
- You can edit here too!
{generated_source.end_manual_section()}
- Also not here.
"""
    result = generated_source.sign_generated_source(unsigned)

    assert result != unsigned
    assert generated_source.is_signed(result)
    assert generated_source.validate_signature(result)
def test_manual_changes():
    """Editing text inside a manual section keeps the signature valid."""
    unsigned = f"""
This is a random thing.
Put your slug here: {generated_source.signature_token()}
Here are some more things:
- Machine Generated
- More Machine Gnerated
{generated_source.begin_manual_section('foo')}
- XXXXX
{generated_source.end_manual_section()}
- But not here.
"""
    result = generated_source.sign_generated_source(unsigned)
    edited = result.replace("XXXXX", "YYYYY")

    assert edited != result
    assert generated_source.is_signed(edited)
    assert generated_source.validate_signature(edited)
def test_bad_changes():
    """Editing text outside any manual section makes validation fail."""
    unsigned = f"""
This is a random thing.
Put your slug here: {generated_source.signature_token()}
Here are some more things:
- Machine Generated
- More Machine Gnerated
{generated_source.begin_manual_section('foo')}
- XXXXX
{generated_source.end_manual_section()}
- ZZZZZ
"""
    result = generated_source.sign_generated_source(unsigned)
    edited = result.replace("ZZZZZ", "YYYYY")

    assert edited != result
    assert generated_source.is_signed(edited)
    assert not generated_source.validate_signature(edited)
def test_merge_changes():
    """Merging keeps the new layout but restores each manual section by name."""
    previous = f"""
A
// {generated_source.begin_manual_section('foo')}
B
// {generated_source.end_manual_section()}
C
// {generated_source.begin_manual_section('bar')}
D
// {generated_source.end_manual_section()}
"""
    regenerated = f"""
E
// {generated_source.begin_manual_section('bar')}
F
// {generated_source.end_manual_section()}
// {generated_source.begin_manual_section('foo')}
G
// {generated_source.end_manual_section()}
H
"""
    # Generated text (E, H) comes from the new source; the manual bodies
    # (D for bar, B for foo) come from the previous one.
    expected = f"""
E
// {generated_source.begin_manual_section('bar')}
D
// {generated_source.end_manual_section()}
// {generated_source.begin_manual_section('foo')}
B
// {generated_source.end_manual_section()}
H
"""

    merged = generated_source.merge_existing(previous, regenerated)
    assert merged == expected