Compare commits

...

5 commits

Author SHA1 Message Date
d7dfd556ec Emit an emacs major mode
With coloring! Next up: formatting but that might be hard.
2024-09-06 11:51:09 -07:00
4941cd049c Helper routines for generating source code
This includes "signing" source to detect modifications, and
maintaining user-modified sections. Hooray!
2024-09-06 11:50:17 -07:00
0243b0bf77 Refactor grammar to make "nicer" trees (I guess) 2024-09-06 10:21:00 -07:00
23981f82ce Start working on emacs mode generation 2024-09-06 10:20:34 -07:00
501c2e3fbe Teach the highlight meta about emacs face names 2024-09-06 10:20:17 -07:00
5 changed files with 511 additions and 11 deletions

View file

@ -53,12 +53,14 @@ class FineGrammar(Grammar):
return seq(
self.CLASS,
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
mark(self._class_body, field="body"),
self.LCURLY,
mark(opt(self.class_body), field="body"),
self.RCURLY,
)
@rule
def _class_body(self) -> Rule:
return seq(self.LCURLY, self.RCURLY) | seq(self.LCURLY, self._class_members, self.RCURLY)
@rule("ClassBody")
def class_body(self) -> Rule:
    """Rule for the contents of a class between its braces.

    Simply delegates to the `_class_members` rule. The "ClassBody" argument
    is presumably the name this rule gets in the resulting tree -- confirm
    against the `rule` decorator.
    """
    return self._class_members
@rule
def _class_members(self) -> Rule:
@ -140,11 +142,17 @@ class FineGrammar(Grammar):
# Block
@rule("Block")
def block(self) -> Rule:
return (
seq(self.LCURLY, self.RCURLY)
| seq(self.LCURLY, self.expression, self.RCURLY)
| seq(self.LCURLY, self._statement_list, self.RCURLY)
| seq(self.LCURLY, self._statement_list, self.expression, self.RCURLY)
return alt(
seq(self.LCURLY, self.RCURLY),
seq(self.LCURLY, self.block_body, self.RCURLY),
)
@rule("BlockBody")
def block_body(self) -> Rule:
    """Rule for the non-empty contents of a block.

    Accepts one of three shapes: a lone expression, a statement list, or a
    statement list followed by a trailing expression.
    """
    return alt(
        self.expression,
        self._statement_list,
        seq(self._statement_list, self.expression),
    )
@rule
@ -419,6 +427,7 @@ class FineGrammar(Grammar):
if __name__ == "__main__":
from pathlib import Path
from parser.parser import dump_lexer_table
from parser.emacs import emit_emacs_major_mode
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
grammar = FineGrammar()
@ -427,5 +436,7 @@ if __name__ == "__main__":
lexer = grammar.compile_lexer()
dump_lexer_table(lexer)
emit_tree_sitter_grammar(grammar, Path(__file__).parent / "tree-sitter-fine")
emit_tree_sitter_queries(grammar, Path(__file__).parent / "tree-sitter-fine")
ts_path = Path(__file__).parent / "tree-sitter-fine"
emit_tree_sitter_grammar(grammar, ts_path)
emit_tree_sitter_queries(grammar, ts_path)
emit_emacs_major_mode(grammar, ts_path / "fine.el")

238
parser/emacs.py Normal file
View file

@ -0,0 +1,238 @@
# https://www.masteringemacs.org/article/lets-write-a-treesitter-major-mode
import dataclasses
import itertools
import pathlib
import textwrap
from parser.tree_sitter import terminal_name
from parser.generated_source import (
begin_manual_section,
end_manual_section,
merge_existing,
sign_generated_source,
signature_token,
)
from . import parser
@dataclasses.dataclass(frozen=True, order=True)
class FaceQuery:
    """One font-lock query: highlight `node` (or one of its fields) with `face`.

    Instances are frozen so they can be de-duplicated through a set, and
    ordered so they can be sorted before being grouped by feature.
    """

    # `order=True` compares fields in declaration order, so keeping `feature`
    # first makes a plain sort cluster queries by feature for groupby.
    feature: str  # Important to be first!
    # Name of the Emacs face to apply.
    face: str
    # Name of the tree node the query matches.
    node: str
    # Field of `node` to highlight, or None to highlight the node itself.
    field: str | None
def gather_faces(grammar: parser.Grammar):
    """Build the elisp `defvar` holding the font-lock rules for `grammar`.

    Walks every non-transparent non-terminal (inlining transparent ones) and
    every terminal, collects a FaceQuery for each highlight that carries both
    a font-lock feature and a font-lock face, and renders the de-duplicated,
    feature-grouped queries as the text of a
    `(defvar <name>-font-lock-rules ...)` form.

    Raises:
        Exception: if a highlighted item inside a rule has no string field
            name attached to it.
    """
    by_name = {nt.name: nt for nt in grammar.non_terminals()}

    def scoop(node: str, items: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]:
        # Collect the queries contributed by one flattened production of `node`.
        found: list[FaceQuery] = []
        for item in items:
            if isinstance(item, str):
                # A reference to another rule: only transparent rules are
                # inlined, and each at most once per `visited` set.
                rule = by_name[item]
                if not rule.transparent or rule.name in visited:
                    continue
                visited.add(rule.name)
                for production in rule.fn(grammar).flatten(with_metadata=True):
                    found.extend(scoop(node, production, visited))
                continue

            if not isinstance(item, tuple):
                continue

            # A (metadata, children) pair: children first, then the pair itself.
            meta, sub = item
            found.extend(scoop(node, sub, visited))

            highlight = meta.get("highlight")
            if not isinstance(highlight, parser.HighlightMeta):
                continue

            field_name = meta.get("field")
            if not isinstance(field_name, str):
                raise Exception("Highlight must come with a field name")  # TODO

            feature = highlight.font_lock_feature
            face = highlight.font_lock_face
            if feature and face:
                found.append(
                    FaceQuery(feature=feature, face=face, node=node, field=field_name)
                )
        return found

    collected: list[FaceQuery] = []

    # Queries that target a field of a (non-transparent) rule node.
    for rule in grammar.non_terminals():
        if rule.transparent:
            continue
        for production in rule.fn(grammar).flatten(with_metadata=True):
            collected.extend(scoop(rule.name, production, set()))

    # Queries that target a terminal node directly.
    for terminal in grammar.terminals():
        highlight = terminal.meta.get("highlight")
        if not isinstance(highlight, parser.HighlightMeta):
            continue
        feature = highlight.font_lock_feature
        face = highlight.font_lock_face
        if feature and face:
            collected.append(
                FaceQuery(feature=feature, face=face, node=terminal_name(terminal), field=None)
            )

    # Duplicates do happen; drop them, then sort so groupby sees each feature
    # as one contiguous run.
    ordered = sorted(set(collected))

    groups = []
    for feature, members in itertools.groupby(ordered, key=lambda q: q.feature):
        patterns = [
            f"({q.node} {q.field}: _ @{q.face})" if q.field else f"({q.node}) @{q.face}"
            for q in members
        ]
        joined = "\n ".join(patterns)
        groups.append(f":language {grammar.name}\n:override t\n:feature {feature}\n({joined})\n")

    rules = textwrap.indent("\n".join(groups), " ").strip()
    return f"""
(defvar {grammar.name}-font-lock-rules
'({rules})
"Tree-sitter font lock rules for {grammar.name}.")
""".strip()
def emit_emacs_major_mode(grammar: parser.Grammar, file_path: pathlib.Path | str):
    """Write (or regenerate) an Emacs tree-sitter major mode for `grammar`.

    The generated elisp is signed, then merged with any file already at
    `file_path` so that the contents of the marked manual sections survive
    regeneration. Parent directories are created as needed.

    Args:
        grammar: The grammar to generate the major mode for.
        file_path: Destination of the `.el` file.

    Raises:
        OSError: if an existing file cannot be read, or the output cannot be
            written. An unreadable existing file is deliberately NOT
            overwritten, since that would discard hand-written sections.
        UnicodeDecodeError: if an existing file is not valid UTF-8.
    """
    file_path = pathlib.Path(file_path)

    face_var = gather_faces(grammar)

    # NOTE: The last manual section is spelled 'eplogue'. The name is kept
    # as-is because it is the key under which existing files store user edits.
    contents = f"""
;;; {file_path.name} --- Major mode for editing {grammar.name} --- -*- lexical-binding: t -*-
;; NOTE: This file is partially generated.
;; Only modify marked sections, or your modifications will be lost!
;; {signature_token()}
;; {begin_manual_section('commentary')}
;; This is free and unencumbered software released into the public domain.
;; Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
;; software, either in source code form or as a compiled binary, for any purpose,
;; commercial or non-commercial, and by any means.
;;
;; In jurisdictions that recognize copyright laws, the author or authors of this
;; software dedicate any and all copyright interest in the software to the public
;; domain. We make this dedication for the benefit of the public at large and to
;; the detriment of our heirs and successors. We intend this dedication to be an
;; overt act of relinquishment in perpetuity of all present and future rights to
;; this software under copyright law.
;;
;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
;; AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
;; ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
;; WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
;;; Commentary:
;; (Nobody has written anything about the major mode yet.)
;; {end_manual_section()}
;;; Code:
(require 'treesit)
;; {begin_manual_section('prologue')}
;; {end_manual_section()}
{face_var}
(defun {grammar.name}-ts-setup ()
"Setup for {grammar.name}-mode."
;; {begin_manual_section('setup_prologue')}
;; {end_manual_section()}
;; Set up the font-lock rules.
(setq-local treesit-font-lock-settings
(apply #'treesit-font-lock-rules
{grammar.name}-font-lock-rules))
;; {begin_manual_section('feature_list')}
;; NOTE: This list is just to get you started; these are some of the standard
;; features and somewhat standard positions in the feature list. You can
;; edit this to more closely match your grammar's output. (The info page
;; for treesit-font-lock-feature-list describes what it does nicely.)
(setq-local treesit-font-lock-feature-list
'((comment definition)
(keyword string)
(assignment attribute builtin constant escape-sequence number type)
(bracket delimiter error function operator property variable)))
;; {end_manual_section()}
;; {begin_manual_section('setup_epilogue')}
;; If you want to set up more do it here.
;; {end_manual_section()}
(treesit-major-mode-setup))
;;;###autoload
(define-derived-mode {grammar.name}-mode prog-mode "{grammar.name}"
"Major mode for editing {grammar.name} files."
(setq-local font-lock-defaults nil)
(when (treesit-ready-p '{grammar.name})
(treesit-parser-create '{grammar.name})
({grammar.name}-ts-setup)))
;; {begin_manual_section('eplogue')}
;; {end_manual_section()}
;;; {file_path.name} ends here
""".lstrip()

    # Sign the contents to give folks a way to check that they haven't been
    # messed with.
    contents = sign_generated_source(contents)

    # Pull existing file contents out and merge them with the generated code.
    # This preserves hand-editing in approved areas. Only "there is no file
    # yet" is an expected condition; any other failure (unreadable file, bad
    # encoding, a bug in the merge) must propagate, because carrying on would
    # silently overwrite the user's manual sections.
    try:
        existing_contents = file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        existing_contents = None

    if existing_contents is not None:
        contents = merge_existing(existing_contents, contents)

    # Ensure that parent directories are created as necessary for the output.
    file_path.parent.mkdir(parents=True, exist_ok=True)

    # And write the file!
    file_path.write_text(contents, encoding="utf-8")

107
parser/generated_source.py Normal file
View file

@ -0,0 +1,107 @@
import hashlib
import re
import typing
# Placeholder that sign_generated_source() swaps for `Signed<<digest>>`.
_SIGNING_SLUG = "!*RVCugYltjOsekrgCXTlKuqIrfy4-ScohO22mEDCr2ts"
_SIGNING_PREFIX = "generated source"

# The section name is a run of non-whitespace characters. (`[^ ]+` would also
# swallow the line terminator, making the name depend on how the marker line
# happens to end: "foo\n" vs "foo\r\n" vs "foo".)
_BEGIN_PATTERN = re.compile(r"BEGIN MANUAL SECTION (\S+)")
_END_PATTERN = re.compile(r"END MANUAL SECTION")
_SIGNATURE_PATTERN = re.compile(_SIGNING_PREFIX + r" Signed<<([0-9a-f]+)>>")
def signature_token() -> str:
    """Return the placeholder to embed in generated text before signing.

    It is the signing prefix, a space, and the signing slug; the slug is the
    part sign_generated_source() later replaces with the real signature.
    """
    return f"{_SIGNING_PREFIX} {_SIGNING_SLUG}"
def begin_manual_section(name: str) -> str:
    """Return the marker text that opens the manual section called `name`.

    The caller is responsible for wrapping the marker in whatever comment
    syntax the generated file uses.
    """
    return "BEGIN MANUAL SECTION {}".format(name)
def end_manual_section() -> str:
    """Return the marker text that closes the current manual section.

    The caller is responsible for wrapping the marker in whatever comment
    syntax the generated file uses.
    """
    # A plain literal: the original f-string had no placeholders.
    return "END MANUAL SECTION"
def _compute_digest(source: str) -> str:
    """Return the SHA-256 hex digest of the generated parts of `source`.

    Lines inside manual sections are left out of the hash, so editing them
    does not change the digest.
    """
    hasher = hashlib.sha256()
    for name, chunk in _iterate_sections(source):
        if name is not None:
            # Manual section: not covered by the signature.
            continue
        hasher.update("".join(chunk).encode("utf-8"))
    return hasher.hexdigest()
def sign_generated_source(source: str) -> str:
    """Return `source` with its signing slug replaced by a real signature.

    The signature is the digest of the automatically generated sections only,
    computed while the slug is still in place.

    Raises:
        ValueError: if `source` contains no signing slug to replace.
    """
    digest = _compute_digest(source)
    if _SIGNING_SLUG not in source:
        raise ValueError("Source did not contain a signature token to replace")
    return source.replace(_SIGNING_SLUG, "Signed<<" + digest + ">>")
def is_signed(source: str) -> bool:
    """Report whether `source` contains something shaped like a signature.

    This only looks for the signature text; it does not check that the
    signature is valid.
    """
    found = _SIGNATURE_PATTERN.search(source)
    return found is not None
def validate_signature(source: str) -> bool:
    """Check that the signature embedded in `source` matches its contents.

    Returns:
        True when the recorded digest equals the digest recomputed over the
        generated sections, False when they differ.

    Raises:
        ValueError: if `source` holds more than one signature, or none.
    """
    found = _SIGNATURE_PATTERN.findall(source)
    if len(found) > 1:
        raise ValueError("Multiple signatures found in source")
    if not found:
        raise ValueError("Source does not appear to be signed")

    (recorded,) = found

    # Put the slug back so the text is exactly what was hashed at signing time.
    restored = source.replace("Signed<<" + recorded + ">>", _SIGNING_SLUG)
    return _compute_digest(restored) == recorded
def merge_existing(existing: str, generated: str) -> str:
    """Return `generated` with manual sections filled in from `existing`.

    The layout always follows `generated`. A manual section keeps the freshly
    generated body unless `existing` has a section of the same name, in which
    case the existing body is used instead.
    """
    preserved = _extract_manual_sections(existing)

    merged: list[str] = []
    for name, chunk in _iterate_sections(generated):
        if name is not None and name in preserved:
            chunk = preserved[name]
        merged.extend(chunk)
    return "".join(merged)
def _extract_manual_sections(code: str) -> dict[str, list[str]]:
    """Map each manual section name in `code` to the lines of its body.

    When a name occurs more than once, the bodies are concatenated in the
    order they appear.
    """
    sections: dict[str, list[str]] = {}
    for name, chunk in _iterate_sections(code):
        if name is None:
            continue
        if name in sections:
            sections[name].extend(chunk)
        else:
            sections[name] = chunk
    return sections
def _iterate_sections(code: str) -> typing.Generator[tuple[str | None, list[str]], None, None]:
    """Split `code` into alternating generated and manual chunks.

    Yields `(name, lines)` pairs in file order. `name` is None for generated
    text and the section name for a manual section. The BEGIN and END marker
    lines belong to the generated chunks around a section, never to the
    section body. Lines keep their line endings, so joining every yielded
    line reproduces `code`.
    """
    section_name: str | None = None
    pending: list[str] = []

    for line in code.splitlines(keepends=True):
        if section_name is not None:
            # Inside a manual section: an END marker closes it, and the
            # marker line itself starts the next generated chunk.
            if _END_PATTERN.search(line):
                yield (section_name, pending)
                pending = []
                section_name = None
            pending.append(line)
            continue

        # In generated text: a BEGIN marker is the last line of this chunk.
        pending.append(line)
        begin = _BEGIN_PATTERN.search(line)
        if begin is not None:
            yield (None, pending)
            pending = []
            section_name = begin.group(1)

    # Whatever is left over, including an unterminated manual section.
    yield (section_name, pending)

View file

@ -2374,14 +2374,20 @@ class SyntaxMeta:
class HighlightMeta(SyntaxMeta):
    """Highlighting metadata: a dotted scope plus optional Emacs font-lock info."""

    # Scope parts joined with "." (e.g. ("a", "b") becomes "a.b").
    scope: str
    # Name of the Emacs face to use, or None when none has been assigned.
    font_lock_face: str | None
    # Name of the font-lock feature, or None when none has been assigned.
    font_lock_feature: str | None

    def __init__(self, *scope: str):
        self.scope = ".".join(scope)
        # No Emacs mapping by default; subclasses fill these in.
        self.font_lock_face = None
        self.font_lock_feature = None
class CommentHighlight(HighlightMeta):
    """Highlight under the "comment" scope; Emacs comment face and feature."""

    def __init__(self, *scope: str):
        super().__init__("comment", *scope)
        self.font_lock_face = "font-lock-comment-face"
        self.font_lock_feature = "comment"
class BlockCommentHighlight(CommentHighlight):
@ -2397,6 +2403,8 @@ class LineCommentHighlight(CommentHighlight):
class ConstantHighlight(HighlightMeta):
    """Highlight under the "constant" scope; Emacs constant face and feature."""

    def __init__(self, *scope: str):
        super().__init__("constant", *scope)
        self.font_lock_face = "font-lock-constant-face"
        self.font_lock_feature = "constant"
class LanguageConstantHighlight(ConstantHighlight):
@ -2407,6 +2415,8 @@ class LanguageConstantHighlight(ConstantHighlight):
class NumericConstantHighlight(ConstantHighlight):
    """Constant highlight with a "numeric" scope part; Emacs number face and feature."""

    def __init__(self, *scope: str):
        super().__init__("numeric", *scope)
        # Replace the generic constant mapping set by the parent.
        self.font_lock_feature = "number"
        self.font_lock_face = "font-lock-number-face"
class EntityHighlight(HighlightMeta):
@ -2417,21 +2427,27 @@ class EntityHighlight(HighlightMeta):
class NameEntityHighlight(EntityHighlight):
    """Entity highlight with a "name" scope part.

    Uses the variable-name face under the "definition" feature; subclasses
    may override either.
    """

    def __init__(self, *scope: str):
        super().__init__("name", *scope)
        self.font_lock_face = "font-lock-variable-name-face"
        self.font_lock_feature = "definition"
class FunctionNameEntityHighlight(NameEntityHighlight):
    """Name highlight with a "function" scope part; Emacs function-name face."""

    def __init__(self, *scope: str):
        super().__init__("function", *scope)
        # Only the face changes; the feature set by the parent is kept.
        self.font_lock_face = "font-lock-function-name-face"
class TypeNameEntityHighlight(NameEntityHighlight):
    """Name highlight with a "type" scope part; Emacs type face and feature."""

    def __init__(self, *scope: str):
        super().__init__("type", *scope)
        self.font_lock_feature = "type"
        # Without this the face set by NameEntityHighlight
        # (font-lock-variable-name-face) would be inherited, so type names
        # would be drawn as variables even though they are filed under the
        # "type" feature.
        self.font_lock_face = "font-lock-type-face"
class KeywordHighlight(HighlightMeta):
    """Highlight under the "keyword" scope; Emacs keyword face and feature."""

    def __init__(self, *scope: str):
        super().__init__("keyword", *scope)
        self.font_lock_feature = "keyword"
        self.font_lock_face = "font-lock-keyword-face"
class ControlKeywordHighlight(KeywordHighlight):
@ -2447,6 +2463,8 @@ class ConditionalControlKeywordHighlight(ControlKeywordHighlight):
class OperatorKeywordHighlight(KeywordHighlight):
    """Keyword highlight with an "operator" scope part; Emacs operator face and feature."""

    def __init__(self, *scope: str):
        super().__init__("operator", *scope)
        # Replace the generic keyword mapping set by the parent.
        self.font_lock_feature = "operator"
        self.font_lock_face = "font-lock-operator-face"
class ExpressionOperatorKeywordHighlight(OperatorKeywordHighlight):
@ -2462,6 +2480,8 @@ class OtherKeywordHighlight(KeywordHighlight):
class PunctuationHighlight(HighlightMeta):
    """Highlight under the "punctuation" scope; Emacs punctuation face, "delimiter" feature."""

    def __init__(self, *scope: str):
        super().__init__("punctuation", *scope)
        self.font_lock_feature = "delimiter"
        self.font_lock_face = "font-lock-punctuation-face"
class SeparatorPunctuationHighlight(PunctuationHighlight):
@ -2472,6 +2492,8 @@ class SeparatorPunctuationHighlight(PunctuationHighlight):
class ParenthesisPunctuationHighlight(PunctuationHighlight):
    """Punctuation highlight with a "parenthesis" scope part; Emacs bracket face and feature."""

    def __init__(self, *scope: str):
        super().__init__("parenthesis", *scope)
        # Replace the generic punctuation mapping set by the parent.
        self.font_lock_feature = "bracket"
        self.font_lock_face = "font-lock-bracket-face"
class OpenParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight):
@ -2487,6 +2509,8 @@ class CloseParenthesisPunctuationHighlight(ParenthesisPunctuationHighlight):
class CurlyBracePunctuationHighlight(PunctuationHighlight):
    """Punctuation highlight with a "curlybrace" scope part; Emacs bracket face and feature."""

    def __init__(self, *scope: str):
        super().__init__("curlybrace", *scope)
        # Replace the generic punctuation mapping set by the parent.
        self.font_lock_feature = "bracket"
        self.font_lock_face = "font-lock-bracket-face"
class OpenCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight):
@ -2502,6 +2526,8 @@ class CloseCurlyBracePunctuationHighlight(CurlyBracePunctuationHighlight):
class SquareBracketPunctuationHighlight(PunctuationHighlight):
    """Punctuation highlight with a "squarebracket" scope part; Emacs bracket face and feature."""

    def __init__(self, *scope: str):
        super().__init__("squarebracket", *scope)
        # Replace the generic punctuation mapping set by the parent.
        self.font_lock_feature = "bracket"
        self.font_lock_face = "font-lock-bracket-face"
class OpenSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight):
@ -2517,6 +2543,8 @@ class CloseSquareBracketPunctuationHighlight(SquareBracketPunctuationHighlight):
class StorageHighlight(HighlightMeta):
    """Highlight under the "storage" scope; shown in Emacs with the keyword face and feature."""

    def __init__(self, *scope: str):
        super().__init__("storage", *scope)
        self.font_lock_feature = "keyword"
        self.font_lock_face = "font-lock-keyword-face"
class TypeStorageHighlight(StorageHighlight):
@ -2542,6 +2570,8 @@ class StructTypeStorageHighlight(TypeStorageHighlight):
class StringHighlight(HighlightMeta):
    """Highlight under the "string" scope; Emacs string face and feature."""

    def __init__(self, *scope: str):
        super().__init__("string", *scope)
        self.font_lock_feature = "string"
        self.font_lock_face = "font-lock-string-face"
class QuotedStringHighlight(StringHighlight):
@ -2562,11 +2592,15 @@ class DoubleQuotedStringHighlight(QuotedStringHighlight):
class VariableHighlight(HighlightMeta):
    """Highlight under the "variable" scope; Emacs variable-use face, "variable" feature."""

    def __init__(self, *scope: str):
        super().__init__("variable", *scope)
        self.font_lock_feature = "variable"
        self.font_lock_face = "font-lock-variable-use-face"
class LanguageVariableHighlight(VariableHighlight):
    """Variable highlight with a "language" scope part; Emacs builtin face and feature."""

    def __init__(self, *scope: str):
        super().__init__("language", *scope)
        # Replace the generic variable mapping set by the parent.
        self.font_lock_feature = "builtin"
        self.font_lock_face = "font-lock-builtin-face"
class _Highlight:

View file

@ -0,0 +1,110 @@
import parser.generated_source as generated_source
def test_signature():
    """Signing a source with two manual sections yields a valid signature."""
    input_source = f"""
This is a random thing.
Put your slug here: {generated_source.signature_token()}
Here are some more things:
- Machine Generated
- More Machine Gnerated
{generated_source.begin_manual_section('foo')}
- You can edit here!
{generated_source.end_manual_section()}
- But not here.
{generated_source.begin_manual_section('bar')}
- You can edit here too!
{generated_source.end_manual_section()}
- Also not here.
"""
    signed = generated_source.sign_generated_source(input_source)
    # Signing must actually rewrite the slug...
    assert signed != input_source
    # ...and the result must be recognized and verified as signed.
    assert generated_source.is_signed(signed)
    assert generated_source.validate_signature(signed)
def test_manual_changes():
    """Editing text inside a manual section leaves the signature valid."""
    input_source = f"""
This is a random thing.
Put your slug here: {generated_source.signature_token()}
Here are some more things:
- Machine Generated
- More Machine Gnerated
{generated_source.begin_manual_section('foo')}
- XXXXX
{generated_source.end_manual_section()}
- But not here.
"""
    signed = generated_source.sign_generated_source(input_source)
    # XXXXX sits inside the 'foo' manual section.
    modified = signed.replace("XXXXX", "YYYYY")
    assert modified != signed
    assert generated_source.is_signed(modified)
    assert generated_source.validate_signature(modified)
def test_bad_changes():
    """Editing text outside every manual section invalidates the signature."""
    input_source = f"""
This is a random thing.
Put your slug here: {generated_source.signature_token()}
Here are some more things:
- Machine Generated
- More Machine Gnerated
{generated_source.begin_manual_section('foo')}
- XXXXX
{generated_source.end_manual_section()}
- ZZZZZ
"""
    signed = generated_source.sign_generated_source(input_source)
    # ZZZZZ sits after the END marker, i.e. in generated text.
    modified = signed.replace("ZZZZZ", "YYYYY")
    assert modified != signed
    # Still looks signed, but the digest no longer matches.
    assert generated_source.is_signed(modified)
    assert not generated_source.validate_signature(modified)
def test_merge_changes():
    """Merging keeps the new layout but restores the old manual bodies by name."""
    # Existing file: sections 'foo' (body B) then 'bar' (body D).
    original_source = f"""
A
// {generated_source.begin_manual_section('foo')}
B
// {generated_source.end_manual_section()}
C
// {generated_source.begin_manual_section('bar')}
D
// {generated_source.end_manual_section()}
"""
    # Regenerated file: the sections appear in the opposite order with new
    # default bodies (F and G) and different surrounding generated text.
    new_source = f"""
E
// {generated_source.begin_manual_section('bar')}
F
// {generated_source.end_manual_section()}
// {generated_source.begin_manual_section('foo')}
G
// {generated_source.end_manual_section()}
H
"""
    merged = generated_source.merge_existing(original_source, new_source)
    # Generated text (E, H, markers, ordering) comes from new_source; the
    # section bodies (D, B) come from original_source.
    assert (
        merged
        == f"""
E
// {generated_source.begin_manual_section('bar')}
D
// {generated_source.end_manual_section()}
// {generated_source.begin_manual_section('foo')}
B
// {generated_source.end_manual_section()}
H
"""
    )