Associate metadata with terminals

This is a half-assed attempt at doing syntax coloring, which I think
will almost certainly turn out to be insufficient. I'm committing it
just to record some of the work I've done, but... BUT.

Probably trying to match tree-sitter is a better way of doing
this. (But, like, emitting tree-sitter grammars? Really? Wow, dude.
Way to give up.)
This commit is contained in:
John Doty 2024-08-27 15:43:07 -07:00
parent 76ef85483e
commit 49ad7fdb52
2 changed files with 149 additions and 60 deletions

View file

@ -2,16 +2,7 @@
import re import re
import typing import typing
from parser import ( from parser import Assoc, Grammar, Nothing, rule, seq, Rule, Terminal, Re, TerminalKind
Assoc,
Grammar,
Nothing,
rule,
seq,
Rule,
Terminal,
Re,
)
class FineGrammar(Grammar): class FineGrammar(Grammar):
@ -333,30 +324,34 @@ class FineGrammar(Grammar):
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression) return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
COMMENT = Terminal(Re.seq(Re.literal("//"), Re.set("\n").invert().star())) COMMENT = Terminal(
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
kind=TerminalKind.Comment.Line,
)
ARROW = Terminal("->") ARROW = Terminal("->", kind=TerminalKind.Keyword.Operator)
AS = Terminal("as") AS = Terminal("as", kind=TerminalKind.Keyword.Operator.Expression)
BAR = Terminal("bar") BAR = Terminal("|", kind=TerminalKind.Keyword.Operator.Expression)
CLASS = Terminal("class") CLASS = Terminal("class", kind=TerminalKind.Storage.Type.Class)
COLON = Terminal("colon") COLON = Terminal(":", kind=TerminalKind.Punctuation.Separator)
ELSE = Terminal("else") ELSE = Terminal("else", kind=TerminalKind.Keyword.Control.Conditional)
FOR = Terminal("for") FOR = Terminal("for", kind=TerminalKind.Keyword.Control)
FUN = Terminal("fun") FUN = Terminal("fun", kind=TerminalKind.Storage.Type.Function)
IDENTIFIER = Terminal( IDENTIFIER = Terminal(
Re.seq( Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"), Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
) ),
# kind=TerminalKind.Variable, #?
) )
IF = Terminal("if") IF = Terminal("if", kind=TerminalKind.Keyword.Control.Conditional)
IMPORT = Terminal("import") IMPORT = Terminal("import", kind=TerminalKind.Keyword.Other)
IN = Terminal("in") IN = Terminal("in", kind=TerminalKind.Keyword.Operator)
LCURLY = Terminal("{") LCURLY = Terminal("{", kind=TerminalKind.Punctuation.CurlyBrace.Open)
LET = Terminal("Let") RCURLY = Terminal("}", kind=TerminalKind.Punctuation.CurlyBrace.Close)
RCURLY = Terminal("}") LET = Terminal("Let", kind=TerminalKind.Keyword.Other)
RETURN = Terminal("return") RETURN = Terminal("return", kind=TerminalKind.Keyword.Control)
SEMICOLON = Terminal(";") SEMICOLON = Terminal(";", kind=TerminalKind.Punctuation.Separator)
STRING = Terminal( STRING = Terminal(
# Double-quoted string. # Double-quoted string.
Re.seq( Re.seq(
@ -369,27 +364,28 @@ class FineGrammar(Grammar):
Re.literal("'"), Re.literal("'"),
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(), (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
Re.literal("'"), Re.literal("'"),
) ),
kind=TerminalKind.String.Quoted,
) )
WHILE = Terminal("while") WHILE = Terminal("while", kind=TerminalKind.Keyword.Control)
EQUAL = Terminal("=") EQUAL = Terminal("=", kind=TerminalKind.Keyword.Operator.Expression)
LPAREN = Terminal("(") LPAREN = Terminal("(", kind=TerminalKind.Punctuation.Parenthesis.Open)
RPAREN = Terminal(")") RPAREN = Terminal(")", kind=TerminalKind.Punctuation.Parenthesis.Close)
COMMA = Terminal(",") COMMA = Terminal(",", kind=TerminalKind.Punctuation.Separator)
SELF = Terminal("self", name="SELFF") SELF = Terminal("self", name="SELFF", kind=TerminalKind.Variable.Language)
OR = Terminal("or") OR = Terminal("or", kind=TerminalKind.Keyword.Operator.Expression)
IS = Terminal("is") IS = Terminal("is", kind=TerminalKind.Keyword.Operator.Expression)
AND = Terminal("and") AND = Terminal("and", kind=TerminalKind.Keyword.Operator.Expression)
EQUALEQUAL = Terminal("==") EQUALEQUAL = Terminal("==", kind=TerminalKind.Keyword.Operator.Expression)
BANGEQUAL = Terminal("!=") BANGEQUAL = Terminal("!=", kind=TerminalKind.Keyword.Operator.Expression)
LESS = Terminal("<") LESS = Terminal("<", kind=TerminalKind.Keyword.Operator.Expression)
GREATER = Terminal(">") GREATER = Terminal(">", kind=TerminalKind.Keyword.Operator.Expression)
LESSEQUAL = Terminal("<=") LESSEQUAL = Terminal("<=", kind=TerminalKind.Keyword.Operator.Expression)
GREATEREQUAL = Terminal(">=") GREATEREQUAL = Terminal(">=", kind=TerminalKind.Keyword.Operator.Expression)
PLUS = Terminal("+") PLUS = Terminal("+", kind=TerminalKind.Keyword.Operator.Expression)
MINUS = Terminal("-") MINUS = Terminal("-", kind=TerminalKind.Keyword.Operator.Expression)
STAR = Terminal("*") STAR = Terminal("*", kind=TerminalKind.Keyword.Operator.Expression)
SLASH = Terminal("/") SLASH = Terminal("/", kind=TerminalKind.Keyword.Operator.Expression)
NUMBER = Terminal( NUMBER = Terminal(
Re.seq( Re.seq(
Re.set(("0", "9")).plus(), Re.set(("0", "9")).plus(),
@ -402,18 +398,19 @@ class FineGrammar(Grammar):
Re.set("+", "-").question(), Re.set("+", "-").question(),
Re.set(("0", "9")).plus(), Re.set(("0", "9")).plus(),
).question(), ).question(),
) ),
kind=TerminalKind.Constant.Numeric,
) )
TRUE = Terminal("true") TRUE = Terminal("true", kind=TerminalKind.Constant.Language)
FALSE = Terminal("false") FALSE = Terminal("false", kind=TerminalKind.Constant.Language)
BANG = Terminal("!") BANG = Terminal("!", kind=TerminalKind.Keyword.Operator.Expression)
DOT = Terminal(".") DOT = Terminal(".", kind=TerminalKind.Punctuation.Separator)
MATCH = Terminal("match") MATCH = Terminal("match", kind=TerminalKind.Keyword.Other)
EXPORT = Terminal("export") EXPORT = Terminal("export", kind=TerminalKind.Keyword.Other)
UNDERSCORE = Terminal("_") UNDERSCORE = Terminal("_", kind=TerminalKind.Variable.Language)
NEW = Terminal("new") NEW = Terminal("new", kind=TerminalKind.Keyword.Operator)
LSQUARE = Terminal("[") LSQUARE = Terminal("[", kind=TerminalKind.Punctuation.SquareBracket.Open)
RSQUARE = Terminal("]") RSQUARE = Terminal("]", kind=TerminalKind.Punctuation.SquareBracket.Close)
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------

View file

@ -1609,10 +1609,14 @@ class Terminal(Rule):
value: str | None value: str | None
pattern: "str | Re" pattern: "str | Re"
meta: dict[str, typing.Any]
regex: bool
def __init__(self, pattern, *, name=None): def __init__(self, pattern: "str|Re", *, name: str | None = None, **kwargs):
self.value = name self.value = name
self.pattern = pattern self.pattern = pattern
self.meta = kwargs
self.regex = isinstance(pattern, Re)
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]: def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
# We are just ourselves when flattened. # We are just ourselves when flattened.
@ -2537,3 +2541,91 @@ def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"):
pass pass
f.write("}\n") f.write("}\n")
# NOTE: We have a rich metadata system, man, wow, how cool are we?
#
# The whole point of this stuff here is to allow automatic
# generation/maintenance of syntax coloring for editors. And maybe some
# other stuff? This is *extremely provisional*, I'm not even sure it
# makes sense yet. Tree-sitter works differently, for example, and it's
# not clear at all what we want to generate for any particular editor.
#
# This here might be enough to produce extremely basic TextMate
# grammars but anything more complicated will want tree patterns
# anyway, and we can only do tree patterns by influencing the grammar.
class TerminalMeta:
    """Common ancestor for metadata marker classes attached to terminals.

    The class carries no attributes or behaviour of its own; it only gives
    every marker a shared base type.
    """
class TerminalKind(TerminalMeta):
    """Namespace of marker classes naming the kind of token a terminal is.

    Every member is an empty nested class deriving from ``TerminalMeta``; a
    marker is selected by attribute path, for example
    ``TerminalKind.Keyword.Control.Conditional``.

    NOTE(review): the hierarchy looks like it follows TextMate-style scope
    names (``keyword.control.conditional`` and so on) -- confirm before
    relying on that when generating editor grammars.
    """

    class Comment(TerminalMeta):
        """Comment tokens."""

        class Block(TerminalMeta):
            """Block comment."""

        class Line(TerminalMeta):
            """Line comment."""

    class Constant(TerminalMeta):
        """Constant tokens."""

        class Language(TerminalMeta):
            """Constant that is part of the language itself."""

        class Numeric(TerminalMeta):
            """Numeric constant."""

    class Keyword(TerminalMeta):
        """Keyword tokens."""

        class Control(TerminalMeta):
            """Control-flow keyword."""

            class Conditional(TerminalMeta):
                """Conditional control-flow keyword."""

        class Operator(TerminalMeta):
            """Operator."""

            class Expression(TerminalMeta):
                """Operator that appears in expressions."""

        class Other(TerminalMeta):
            """Keyword that fits none of the categories above."""

    class Punctuation(TerminalMeta):
        """Punctuation tokens."""

        class Separator(TerminalMeta):
            """Separator punctuation."""

        class Parenthesis(TerminalMeta):
            """Parentheses."""

            class Open(TerminalMeta):
                """Opening parenthesis."""

            class Close(TerminalMeta):
                """Closing parenthesis."""

        class CurlyBrace(TerminalMeta):
            """Curly braces."""

            class Open(TerminalMeta):
                """Opening curly brace."""

            class Close(TerminalMeta):
                """Closing curly brace."""

        class SquareBracket(TerminalMeta):
            """Square brackets."""

            class Open(TerminalMeta):
                """Opening square bracket."""

            class Close(TerminalMeta):
                """Closing square bracket."""

    class Storage(TerminalMeta):
        """Storage-related tokens."""

        class Type(TerminalMeta):
            """Storage type."""

            class Class(TerminalMeta):
                """Class storage type."""

            class Function(TerminalMeta):
                """Function storage type."""

    class String(TerminalMeta):
        """String tokens."""

        class Quoted(TerminalMeta):
            """Quoted string."""

            class Single(TerminalMeta):
                """Single-quoted string."""

            class Double(TerminalMeta):
                """Double-quoted string."""

    class Variable(TerminalMeta):
        """Variable tokens."""

        class Language(TerminalMeta):
            """Variable that is part of the language itself."""