Associate metadata with terminals
This is a half-assed attempt at doing syntax coloring, which I think will almost certainly turn out to be insufficient. I'm committing it just to record some of the work I've done, BUT trying to match tree-sitter is probably a better way of doing this. (But, like, emitting tree-sitter grammars? Really? Wow, dude. Way to give up.)
This commit is contained in:
parent
76ef85483e
commit
49ad7fdb52
2 changed files with 149 additions and 60 deletions
115
grammar.py
115
grammar.py
|
|
@ -2,16 +2,7 @@
|
|||
import re
|
||||
import typing
|
||||
|
||||
from parser import (
|
||||
Assoc,
|
||||
Grammar,
|
||||
Nothing,
|
||||
rule,
|
||||
seq,
|
||||
Rule,
|
||||
Terminal,
|
||||
Re,
|
||||
)
|
||||
from parser import Assoc, Grammar, Nothing, rule, seq, Rule, Terminal, Re, TerminalKind
|
||||
|
||||
|
||||
class FineGrammar(Grammar):
|
||||
|
|
@ -333,30 +324,34 @@ class FineGrammar(Grammar):
|
|||
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COLON, self.expression)
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
COMMENT = Terminal(Re.seq(Re.literal("//"), Re.set("\n").invert().star()))
|
||||
COMMENT = Terminal(
|
||||
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
|
||||
kind=TerminalKind.Comment.Line,
|
||||
)
|
||||
|
||||
ARROW = Terminal("->")
|
||||
AS = Terminal("as")
|
||||
BAR = Terminal("bar")
|
||||
CLASS = Terminal("class")
|
||||
COLON = Terminal("colon")
|
||||
ELSE = Terminal("else")
|
||||
FOR = Terminal("for")
|
||||
FUN = Terminal("fun")
|
||||
ARROW = Terminal("->", kind=TerminalKind.Keyword.Operator)
|
||||
AS = Terminal("as", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
BAR = Terminal("|", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
CLASS = Terminal("class", kind=TerminalKind.Storage.Type.Class)
|
||||
COLON = Terminal(":", kind=TerminalKind.Punctuation.Separator)
|
||||
ELSE = Terminal("else", kind=TerminalKind.Keyword.Control.Conditional)
|
||||
FOR = Terminal("for", kind=TerminalKind.Keyword.Control)
|
||||
FUN = Terminal("fun", kind=TerminalKind.Storage.Type.Function)
|
||||
IDENTIFIER = Terminal(
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
)
|
||||
),
|
||||
# kind=TerminalKind.Variable, #?
|
||||
)
|
||||
IF = Terminal("if")
|
||||
IMPORT = Terminal("import")
|
||||
IN = Terminal("in")
|
||||
LCURLY = Terminal("{")
|
||||
LET = Terminal("Let")
|
||||
RCURLY = Terminal("}")
|
||||
RETURN = Terminal("return")
|
||||
SEMICOLON = Terminal(";")
|
||||
IF = Terminal("if", kind=TerminalKind.Keyword.Control.Conditional)
|
||||
IMPORT = Terminal("import", kind=TerminalKind.Keyword.Other)
|
||||
IN = Terminal("in", kind=TerminalKind.Keyword.Operator)
|
||||
LCURLY = Terminal("{", kind=TerminalKind.Punctuation.CurlyBrace.Open)
|
||||
RCURLY = Terminal("}", kind=TerminalKind.Punctuation.CurlyBrace.Close)
|
||||
LET = Terminal("Let", kind=TerminalKind.Keyword.Other)
|
||||
RETURN = Terminal("return", kind=TerminalKind.Keyword.Control)
|
||||
SEMICOLON = Terminal(";", kind=TerminalKind.Punctuation.Separator)
|
||||
STRING = Terminal(
|
||||
# Double-quoted string.
|
||||
Re.seq(
|
||||
|
|
@ -369,27 +364,28 @@ class FineGrammar(Grammar):
|
|||
Re.literal("'"),
|
||||
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
|
||||
Re.literal("'"),
|
||||
)
|
||||
),
|
||||
kind=TerminalKind.String.Quoted,
|
||||
)
|
||||
WHILE = Terminal("while")
|
||||
EQUAL = Terminal("=")
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
COMMA = Terminal(",")
|
||||
SELF = Terminal("self", name="SELFF")
|
||||
OR = Terminal("or")
|
||||
IS = Terminal("is")
|
||||
AND = Terminal("and")
|
||||
EQUALEQUAL = Terminal("==")
|
||||
BANGEQUAL = Terminal("!=")
|
||||
LESS = Terminal("<")
|
||||
GREATER = Terminal(">")
|
||||
LESSEQUAL = Terminal("<=")
|
||||
GREATEREQUAL = Terminal(">=")
|
||||
PLUS = Terminal("+")
|
||||
MINUS = Terminal("-")
|
||||
STAR = Terminal("*")
|
||||
SLASH = Terminal("/")
|
||||
WHILE = Terminal("while", kind=TerminalKind.Keyword.Control)
|
||||
EQUAL = Terminal("=", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
LPAREN = Terminal("(", kind=TerminalKind.Punctuation.Parenthesis.Open)
|
||||
RPAREN = Terminal(")", kind=TerminalKind.Punctuation.Parenthesis.Close)
|
||||
COMMA = Terminal(",", kind=TerminalKind.Punctuation.Separator)
|
||||
SELF = Terminal("self", name="SELFF", kind=TerminalKind.Variable.Language)
|
||||
OR = Terminal("or", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
IS = Terminal("is", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
AND = Terminal("and", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
EQUALEQUAL = Terminal("==", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
BANGEQUAL = Terminal("!=", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
LESS = Terminal("<", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
GREATER = Terminal(">", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
LESSEQUAL = Terminal("<=", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
GREATEREQUAL = Terminal(">=", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
PLUS = Terminal("+", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
MINUS = Terminal("-", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
STAR = Terminal("*", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
SLASH = Terminal("/", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
NUMBER = Terminal(
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
|
|
@ -402,18 +398,19 @@ class FineGrammar(Grammar):
|
|||
Re.set("+", "-").question(),
|
||||
Re.set(("0", "9")).plus(),
|
||||
).question(),
|
||||
)
|
||||
),
|
||||
kind=TerminalKind.Constant.Numeric,
|
||||
)
|
||||
TRUE = Terminal("true")
|
||||
FALSE = Terminal("false")
|
||||
BANG = Terminal("!")
|
||||
DOT = Terminal(".")
|
||||
MATCH = Terminal("match")
|
||||
EXPORT = Terminal("export")
|
||||
UNDERSCORE = Terminal("_")
|
||||
NEW = Terminal("new")
|
||||
LSQUARE = Terminal("[")
|
||||
RSQUARE = Terminal("]")
|
||||
TRUE = Terminal("true", kind=TerminalKind.Constant.Language)
|
||||
FALSE = Terminal("false", kind=TerminalKind.Constant.Language)
|
||||
BANG = Terminal("!", kind=TerminalKind.Keyword.Operator.Expression)
|
||||
DOT = Terminal(".", kind=TerminalKind.Punctuation.Separator)
|
||||
MATCH = Terminal("match", kind=TerminalKind.Keyword.Other)
|
||||
EXPORT = Terminal("export", kind=TerminalKind.Keyword.Other)
|
||||
UNDERSCORE = Terminal("_", kind=TerminalKind.Variable.Language)
|
||||
NEW = Terminal("new", kind=TerminalKind.Keyword.Operator)
|
||||
LSQUARE = Terminal("[", kind=TerminalKind.Punctuation.SquareBracket.Open)
|
||||
RSQUARE = Terminal("]", kind=TerminalKind.Punctuation.SquareBracket.Close)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1609,10 +1609,14 @@ class Terminal(Rule):
|
|||
|
||||
value: str | None
|
||||
pattern: "str | Re"
|
||||
meta: dict[str, typing.Any]
|
||||
regex: bool
|
||||
|
||||
def __init__(self, pattern, *, name=None):
|
||||
def __init__(
    self,
    pattern: "str|Re",
    *,
    name: str | None = None,
    **kwargs: typing.Any,
) -> None:
    """Create a terminal from a literal string or a regular expression.

    Args:
        pattern: Either a plain string or an ``Re`` object describing what
            this terminal matches.
        name: Optional explicit name; it is stored as ``self.value`` and
            stays ``None`` when not given.
        **kwargs: Arbitrary extra keyword arguments (for example
            ``kind=...``). They are not interpreted here, only kept
            verbatim in ``self.meta``.
    """
    self.value = name
    self.pattern = pattern
    # Metadata is stored as the raw keyword dict; no keys are validated.
    self.meta = kwargs
    # Remember whether the pattern is an Re object rather than a literal.
    self.regex = isinstance(pattern, Re)
|
||||
|
||||
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
|
||||
# We are just ourselves when flattened.
|
||||
|
|
@ -2537,3 +2541,91 @@ def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"):
|
|||
|
||||
pass
|
||||
f.write("}\n")
|
||||
|
||||
|
||||
# NOTE: We have a rich metadata system, man. Wow, how cool are we?
|
||||
#
|
||||
# The whole point of this stuff here is to allow automatic
|
||||
# generation/maintenance of syntax coloring for editors. And maybe some
|
||||
# other stuff? This is *extremely provisional*, I'm not even sure it
|
||||
# makes sense yet. Tree sitter works differently, for example, and it's
|
||||
# not clear at all what we want to generate for any particular editor.
|
||||
#
|
||||
# This here might be enough to produce extremely basic TextMate
|
||||
# grammars but anything more complicated will want tree patterns
|
||||
# anyway, and we can only do tree patterns by influencing the grammar.
|
||||
class TerminalMeta:
    """Common base class for metadata markers that can be attached to a terminal.

    It has no state and no behavior of its own; it only gives every marker
    class a shared ancestor.
    """

    pass


class TerminalKind(TerminalMeta):
    """Namespace of nested marker classes that classify a terminal.

    A grammar passes one of the nested classes itself (not an instance) as
    metadata when declaring a terminal, for example
    ``kind=TerminalKind.Keyword.Control.Conditional``.

    Every nested class derives directly from ``TerminalMeta``, not from the
    class that encloses it, so the hierarchy is expressed purely through
    nesting (visible in ``__qualname__``) and not through inheritance.

    NOTE(review): the category names look like they follow TextMate-style
    scope naming (comment.line, keyword.control, ...); this is provisional,
    so confirm before relying on an exact mapping.
    """

    class Comment(TerminalMeta):
        class Block(TerminalMeta):
            pass

        class Line(TerminalMeta):
            pass

    class Constant(TerminalMeta):
        class Language(TerminalMeta):
            pass

        class Numeric(TerminalMeta):
            pass

    class Keyword(TerminalMeta):
        class Control(TerminalMeta):
            class Conditional(TerminalMeta):
                pass

        class Operator(TerminalMeta):
            class Expression(TerminalMeta):
                pass

        class Other(TerminalMeta):
            pass

    class Punctuation(TerminalMeta):
        class Separator(TerminalMeta):
            pass

        # Each bracket family has its own Open/Close pair; the pairs are
        # distinct classes even though they share the same short names.
        class Parenthesis(TerminalMeta):
            class Open(TerminalMeta):
                pass

            class Close(TerminalMeta):
                pass

        class CurlyBrace(TerminalMeta):
            class Open(TerminalMeta):
                pass

            class Close(TerminalMeta):
                pass

        class SquareBracket(TerminalMeta):
            class Open(TerminalMeta):
                pass

            class Close(TerminalMeta):
                pass

    class Storage(TerminalMeta):
        class Type(TerminalMeta):
            class Class(TerminalMeta):
                pass

            class Function(TerminalMeta):
                pass

    class String(TerminalMeta):
        class Quoted(TerminalMeta):
            class Single(TerminalMeta):
                pass

            class Double(TerminalMeta):
                pass

    class Variable(TerminalMeta):
        class Language(TerminalMeta):
            pass
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue