diff --git a/grammar.py b/grammar.py
index d674111..bdfb371 100644
--- a/grammar.py
+++ b/grammar.py
@@ -1,8 +1,5 @@
 # This is an example grammar.
-import re
-import typing
-
-from parser import Assoc, Grammar, Nothing, rule, seq, Rule, Terminal, Re, TerminalKind
+from parser import Assoc, Grammar, Nothing, rule, seq, Rule, Terminal, Re, Highlight, mark, opt
 
 
 class FineGrammar(Grammar):
@@ -53,7 +50,11 @@ class FineGrammar(Grammar):
 
     @rule("ClassDeclaration")
     def class_declaration(self) -> Rule:
-        return seq(self.CLASS, self.IDENTIFIER, self._class_body)
+        return seq(
+            self.CLASS,
+            mark(self.IDENTIFIER, highlight=Highlight.Entity.Name.Type),
+            self._class_body,
+        )
 
     @rule
     def _class_body(self) -> Rule:
@@ -100,21 +101,23 @@ class FineGrammar(Grammar):
     # Functions
     @rule("FunctionDecl")
     def function_declaration(self) -> Rule:
-        return seq(self.FUN, self.IDENTIFIER, self.function_parameters, self.block) | seq(
+        return seq(
             self.FUN,
-            self.IDENTIFIER,
+            mark(self.IDENTIFIER, highlight=Highlight.Entity.Name.Function),
             self.function_parameters,
-            self.ARROW,
-            self.type_expression,
+            opt(self.ARROW, self.type_expression),
             self.block,
         )
 
     @rule("ParamList")
     def function_parameters(self) -> Rule:
-        return (
-            seq(self.LPAREN, self.RPAREN)
-            | seq(self.LPAREN, self._first_parameter, self.RPAREN)
-            | seq(self.LPAREN, self._first_parameter, self.COMMA, self._parameter_list, self.RPAREN)
+        return seq(
+            self.LPAREN,
+            opt(
+                self._first_parameter,
+                opt(self.COMMA, self._parameter_list),
+            ),
+            self.RPAREN,
         )
 
     @rule
@@ -132,11 +135,12 @@ class FineGrammar(Grammar):
     # Block
     @rule("Block")
     def block(self) -> Rule:
-        return (
-            seq(self.LCURLY, self.RCURLY)
-            | seq(self.LCURLY, self.expression, self.RCURLY)
-            | seq(self.LCURLY, self._statement_list, self.RCURLY)
-            | seq(self.LCURLY, self._statement_list, self.expression, self.RCURLY)
+        return seq(
+            self.LCURLY,
+            # Both parts are independently optional: {}, {e}, {s}, {s e}.
+            opt(self._statement_list),
+            opt(self.expression),
+            self.RCURLY,
         )
 
     @rule
@@ -326,32 +330,32 @@ class FineGrammar(Grammar):
     BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
     COMMENT = Terminal(
         Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
-        kind=TerminalKind.Comment.Line,
+        highlight=Highlight.Comment.Line,
     )
 
-    ARROW = Terminal("->", kind=TerminalKind.Keyword.Operator)
-    AS = Terminal("as", kind=TerminalKind.Keyword.Operator.Expression)
-    BAR = Terminal("|", kind=TerminalKind.Keyword.Operator.Expression)
-    CLASS = Terminal("class", kind=TerminalKind.Storage.Type.Class)
-    COLON = Terminal(":", kind=TerminalKind.Punctuation.Separator)
-    ELSE = Terminal("else", kind=TerminalKind.Keyword.Control.Conditional)
-    FOR = Terminal("for", kind=TerminalKind.Keyword.Control)
-    FUN = Terminal("fun", kind=TerminalKind.Storage.Type.Function)
+    ARROW = Terminal("->", highlight=Highlight.Keyword.Operator)
+    AS = Terminal("as", highlight=Highlight.Keyword.Operator.Expression)
+    BAR = Terminal("|", highlight=Highlight.Keyword.Operator.Expression)
+    CLASS = Terminal("class", highlight=Highlight.Storage.Type.Class)
+    COLON = Terminal(":", highlight=Highlight.Punctuation.Separator)
+    ELSE = Terminal("else", highlight=Highlight.Keyword.Control.Conditional)
+    FOR = Terminal("for", highlight=Highlight.Keyword.Control)
+    FUN = Terminal("fun", highlight=Highlight.Storage.Type.Function)
     IDENTIFIER = Terminal(
         Re.seq(
             Re.set(("a", "z"), ("A", "Z"), "_"),
             Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
         ),
-        # kind=TerminalKind.Variable, #?
+        # highlight=Highlight.Variable, #?
     )
-    IF = Terminal("if", kind=TerminalKind.Keyword.Control.Conditional)
-    IMPORT = Terminal("import", kind=TerminalKind.Keyword.Other)
-    IN = Terminal("in", kind=TerminalKind.Keyword.Operator)
-    LCURLY = Terminal("{", kind=TerminalKind.Punctuation.CurlyBrace.Open)
-    RCURLY = Terminal("}", kind=TerminalKind.Punctuation.CurlyBrace.Close)
-    LET = Terminal("let", kind=TerminalKind.Keyword.Other)
-    RETURN = Terminal("return", kind=TerminalKind.Keyword.Control)
-    SEMICOLON = Terminal(";", kind=TerminalKind.Punctuation.Separator)
+    IF = Terminal("if", highlight=Highlight.Keyword.Control.Conditional)
+    IMPORT = Terminal("import", highlight=Highlight.Keyword.Other)
+    IN = Terminal("in", highlight=Highlight.Keyword.Operator)
+    LCURLY = Terminal("{", highlight=Highlight.Punctuation.CurlyBrace.Open)
+    RCURLY = Terminal("}", highlight=Highlight.Punctuation.CurlyBrace.Close)
+    LET = Terminal("let", highlight=Highlight.Keyword.Other)
+    RETURN = Terminal("return", highlight=Highlight.Keyword.Control)
+    SEMICOLON = Terminal(";", highlight=Highlight.Punctuation.Separator)
     STRING = Terminal(
         # Double-quoted string.
         Re.seq(
@@ -365,27 +369,27 @@ class FineGrammar(Grammar):
             (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
             Re.literal("'"),
         ),
-        kind=TerminalKind.String.Quoted,
+        highlight=Highlight.String.Quoted,
     )
-    WHILE = Terminal("while", kind=TerminalKind.Keyword.Control)
-    EQUAL = Terminal("=", kind=TerminalKind.Keyword.Operator.Expression)
-    LPAREN = Terminal("(", kind=TerminalKind.Punctuation.Parenthesis.Open)
-    RPAREN = Terminal(")", kind=TerminalKind.Punctuation.Parenthesis.Close)
-    COMMA = Terminal(",", kind=TerminalKind.Punctuation.Separator)
-    SELF = Terminal("self", name="SELFF", kind=TerminalKind.Variable.Language)
-    OR = Terminal("or", kind=TerminalKind.Keyword.Operator.Expression)
-    IS = Terminal("is", kind=TerminalKind.Keyword.Operator.Expression)
-    AND = Terminal("and", kind=TerminalKind.Keyword.Operator.Expression)
-    EQUALEQUAL = Terminal("==", kind=TerminalKind.Keyword.Operator.Expression)
-    BANGEQUAL = Terminal("!=", kind=TerminalKind.Keyword.Operator.Expression)
-    LESS = Terminal("<", kind=TerminalKind.Keyword.Operator.Expression)
-    GREATER = Terminal(">", kind=TerminalKind.Keyword.Operator.Expression)
-    LESSEQUAL = Terminal("<=", kind=TerminalKind.Keyword.Operator.Expression)
-    GREATEREQUAL = Terminal(">=", kind=TerminalKind.Keyword.Operator.Expression)
-    PLUS = Terminal("+", kind=TerminalKind.Keyword.Operator.Expression)
-    MINUS = Terminal("-", kind=TerminalKind.Keyword.Operator.Expression)
-    STAR = Terminal("*", kind=TerminalKind.Keyword.Operator.Expression)
-    SLASH = Terminal("/", kind=TerminalKind.Keyword.Operator.Expression)
+    WHILE = Terminal("while", highlight=Highlight.Keyword.Control)
+    EQUAL = Terminal("=", highlight=Highlight.Keyword.Operator.Expression)
+    LPAREN = Terminal("(", highlight=Highlight.Punctuation.Parenthesis.Open)
+    RPAREN = Terminal(")", highlight=Highlight.Punctuation.Parenthesis.Close)
+    COMMA = Terminal(",", highlight=Highlight.Punctuation.Separator)
+    SELF = Terminal("self", name="SELFF", highlight=Highlight.Variable.Language)
+    OR = Terminal("or", highlight=Highlight.Keyword.Operator.Expression)
+    IS = Terminal("is", highlight=Highlight.Keyword.Operator.Expression)
+    AND = Terminal("and", highlight=Highlight.Keyword.Operator.Expression)
+    EQUALEQUAL = Terminal("==", highlight=Highlight.Keyword.Operator.Expression)
+    BANGEQUAL = Terminal("!=", highlight=Highlight.Keyword.Operator.Expression)
+    LESS = Terminal("<", highlight=Highlight.Keyword.Operator.Expression)
+    GREATER = Terminal(">", highlight=Highlight.Keyword.Operator.Expression)
+    LESSEQUAL = Terminal("<=", highlight=Highlight.Keyword.Operator.Expression)
+    GREATEREQUAL = Terminal(">=", highlight=Highlight.Keyword.Operator.Expression)
+    PLUS = Terminal("+", highlight=Highlight.Keyword.Operator.Expression)
+    MINUS = Terminal("-", highlight=Highlight.Keyword.Operator.Expression)
+    STAR = Terminal("*", highlight=Highlight.Keyword.Operator.Expression)
+    SLASH = Terminal("/", highlight=Highlight.Keyword.Operator.Expression)
     NUMBER = Terminal(
         Re.seq(
             Re.set(("0", "9")).plus(),
@@ -399,18 +403,18 @@ class FineGrammar(Grammar):
                 Re.set(("0", "9")).plus(),
             ).question(),
         ),
-        kind=TerminalKind.Constant.Numeric,
+        highlight=Highlight.Constant.Numeric,
     )
-    TRUE = Terminal("true", kind=TerminalKind.Constant.Language)
-    FALSE = Terminal("false", kind=TerminalKind.Constant.Language)
-    BANG = Terminal("!", kind=TerminalKind.Keyword.Operator.Expression)
-    DOT = Terminal(".", kind=TerminalKind.Punctuation.Separator)
-    MATCH = Terminal("match", kind=TerminalKind.Keyword.Other)
-    EXPORT = Terminal("export", kind=TerminalKind.Keyword.Other)
-    UNDERSCORE = Terminal("_", kind=TerminalKind.Variable.Language)
-    NEW = Terminal("new", kind=TerminalKind.Keyword.Operator)
-    LSQUARE = Terminal("[", kind=TerminalKind.Punctuation.SquareBracket.Open)
-    RSQUARE = Terminal("]", kind=TerminalKind.Punctuation.SquareBracket.Close)
+    TRUE = Terminal("true", highlight=Highlight.Constant.Language)
+    FALSE = Terminal("false", highlight=Highlight.Constant.Language)
+    BANG = Terminal("!", highlight=Highlight.Keyword.Operator.Expression)
+    DOT = Terminal(".", highlight=Highlight.Punctuation.Separator)
+    MATCH = Terminal("match", highlight=Highlight.Keyword.Other)
+    EXPORT = Terminal("export", highlight=Highlight.Keyword.Other)
+    UNDERSCORE = Terminal("_", highlight=Highlight.Variable.Language)
+    NEW = Terminal("new", highlight=Highlight.Keyword.Operator)
+    LSQUARE = Terminal("[", highlight=Highlight.Punctuation.SquareBracket.Open)
+    RSQUARE = Terminal("]", highlight=Highlight.Punctuation.SquareBracket.Close)
 
 
 if __name__ == "__main__":
diff --git a/parser/parser.py b/parser/parser.py
index 98ac13b..40e7830 100644
--- a/parser/parser.py
+++ b/parser/parser.py
@@ -1715,6 +1715,19 @@ class NothingRule(Rule):
 Nothing = NothingRule()
 
 
+class OptionalRule(Rule):
+    """A rule that matches its inner rule, or nothing at all."""
+
+    def __init__(self, rule: Rule):
+        self.rule = rule
+
+    def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
+        # Every expansion of the wrapped rule first, then whatever Nothing
+        # expands to, which stands for "the optional part is absent".
+        yield from self.rule.flatten()
+        yield from Nothing.flatten()
+
+
 def seq(*args: Rule) -> Rule:
     """A rule that matches a sequence of rules.
 
@@ -1726,6 +1739,18 @@ def seq(*args: Rule) -> Rule:
     return result
 
 
+def opt(*args: Rule) -> Rule:
+    """A rule that matches the sequence of `args`, or nothing at all."""
+    return OptionalRule(seq(*args))
+
+
+def mark(rule: Rule, **kwargs) -> Rule:
+    """Return `rule` unchanged; the metadata in `kwargs` is discarded for now."""
+    # TODO: Figure out how to incorporate this into the world.
+    del kwargs
+    return rule
+
+
 @typing.overload
 def rule(f: typing.Callable, /) -> Rule: ...
 
@@ -2554,78 +2579,86 @@ def dump_lexer_table(table: LexerTable, name: str = "lexer.dot"):
 # This here might be enough to produce extremely basic TextMate
 # grammars but anything more complicated will want tree patterns
 # anyway, and we can only do tree patterns by influencing the grammar.
-class TerminalMeta:
+class SyntaxMeta:
     pass
 
 
-class TerminalKind(TerminalMeta):
+class Highlight(SyntaxMeta):
-    class Comment(TerminalMeta):
+    class Comment(SyntaxMeta):
-        class Block(TerminalMeta):
+        class Block(SyntaxMeta):
             pass
 
-        class Line(TerminalMeta):
+        class Line(SyntaxMeta):
             pass
 
-    class Constant(TerminalMeta):
+    class Constant(SyntaxMeta):
-        class Language(TerminalMeta):
+        class Language(SyntaxMeta):
             pass
 
-        class Numeric(TerminalMeta):
+        class Numeric(SyntaxMeta):
             pass
 
+    class Entity(SyntaxMeta):
+        class Name(SyntaxMeta):
+            class Function(SyntaxMeta):
+                pass
+
+            class Type(SyntaxMeta):
+                pass
+
-    class Keyword(TerminalMeta):
+    class Keyword(SyntaxMeta):
-        class Control(TerminalMeta):
+        class Control(SyntaxMeta):
-            class Conditional(TerminalMeta):
+            class Conditional(SyntaxMeta):
                 pass
 
-        class Operator(TerminalMeta):
+        class Operator(SyntaxMeta):
-            class Expression(TerminalMeta):
+            class Expression(SyntaxMeta):
                 pass
 
-        class Other(TerminalMeta):
+        class Other(SyntaxMeta):
             pass
 
-    class Punctuation(TerminalMeta):
+    class Punctuation(SyntaxMeta):
-        class Separator(TerminalMeta):
+        class Separator(SyntaxMeta):
             pass
 
-        class Parenthesis(TerminalMeta):
+        class Parenthesis(SyntaxMeta):
-            class Open(TerminalMeta):
+            class Open(SyntaxMeta):
                 pass
 
-            class Close(TerminalMeta):
+            class Close(SyntaxMeta):
                 pass
 
-        class CurlyBrace(TerminalMeta):
+        class CurlyBrace(SyntaxMeta):
-            class Open(TerminalMeta):
+            class Open(SyntaxMeta):
                 pass
 
-            class Close(TerminalMeta):
+            class Close(SyntaxMeta):
                 pass
 
-        class SquareBracket(TerminalMeta):
+        class SquareBracket(SyntaxMeta):
-            class Open(TerminalMeta):
+            class Open(SyntaxMeta):
                 pass
 
-            class Close(TerminalMeta):
+            class Close(SyntaxMeta):
                 pass
 
-    class Storage(TerminalMeta):
+    class Storage(SyntaxMeta):
-        class Type(TerminalMeta):
+        class Type(SyntaxMeta):
-            class Class(TerminalMeta):
+            class Class(SyntaxMeta):
                 pass
 
-            class Function(TerminalMeta):
+            class Function(SyntaxMeta):
                 pass
 
-    class String(TerminalMeta):
+    class String(SyntaxMeta):
-        class Quoted(TerminalMeta):
+        class Quoted(SyntaxMeta):
-            class Single(TerminalMeta):
+            class Single(SyntaxMeta):
                 pass
 
-            class Double(TerminalMeta):
+            class Double(SyntaxMeta):
                 pass
 
-    class Variable(TerminalMeta):
+    class Variable(SyntaxMeta):
-        class Language(TerminalMeta):
+        class Language(SyntaxMeta):
             pass