It's really not clear how to track it and how to compose it with groups yet. Really very difficult.
532 lines
18 KiB
Python
532 lines
18 KiB
Python
# This is an example grammar.
|
|
from parser import (
|
|
Assoc,
|
|
Grammar,
|
|
Re,
|
|
Rule,
|
|
Terminal,
|
|
TriviaMode,
|
|
alt,
|
|
br,
|
|
group,
|
|
highlight,
|
|
indent,
|
|
mark,
|
|
nl,
|
|
opt,
|
|
rule,
|
|
seq,
|
|
sp,
|
|
)
|
|
|
|
|
|
class FineGrammar(Grammar):
    """Example grammar: files of imports, classes, exports and statements.

    Each ``@rule`` method returns the production for one nonterminal, and the
    ``Terminal`` class attributes at the bottom define the tokens. The
    combinators used inside the rules (``group``, ``indent``, ``sp``, ``nl``,
    ``br``, ``mark``) come from the project's ``parser`` package.

    NOTE(review): ``sp``/``nl``/``br``/``indent``/``group`` look like
    pretty-printer layout hints rather than syntax -- confirm against the
    ``parser`` package documentation.
    """

    # generator = parser.GenerateLR1
    start = "File"

    # Names of the terminals listed as trivia (blanks, line breaks, comments).
    trivia = ["BLANKS", "LINE_BREAKS", "COMMENT"]

    # NOTE(review): this source was extracted with its whitespace mangled; the
    # literal below is reproduced exactly as found (one space) -- confirm the
    # intended indent width.
    pretty_indent = " "

    def __init__(self) -> None:
        """Initialise the grammar with its operator precedence table."""
        super().__init__(
            # NOTE(review): entries presumably run from loosest-binding
            # (assignment) to tightest-binding (member access) -- confirm
            # against the Grammar base class.
            precedence=[
                (Assoc.RIGHT, [self.EQUAL]),
                (Assoc.LEFT, [self.OR]),
                (Assoc.LEFT, [self.IS]),
                (Assoc.LEFT, [self.AND]),
                (Assoc.LEFT, [self.EQUALEQUAL, self.BANGEQUAL]),
                (Assoc.LEFT, [self.LESS, self.GREATER, self.GREATEREQUAL, self.LESSEQUAL]),
                (Assoc.LEFT, [self.PLUS, self.MINUS]),
                (Assoc.LEFT, [self.STAR, self.SLASH]),
                (Assoc.LEFT, [self.primary_expression]),
                (Assoc.LEFT, [self.LPAREN]),
                (Assoc.LEFT, [self.DOT]),
                #
                # If there's a confusion about whether to make an IF
                # statement or an expression, prefer the statement.
                #
                (Assoc.NONE, [self.if_statement]),
            ],
        )

    # File structure

    # Start symbol: a file is a list of file-level statements.
    @rule("File")
    def file(self) -> Rule:
        return self._file_statement_list

    # One or more file-level statements (left-recursive), with `nl` between
    # consecutive entries.
    @rule
    def _file_statement_list(self) -> Rule:
        return alt(
            self._file_statement,
            self._file_statement_list + nl + self._file_statement,
        )

    # Imports, classes and exports are only accepted here, at file level;
    # everything else goes through the ordinary statement rule.
    @rule
    def _file_statement(self) -> Rule:
        return (
            self.import_statement | self.class_declaration | self.export_statement | self._statement
        )

    # import "<string>" as <identifier>;
    # The semicolon follows the identifier directly, like every other
    # semicolon-terminated rule in this grammar.
    @rule
    def import_statement(self) -> Rule:
        return group(
            self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, self.SEMICOLON
        )

    # class <name> { <optional body> }
    @rule("ClassDeclaration")
    def class_declaration(self) -> Rule:
        return seq(
            group(
                self.CLASS,
                sp,
                mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
                sp,
                self.LCURLY,
            ),
            indent(nl, mark(opt(self.class_body), field="body")),
            nl,
            self.RCURLY,
            nl,  # Extra newline at the end of the class
        )

    @rule("ClassBody")
    def class_body(self) -> Rule:
        return self._class_members

    # One or more class members (left-recursive), with `nl` between entries.
    @rule
    def _class_members(self) -> Rule:
        return self._class_member | seq(self._class_members, nl, self._class_member)

    @rule
    def _class_member(self) -> Rule:
        return self.field_declaration | self.function_declaration

    # <identifier>: <type>;
    @rule("FieldDecl")
    def field_declaration(self) -> Rule:
        return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON)

    # Types

    @rule("TypeExpression")
    def type_expression(self) -> Rule:
        return self.alternate_type | self.type_identifier

    # <type> or <type identifier>; left-recursive through type_expression.
    @rule("AlternateType")
    def alternate_type(self) -> Rule:
        return group(self.type_expression, sp, self.OR, sp, self.type_identifier)

    @rule("TypeIdentifier")
    def type_identifier(self) -> Rule:
        return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)

    # Exports

    # `export` followed by a class, function or let statement, or by a
    # semicolon-terminated list of identifiers.
    @rule
    def export_statement(self) -> Rule:
        return alt(
            group(self.EXPORT, sp, self.class_declaration),
            group(self.EXPORT, sp, self.function_declaration),
            group(self.EXPORT, sp, self.let_statement),
            group(self.EXPORT, sp, self.export_list, self.SEMICOLON),
        )

    # Comma-separated identifiers (right-recursive, no trailing comma).
    @rule
    def export_list(self) -> Rule:
        return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list)

    # Functions

    # fun <name> <parameters> [-> <type>] <block>
    @rule("FunctionDecl")
    def function_declaration(self) -> Rule:
        return seq(
            group(
                group(
                    group(
                        self.FUN,
                        sp,
                        mark(
                            self.IDENTIFIER,
                            field="name",
                            highlight=highlight.entity.name.function,
                        ),
                    ),
                    nl,
                    mark(self.function_parameters, field="parameters"),
                ),
                mark(
                    opt(indent(sp, group(self.ARROW, sp, self.type_expression))),
                    field="return_type",
                ),
            ),
            sp,
            mark(self.block, field="body"),
            nl,
        )

    # Parenthesised parameter list. The optional contents are a single first
    # parameter, a first parameter with a trailing comma, or a first parameter
    # followed by a comma and further parameters.
    @rule("ParamList")
    def function_parameters(self) -> Rule:
        return group(
            self.LPAREN,
            indent(
                nl,
                opt(
                    self._first_parameter
                    | seq(self._first_parameter, self.COMMA)
                    | group(self._first_parameter, self.COMMA, sp, self._parameter_list)
                ),
            ),
            nl,
            self.RPAREN,
        )

    # Only the first parameter position accepts the bare `self` token.
    @rule
    def _first_parameter(self) -> Rule:
        return self.SELF | self.parameter

    # Comma-separated parameters (right-recursive, no trailing comma).
    @rule
    def _parameter_list(self) -> Rule:
        return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list)

    # <identifier>: <type>
    @rule("Parameter")
    def parameter(self) -> Rule:
        return group(self.IDENTIFIER, self.COLON, sp, self.type_expression)

    # Block

    # Either an empty pair of braces or braces around a block body.
    @rule("Block")
    def block(self) -> Rule:
        return alt(
            group(self.LCURLY, nl, self.RCURLY),
            group(self.LCURLY, indent(sp, self.block_body), sp, self.RCURLY),
        )

    # A lone expression, a statement list, or a statement list followed by a
    # final expression.
    @rule("BlockBody")
    def block_body(self) -> Rule:
        return alt(
            self.expression,
            self._statement_list,
            seq(self._statement_list, br, self.expression),
        )

    # One or more statements (left-recursive), with `br` between entries.
    @rule
    def _statement_list(self) -> Rule:
        return self._statement | seq(self._statement_list, br, self._statement)

    @rule
    def _statement(self) -> Rule:
        return (
            self.function_declaration
            | self.let_statement
            | self.return_statement
            | self.for_statement
            | self.if_statement
            | self.while_statement
            | self.expression_statement
        )

    # let <identifier> = <expression>;
    @rule("LetStatement")
    def let_statement(self) -> Rule:
        return group(
            self.LET,
            sp,
            self.IDENTIFIER,
            indent(sp, self.EQUAL, indent(sp, group(self.expression, self.SEMICOLON))),
        )

    # return <expression>;   or   return;
    @rule("ReturnStatement")
    def return_statement(self) -> Rule:
        return alt(
            group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))),
            group(self.RETURN, self.SEMICOLON),
        )

    # for <iterator variable> in <expression> <block>
    # `sp` separates the header from the block, matching while_statement and
    # conditional_expression.
    @rule("ForStatement")
    def for_statement(self) -> Rule:
        return group(
            group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)),
            sp,
            self.block,
        )

    @rule("IteratorVariable")
    def iterator_variable(self) -> Rule:
        return self.IDENTIFIER

    # Statement-position `if`: the same production as the conditional
    # expression, under its own rule name (see the precedence table).
    @rule("IfStatement")
    def if_statement(self) -> Rule:
        return self.conditional_expression

    # while <expression> <block>
    @rule
    def while_statement(self) -> Rule:
        return group(group(self.WHILE, sp, self.expression), sp, self.block)

    # <expression>;
    @rule
    def expression_statement(self) -> Rule:
        return seq(self.expression, self.SEMICOLON)

    # Expressions

    @rule(transparent=True)
    def expression(self) -> Rule:
        return self.binary_expression | self.is_expression | self.primary_expression

    # <expression> <operator> <expression>, one alternative per operator.
    # The grammar is ambiguous as written; the precedence table passed to the
    # base class in __init__ lists these operators.
    @rule("BinaryExpression")
    def binary_expression(self) -> Rule:
        return alt(
            group(self.expression, sp, self.EQUAL, sp, self.expression),
            group(self.expression, sp, self.OR, sp, self.expression),
            group(self.expression, sp, self.AND, sp, self.expression),
            group(self.expression, sp, self.EQUALEQUAL, sp, self.expression),
            group(self.expression, sp, self.BANGEQUAL, sp, self.expression),
            group(self.expression, sp, self.LESS, sp, self.expression),
            group(self.expression, sp, self.LESSEQUAL, sp, self.expression),
            group(self.expression, sp, self.GREATER, sp, self.expression),
            group(self.expression, sp, self.GREATEREQUAL, sp, self.expression),
            group(self.expression, sp, self.PLUS, sp, self.expression),
            group(self.expression, sp, self.MINUS, sp, self.expression),
            group(self.expression, sp, self.STAR, sp, self.expression),
            group(self.expression, sp, self.SLASH, sp, self.expression),
        )

    # <expression> is <pattern>
    @rule("IsExpression")
    def is_expression(self) -> Rule:
        return group(self.expression, sp, self.IS, indent(sp, self.pattern))

    # Identifiers, literals, `self`, prefix `!` and `-`, blocks, conditionals,
    # constructors, match, calls (with and without arguments), member access
    # and parenthesised expressions.
    @rule
    def primary_expression(self) -> Rule:
        return (
            self.identifier_expression
            | self.literal_expression
            | self.SELF
            | seq(self.BANG, self.primary_expression)
            | seq(self.MINUS, self.primary_expression)
            | self.block
            | self.conditional_expression
            | self.list_constructor_expression
            | self.object_constructor_expression
            | self.match_expression
            | seq(self.primary_expression, self.LPAREN, self.RPAREN)
            | group(
                self.primary_expression,
                self.LPAREN,
                indent(nl, self._expression_list),
                nl,
                self.RPAREN,
            )
            | group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER))
            | group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN)
        )

    @rule("IdentifierExpression")
    def identifier_expression(self) -> Rule:
        return self.IDENTIFIER

    @rule("Literal")
    def literal_expression(self) -> Rule:
        return self.NUMBER | self.STRING | self.TRUE | self.FALSE

    # if <expression> <block>, optionally followed by `else` and either
    # another conditional (an else-if chain) or a final block.
    @rule("ConditionalExpression")
    def conditional_expression(self) -> Rule:
        return (
            seq(group(self.IF, sp, self.expression), sp, self.block)
            | seq(
                group(self.IF, sp, self.expression),
                sp,
                self.block,
                sp,
                self.ELSE,
                sp,
                self.conditional_expression,
            )
            | seq(
                group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block
            )
        )

    # [] or [<expression list>]
    @rule
    def list_constructor_expression(self) -> Rule:
        return alt(
            group(self.LSQUARE, nl, self.RSQUARE),
            group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE),
        )

    # Comma-separated expressions (right-recursive); a trailing comma is
    # accepted.
    @rule
    def _expression_list(self) -> Rule:
        return (
            self.expression
            | seq(self.expression, self.COMMA)
            | seq(self.expression, self.COMMA, sp, self._expression_list)
        )

    # match <expression> { <match arms> }
    @rule
    def match_expression(self) -> Rule:
        return group(
            group(self.MATCH, sp, self.expression, sp, self.LCURLY),
            indent(sp, self.match_arms),
            sp,
            self.RCURLY,
        )

    @rule("MatchArms")
    def match_arms(self) -> Rule:
        return self._match_arms

    # Comma-separated match arms (right-recursive); a trailing comma is
    # accepted.
    @rule
    def _match_arms(self) -> Rule:
        return (
            self.match_arm
            | seq(self.match_arm, self.COMMA)
            | seq(self.match_arm, self.COMMA, br, self._match_arms)
        )

    # <pattern> -> <expression>
    @rule("MatchArm")
    def match_arm(self) -> Rule:
        return group(self.pattern, sp, self.ARROW, sp, self.expression)

    # A pattern core, optionally preceded by a variable binding; the bound
    # form may additionally be followed by `and <expression>`.
    @rule("Pattern")
    def pattern(self) -> Rule:
        return (
            group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression)
            | group(self.variable_binding, self._pattern_core)
            | self._pattern_core
        )

    @rule
    def _pattern_core(self) -> Rule:
        return self.type_expression | self.wildcard_pattern

    @rule("WildcardPattern")
    def wildcard_pattern(self) -> Rule:
        return self.UNDERSCORE

    # <identifier>:
    @rule("VariableBinding")
    def variable_binding(self) -> Rule:
        return seq(self.IDENTIFIER, self.COLON)

    # new <type identifier> <field list>
    @rule
    def object_constructor_expression(self) -> Rule:
        return group(self.NEW, sp, self.type_identifier, sp, self.field_list)

    # {} or { <field values> }
    @rule
    def field_list(self) -> Rule:
        return alt(
            seq(self.LCURLY, self.RCURLY),
            group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY),
        )

    # Comma-separated field values (right-recursive); a trailing comma is
    # accepted.
    @rule
    def field_values(self) -> Rule:
        return (
            self.field_value
            | seq(self.field_value, self.COMMA)
            | seq(self.field_value, self.COMMA, sp, self.field_values)
        )

    # Either a bare identifier or <identifier>: <expression>.
    @rule
    def field_value(self) -> Rule:
        return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))

    # Terminals
    #
    # NOTE(review): definition order is kept exactly as in the original in
    # case the lexer uses it to break ties between keywords and IDENTIFIER --
    # confirm before reordering.

    # Trivia terminals (named in `trivia` above).
    BLANKS = Terminal(Re.set(" ", "\t").plus())
    LINE_BREAKS = Terminal(Re.set("\r", "\n").plus(), trivia_mode=TriviaMode.NewLine)
    COMMENT = Terminal(
        # "//" followed by everything up to, but not including, the newline.
        Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
        highlight=highlight.comment.line,
        trivia_mode=TriviaMode.LineComment,
    )

    ARROW = Terminal("->", highlight=highlight.keyword.operator)
    AS = Terminal("as", highlight=highlight.keyword.operator.expression)
    # BAR is not referenced by any rule in this class.
    BAR = Terminal("|", highlight=highlight.keyword.operator.expression)
    CLASS = Terminal("class", highlight=highlight.storage.type.klass)
    COLON = Terminal(":", highlight=highlight.punctuation.separator)
    ELSE = Terminal("else", highlight=highlight.keyword.control.conditional)
    FOR = Terminal("for", highlight=highlight.keyword.control)
    FUN = Terminal("fun", highlight=highlight.storage.type.function)
    IDENTIFIER = Terminal(
        # A letter or underscore, then any run of letters, digits, underscores.
        Re.seq(
            Re.set(("a", "z"), ("A", "Z"), "_"),
            Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
        ),
    )
    IF = Terminal("if", highlight=highlight.keyword.control.conditional)
    IMPORT = Terminal("import", highlight=highlight.keyword.other)
    IN = Terminal("in", highlight=highlight.keyword.operator)
    LCURLY = Terminal("{", highlight=highlight.punctuation.curly_brace.open)
    RCURLY = Terminal("}", highlight=highlight.punctuation.curly_brace.close)
    LET = Terminal("let", highlight=highlight.keyword.other)
    RETURN = Terminal("return", highlight=highlight.keyword.control)
    SEMICOLON = Terminal(";", highlight=highlight.punctuation.separator)
    STRING = Terminal(
        # Double-quoted string.
        Re.seq(
            Re.literal('"'),
            (~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
            Re.literal('"'),
        )
        # Single-quoted string.
        | Re.seq(
            Re.literal("'"),
            (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
            Re.literal("'"),
        ),
        highlight=highlight.string.quoted,
    )
    WHILE = Terminal("while", highlight=highlight.keyword.control)
    EQUAL = Terminal("=", highlight=highlight.keyword.operator.expression)
    LPAREN = Terminal("(", highlight=highlight.punctuation.parenthesis.open)
    RPAREN = Terminal(")", highlight=highlight.punctuation.parenthesis.close)
    COMMA = Terminal(",", highlight=highlight.punctuation.separator)
    # NOTE(review): the explicit name "SELFF" differs from the attribute name;
    # the reason is not visible in this file -- confirm before changing.
    SELF = Terminal("self", name="SELFF", highlight=highlight.variable.language)
    OR = Terminal("or", highlight=highlight.keyword.operator.expression)
    IS = Terminal("is", highlight=highlight.keyword.operator.expression)
    AND = Terminal("and", highlight=highlight.keyword.operator.expression)
    EQUALEQUAL = Terminal("==", highlight=highlight.keyword.operator.expression)
    BANGEQUAL = Terminal("!=", highlight=highlight.keyword.operator.expression)
    LESS = Terminal("<", highlight=highlight.keyword.operator.expression)
    GREATER = Terminal(">", highlight=highlight.keyword.operator.expression)
    LESSEQUAL = Terminal("<=", highlight=highlight.keyword.operator.expression)
    GREATEREQUAL = Terminal(">=", highlight=highlight.keyword.operator.expression)
    PLUS = Terminal("+", highlight=highlight.keyword.operator.expression)
    MINUS = Terminal("-", highlight=highlight.keyword.operator.expression)
    STAR = Terminal("*", highlight=highlight.keyword.operator.expression)
    SLASH = Terminal("/", highlight=highlight.keyword.operator.expression)
    NUMBER = Terminal(
        # Digits, an optional ".digits" fraction, and an optional exponent
        # ("e" or "E", optional sign, digits).
        Re.seq(
            Re.set(("0", "9")).plus(),
            Re.seq(
                Re.literal("."),
                Re.set(("0", "9")).plus(),
            ).question(),
            Re.seq(
                Re.set("e", "E"),
                Re.set("+", "-").question(),
                Re.set(("0", "9")).plus(),
            ).question(),
        ),
        highlight=highlight.constant.numeric,
    )
    TRUE = Terminal("true", highlight=highlight.constant.language)
    FALSE = Terminal("false", highlight=highlight.constant.language)
    BANG = Terminal("!", highlight=highlight.keyword.operator.expression)
    DOT = Terminal(".", highlight=highlight.punctuation.separator)
    MATCH = Terminal("match", highlight=highlight.keyword.other)
    EXPORT = Terminal("export", highlight=highlight.keyword.other)
    UNDERSCORE = Terminal("_", highlight=highlight.variable.language)
    NEW = Terminal("new", highlight=highlight.keyword.operator)
    LSQUARE = Terminal("[", highlight=highlight.punctuation.square_bracket.open)
    RSQUARE = Terminal("]", highlight=highlight.punctuation.square_bracket.close)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the grammar tables, dump the lexer table, and
    # write the tree-sitter and Emacs artifacts into a directory next to this
    # file.
    from pathlib import Path

    from parser.parser import dump_lexer_table
    from parser.emacs import emit_emacs_major_mode
    from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries

    # TODO: Actually generate a lexer/parser for some runtime.
    grammar = FineGrammar()
    grammar.build_table()

    # Compile the lexer and dump its table.
    dump_lexer_table(grammar.compile_lexer())

    # Generate tree-sitter parser and emacs mode; all output goes under one
    # directory beside this source file.
    output_dir = Path(__file__).parent.joinpath("tree-sitter-fine")
    emit_tree_sitter_grammar(grammar, output_dir)
    emit_tree_sitter_queries(grammar, output_dir)
    emit_emacs_major_mode(grammar, output_dir.joinpath("fine.el"))

    # TODO: Generate pretty-printer code.
|