lrparsers/grammar.py
John Doty d7a6891519 Finish annotating test grammar, forced breaks, fixes
Forced breaks force a newline in a spot, which is sometimes what we
want. (Like, this syntax should *never* be on a single line.)
2024-09-13 11:57:16 -07:00

513 lines
18 KiB
Python

# This is an example grammar.
from parser import (
Assoc,
Grammar,
Re,
Rule,
Terminal,
alt,
br,
group,
highlight,
indent,
mark,
nl,
opt,
rule,
seq,
sp,
)
class FineGrammar(Grammar):
    # An example grammar for a small curly-brace language: imports, classes,
    # functions, statements, expressions and pattern matching.
    #
    # Every `@rule` method returns the right-hand side of one production and
    # every `Terminal` attribute (bottom of the class) is one token. Besides
    # grammar symbols, productions carry layout helpers (`group`, `indent`,
    # `sp`, `nl`, `br`) and `mark(...)` annotations (field names, highlight
    # classes), all imported from the `parser` package.
    # NOTE(review): the precise meaning of those helpers is defined in
    # `parser`, not here; comments below only describe what is matched.

    # generator = parser.GenerateLR1
    start = "File"
    trivia = ["BLANKS", "COMMENT"]

    def __init__(self):
        # Precedence/associativity table passed to `Grammar.__init__`.
        # NOTE(review): presumably ordered from loosest- to tightest-binding;
        # confirm against the `Grammar` implementation.
        levels = [
            (Assoc.RIGHT, [self.EQUAL]),
            (Assoc.LEFT, [self.OR]),
            (Assoc.LEFT, [self.IS]),
            (Assoc.LEFT, [self.AND]),
            (Assoc.LEFT, [self.EQUALEQUAL, self.BANGEQUAL]),
            (Assoc.LEFT, [self.LESS, self.GREATER, self.GREATEREQUAL, self.LESSEQUAL]),
            (Assoc.LEFT, [self.PLUS, self.MINUS]),
            (Assoc.LEFT, [self.STAR, self.SLASH]),
            (Assoc.LEFT, [self.primary_expression]),
            (Assoc.LEFT, [self.LPAREN]),
            (Assoc.LEFT, [self.DOT]),
            #
            # If there's a confusion about whether to make an IF
            # statement or an expression, prefer the statement.
            #
            (Assoc.NONE, [self.if_statement]),
        ]
        super().__init__(precedence=levels)

    # Start symbol (see `start` above): a list of file-level statements.
    @rule("File")
    def file(self) -> Rule:
        return self._file_statement_list

    # One or more file-level statements (left-recursive), with `nl` between
    # consecutive statements.
    @rule
    def _file_statement_list(self) -> Rule:
        single = self._file_statement
        several = self._file_statement_list + nl + self._file_statement
        return alt(single, several)

    # Anything allowed at file level: import, class, export, or any
    # ordinary statement.
    @rule
    def _file_statement(self) -> Rule:
        return (
            self.import_statement
            | self.class_declaration
            | self.export_statement
            | self._statement
        )

    # IMPORT STRING AS IDENTIFIER SEMICOLON
    @rule
    def import_statement(self) -> Rule:
        return group(
            self.IMPORT,
            sp,
            self.STRING,
            sp,
            self.AS,
            sp,
            self.IDENTIFIER,
            sp,
            self.SEMICOLON,
        )

    # CLASS IDENTIFIER LCURLY [class_body] RCURLY
    # The identifier is marked as field "name", the optional body as "body".
    @rule("ClassDeclaration")
    def class_declaration(self) -> Rule:
        class_name = mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type)
        header = group(group(self.CLASS, sp, class_name, sp), self.LCURLY)
        members = indent(nl, mark(opt(self.class_body), field="body"))
        return seq(header, members, nl, self.RCURLY)

    # Named wrapper around the member list.
    @rule("ClassBody")
    def class_body(self) -> Rule:
        return self._class_members

    # One or more class members (left-recursive).
    @rule
    def _class_members(self) -> Rule:
        longer = seq(self._class_members, self._class_member)
        return self._class_member | longer

    # A class member is a field or a function.
    @rule
    def _class_member(self) -> Rule:
        return self.field_declaration | self.function_declaration

    # IDENTIFIER COLON type_expression SEMICOLON, followed by `nl`.
    @rule("FieldDecl")
    def field_declaration(self) -> Rule:
        declaration = group(
            self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON
        )
        return declaration + nl

    # Types

    # Either an alternation of types or a single type identifier.
    @rule("TypeExpression")
    def type_expression(self) -> Rule:
        return self.alternate_type | self.type_identifier

    # type_expression OR type_identifier (left-recursive alternation).
    # NOTE(review): this uses the `or` keyword terminal; the BAR ("|")
    # terminal is declared below but not referenced by any rule in this class.
    @rule("AlternateType")
    def alternate_type(self) -> Rule:
        return group(self.type_expression, sp, self.OR, sp, self.type_identifier)

    # An IDENTIFIER in type position, marked as field "id".
    @rule("TypeIdentifier")
    def type_identifier(self) -> Rule:
        return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)

    # EXPORT followed by a class, a function, a let statement, or a
    # comma-separated identifier list terminated by SEMICOLON.
    @rule
    def export_statement(self) -> Rule:
        def exported(*what):
            # Every alternative shares the `EXPORT sp ...` prefix.
            return group(self.EXPORT, sp, *what)

        return alt(
            exported(self.class_declaration),
            exported(self.function_declaration),
            exported(self.let_statement),
            exported(self.export_list, self.SEMICOLON),
        )

    # IDENTIFIER { COMMA IDENTIFIER } (right-recursive, no trailing comma).
    @rule
    def export_list(self) -> Rule:
        rest = seq(self.IDENTIFIER, self.COMMA, sp, self.export_list)
        return self.IDENTIFIER | rest

    # Functions

    # FUN IDENTIFIER function_parameters [ARROW type_expression] block
    # Fields: "name", "parameters", "return_type", "body".
    @rule("FunctionDecl")
    def function_declaration(self) -> Rule:
        fn_name = mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.function)
        params = mark(self.function_parameters, field="parameters")
        returns = mark(opt(sp, self.ARROW, sp, self.type_expression), field="return_type")
        signature = group(self.FUN, sp, fn_name, sp, params, returns)
        return seq(signature, mark(self.block, field="body"))

    # LPAREN [parameters] RPAREN, where the parameters are a first parameter
    # optionally followed by COMMA (trailing comma allowed) and more
    # parameters.
    @rule("ParamList")
    def function_parameters(self) -> Rule:
        params = opt(
            self._first_parameter
            | seq(self._first_parameter, self.COMMA)
            | group(self._first_parameter, self.COMMA, sp, self._parameter_list)
        )
        return group(self.LPAREN, indent(nl, params), nl, self.RPAREN)

    # Only the first parameter may be the bare SELF token.
    @rule
    def _first_parameter(self) -> Rule:
        return self.SELF | self.parameter

    # parameter { COMMA parameter } (right-recursive).
    @rule
    def _parameter_list(self) -> Rule:
        rest = seq(self.parameter, self.COMMA, sp, self._parameter_list)
        return self.parameter | rest

    # IDENTIFIER COLON type_expression
    @rule("Parameter")
    def parameter(self) -> Rule:
        return group(self.IDENTIFIER, self.COLON, sp, self.type_expression)

    # Block

    # Either an empty pair of curly braces or braces around a block body.
    # The non-empty form uses `br` rather than `nl` around the body.
    @rule("Block")
    def block(self) -> Rule:
        empty = group(self.LCURLY, nl, self.RCURLY)
        filled = seq(self.LCURLY, indent(br, self.block_body), br, self.RCURLY)
        return alt(empty, filled)

    # A lone expression, a statement list, or a statement list followed by
    # a final expression.
    @rule("BlockBody")
    def block_body(self) -> Rule:
        statements_then_value = seq(self._statement_list, br, self.expression)
        return alt(self.expression, self._statement_list, statements_then_value)

    # One or more statements (left-recursive) separated by `br`.
    @rule
    def _statement_list(self) -> Rule:
        longer = seq(self._statement_list, br, self._statement)
        return self._statement | longer

    # Every statement form that may appear in a block or at file level.
    @rule
    def _statement(self) -> Rule:
        return (
            self.function_declaration
            | self.let_statement
            | self.return_statement
            | self.for_statement
            | self.if_statement
            | self.while_statement
            | self.expression_statement
        )

    # LET IDENTIFIER EQUAL expression SEMICOLON
    @rule("LetStatement")
    def let_statement(self) -> Rule:
        value = group(self.expression, self.SEMICOLON)
        initializer = indent(sp, self.EQUAL, indent(sp, value))
        return group(self.LET, sp, self.IDENTIFIER, initializer)

    # RETURN [expression] SEMICOLON
    @rule("ReturnStatement")
    def return_statement(self) -> Rule:
        with_value = group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON)))
        bare = group(self.RETURN, self.SEMICOLON)
        return alt(with_value, bare)

    # FOR iterator_variable IN expression block
    # NOTE(review): unlike `while_statement`, there is no `sp` between the
    # header and the block here; confirm whether that is intended.
    @rule("ForStatement")
    def for_statement(self) -> Rule:
        header = group(
            self.FOR,
            sp,
            self.iterator_variable,
            sp,
            self.IN,
            sp,
            group(self.expression),
        )
        return group(header, self.block)

    # The loop variable of a `for` statement: a single IDENTIFIER.
    @rule("IteratorVariable")
    def iterator_variable(self) -> Rule:
        return self.IDENTIFIER

    # A conditional expression used in statement position (see the
    # precedence entry for `if_statement` in `__init__`).
    @rule("IfStatement")
    def if_statement(self) -> Rule:
        return self.conditional_expression

    # WHILE expression block
    @rule
    def while_statement(self) -> Rule:
        header = group(self.WHILE, sp, self.expression)
        return group(header, sp, self.block)

    # expression SEMICOLON
    @rule
    def expression_statement(self) -> Rule:
        return seq(self.expression, self.SEMICOLON)

    # Expressions

    # Any expression: binary, `is`, or primary. Declared with
    # `transparent=True`.
    @rule(transparent=True)
    def expression(self) -> Rule:
        return self.binary_expression | self.is_expression | self.primary_expression

    # expression <operator> expression for each infix operator; ambiguity is
    # left to the precedence table in `__init__`.
    @rule("BinaryExpression")
    def binary_expression(self) -> Rule:
        def binary(operator):
            # All infix forms share the same shape and spacing.
            return group(self.expression, sp, operator, sp, self.expression)

        # Order matters: it is the order of the alternatives.
        operators = (
            self.EQUAL,
            self.OR,
            self.AND,
            self.EQUALEQUAL,
            self.BANGEQUAL,
            self.LESS,
            self.LESSEQUAL,
            self.GREATER,
            self.GREATEREQUAL,
            self.PLUS,
            self.MINUS,
            self.STAR,
            self.SLASH,
        )
        return alt(*[binary(operator) for operator in operators])

    # expression IS pattern
    @rule("IsExpression")
    def is_expression(self) -> Rule:
        return group(self.expression, sp, self.IS, indent(sp, self.pattern))

    # Atoms, prefix operators, calls, member access and parenthesized
    # expressions.
    @rule
    def primary_expression(self) -> Rule:
        bang_prefix = seq(self.BANG, self.primary_expression)
        minus_prefix = seq(self.MINUS, self.primary_expression)
        call_without_args = seq(self.primary_expression, self.LPAREN, self.RPAREN)
        call_with_args = group(
            self.primary_expression,
            self.LPAREN,
            indent(nl, self._expression_list),
            nl,
            self.RPAREN,
        )
        member_access = group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER))
        parenthesized = group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN)
        return (
            self.identifier_expression
            | self.literal_expression
            | self.SELF
            | bang_prefix
            | minus_prefix
            | self.block
            | self.conditional_expression
            | self.list_constructor_expression
            | self.object_constructor_expression
            | self.match_expression
            | call_without_args
            | call_with_args
            | member_access
            | parenthesized
        )

    # An IDENTIFIER used as an expression.
    @rule("IdentifierExpression")
    def identifier_expression(self):
        return self.IDENTIFIER

    # NUMBER, STRING, TRUE or FALSE.
    @rule("Literal")
    def literal_expression(self):
        return self.NUMBER | self.STRING | self.TRUE | self.FALSE

    # IF expression block [ELSE (conditional_expression | block)]
    @rule("ConditionalExpression")
    def conditional_expression(self) -> Rule:
        def if_head():
            # Built afresh for each alternative, as in the spelled-out form.
            return group(self.IF, sp, self.expression)

        if_only = seq(if_head(), sp, self.block)
        if_else_if = seq(
            if_head(),
            sp,
            self.block,
            sp,
            self.ELSE,
            sp,
            self.conditional_expression,
        )
        if_else = seq(if_head(), sp, self.block, sp, self.ELSE, sp, self.block)
        return if_only | if_else_if | if_else

    # LSQUARE [expression list] RSQUARE
    @rule
    def list_constructor_expression(self) -> Rule:
        empty = group(self.LSQUARE, nl, self.RSQUARE)
        filled = group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE)
        return alt(empty, filled)

    # Comma-separated expressions (right-recursive); a trailing COMMA is
    # accepted.
    @rule
    def _expression_list(self) -> Rule:
        return (
            self.expression
            | seq(self.expression, self.COMMA)
            | seq(self.expression, self.COMMA, sp, self._expression_list)
        )

    # MATCH expression match_body
    @rule
    def match_expression(self) -> Rule:
        header = group(self.MATCH, sp, self.expression)
        return group(header, sp, self.match_body)

    # LCURLY [match arms] RCURLY
    @rule("MatchBody")
    def match_body(self) -> Rule:
        empty = group(self.LCURLY, nl, self.RCURLY)
        filled = group(self.LCURLY, indent(nl, self._match_arms), nl, self.RCURLY)
        return alt(empty, filled)

    # Comma-separated match arms (right-recursive) with `br` between arms; a
    # trailing COMMA is accepted.
    @rule
    def _match_arms(self) -> Rule:
        return (
            self.match_arm
            | seq(self.match_arm, self.COMMA)
            | seq(self.match_arm, self.COMMA, br, self._match_arms)
        )

    # pattern ARROW expression
    @rule("MatchArm")
    def match_arm(self) -> Rule:
        return group(self.pattern, sp, self.ARROW, sp, self.expression)

    # [variable_binding] pattern core, optionally followed by AND expression
    # (the AND form is only available together with a variable binding).
    @rule("Pattern")
    def pattern(self) -> Rule:
        bound_with_condition = group(
            self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression
        )
        bound = group(self.variable_binding, self._pattern_core)
        return bound_with_condition | bound | self._pattern_core

    # What a pattern tests against: a type expression or the wildcard.
    @rule
    def _pattern_core(self) -> Rule:
        return self.type_expression | self.wildcard_pattern

    # The UNDERSCORE token.
    @rule("WildcardPattern")
    def wildcard_pattern(self) -> Rule:
        return self.UNDERSCORE

    # IDENTIFIER COLON
    @rule("VariableBinding")
    def variable_binding(self) -> Rule:
        return seq(self.IDENTIFIER, self.COLON)

    # NEW type_identifier field_list
    @rule
    def object_constructor_expression(self) -> Rule:
        return group(self.NEW, sp, self.type_identifier, self.field_list)

    # LCURLY [field values] RCURLY
    @rule
    def field_list(self) -> Rule:
        empty = seq(self.LCURLY, self.RCURLY)
        filled = group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY)
        return alt(empty, filled)

    # Comma-separated field values (right-recursive); a trailing COMMA is
    # accepted.
    @rule
    def field_values(self) -> Rule:
        return (
            self.field_value
            | seq(self.field_value, self.COMMA)
            | seq(self.field_value, self.COMMA, sp, self.field_values)
        )

    # Either a bare IDENTIFIER or IDENTIFIER COLON expression.
    @rule
    def field_value(self) -> Rule:
        explicit = group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
        return self.IDENTIFIER | explicit

    # Terminals. BLANKS and COMMENT are the two names listed in `trivia`.

    # One or more of: space, tab, carriage return, newline.
    BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
    # "//" followed by everything up to (not including) the next newline.
    COMMENT = Terminal(
        Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
        highlight=highlight.comment.line,
    )

    ARROW = Terminal("->", highlight=highlight.keyword.operator)
    AS = Terminal("as", highlight=highlight.keyword.operator.expression)
    # NOTE(review): BAR is not referenced by any rule in this class.
    BAR = Terminal("|", highlight=highlight.keyword.operator.expression)
    CLASS = Terminal("class", highlight=highlight.storage.type.klass)
    COLON = Terminal(":", highlight=highlight.punctuation.separator)
    ELSE = Terminal("else", highlight=highlight.keyword.control.conditional)
    FOR = Terminal("for", highlight=highlight.keyword.control)
    FUN = Terminal("fun", highlight=highlight.storage.type.function)
    # A letter or underscore followed by any number of letters, digits or
    # underscores (ASCII ranges only).
    IDENTIFIER = Terminal(
        Re.seq(
            Re.set(("a", "z"), ("A", "Z"), "_"),
            Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
        ),
    )
    IF = Terminal("if", highlight=highlight.keyword.control.conditional)
    IMPORT = Terminal("import", highlight=highlight.keyword.other)
    IN = Terminal("in", highlight=highlight.keyword.operator)
    LCURLY = Terminal("{", highlight=highlight.punctuation.curly_brace.open)
    RCURLY = Terminal("}", highlight=highlight.punctuation.curly_brace.close)
    LET = Terminal("let", highlight=highlight.keyword.other)
    RETURN = Terminal("return", highlight=highlight.keyword.control)
    SEMICOLON = Terminal(";", highlight=highlight.punctuation.separator)
    # A double- or single-quoted string. Inside the quotes: any character
    # other than the quote or a backslash, or a backslash followed by any
    # character.
    STRING = Terminal(
        # Double-quoted string.
        Re.seq(
            Re.literal('"'),
            (~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
            Re.literal('"'),
        )
        # Single-quoted string.
        | Re.seq(
            Re.literal("'"),
            (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
            Re.literal("'"),
        ),
        highlight=highlight.string.quoted,
    )
    WHILE = Terminal("while", highlight=highlight.keyword.control)
    EQUAL = Terminal("=", highlight=highlight.keyword.operator.expression)
    LPAREN = Terminal("(", highlight=highlight.punctuation.parenthesis.open)
    RPAREN = Terminal(")", highlight=highlight.punctuation.parenthesis.close)
    COMMA = Terminal(",", highlight=highlight.punctuation.separator)
    # NOTE(review): the explicit name "SELFF" differs from the attribute name;
    # presumably deliberate (exercising the `name=` override) - confirm.
    SELF = Terminal("self", name="SELFF", highlight=highlight.variable.language)
    OR = Terminal("or", highlight=highlight.keyword.operator.expression)
    IS = Terminal("is", highlight=highlight.keyword.operator.expression)
    AND = Terminal("and", highlight=highlight.keyword.operator.expression)
    EQUALEQUAL = Terminal("==", highlight=highlight.keyword.operator.expression)
    BANGEQUAL = Terminal("!=", highlight=highlight.keyword.operator.expression)
    LESS = Terminal("<", highlight=highlight.keyword.operator.expression)
    GREATER = Terminal(">", highlight=highlight.keyword.operator.expression)
    LESSEQUAL = Terminal("<=", highlight=highlight.keyword.operator.expression)
    GREATEREQUAL = Terminal(">=", highlight=highlight.keyword.operator.expression)
    PLUS = Terminal("+", highlight=highlight.keyword.operator.expression)
    MINUS = Terminal("-", highlight=highlight.keyword.operator.expression)
    STAR = Terminal("*", highlight=highlight.keyword.operator.expression)
    SLASH = Terminal("/", highlight=highlight.keyword.operator.expression)
    # Digits, then an optional ".digits" fraction, then an optional exponent
    # ("e" or "E", optional sign, digits).
    NUMBER = Terminal(
        Re.seq(
            Re.set(("0", "9")).plus(),
            Re.seq(
                Re.literal("."),
                Re.set(("0", "9")).plus(),
            ).question(),
            Re.seq(
                Re.set("e", "E"),
                Re.set("+", "-").question(),
                Re.set(("0", "9")).plus(),
            ).question(),
        ),
        highlight=highlight.constant.numeric,
    )
    TRUE = Terminal("true", highlight=highlight.constant.language)
    FALSE = Terminal("false", highlight=highlight.constant.language)
    BANG = Terminal("!", highlight=highlight.keyword.operator.expression)
    DOT = Terminal(".", highlight=highlight.punctuation.separator)
    MATCH = Terminal("match", highlight=highlight.keyword.other)
    EXPORT = Terminal("export", highlight=highlight.keyword.other)
    UNDERSCORE = Terminal("_", highlight=highlight.variable.language)
    NEW = Terminal("new", highlight=highlight.keyword.operator)
    LSQUARE = Terminal("[", highlight=highlight.punctuation.square_bracket.open)
    RSQUARE = Terminal("]", highlight=highlight.punctuation.square_bracket.close)
if __name__ == "__main__":
    # Script entry point: build the tables for FineGrammar, hand the compiled
    # lexer to `dump_lexer_table`, then emit the tree-sitter grammar and
    # queries plus an Emacs major mode into a "tree-sitter-fine" directory
    # next to this file.
    from pathlib import Path

    from parser.parser import dump_lexer_table
    from parser.emacs import emit_emacs_major_mode
    from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries

    # TODO: Actually generate a lexer/parser for some runtime.
    grammar = FineGrammar()
    grammar.build_table()

    lexer = grammar.compile_lexer()
    dump_lexer_table(lexer)

    # Generate tree-sitter parser and emacs mode.
    ts_path = Path(__file__).parent / "tree-sitter-fine"
    emacs_mode_file = ts_path / "fine.el"

    emit_tree_sitter_grammar(grammar, ts_path)
    emit_tree_sitter_queries(grammar, ts_path)
    emit_emacs_major_mode(grammar, emacs_mode_file)

    # TODO: Generate pretty-printer code.