Look I'm just thinking hard about converting to a parser generator because I want to derive the pretty-printer from the parser without having to repeat myself all over the place. This parser.py is derived from my old LRParsers project, and should go back there eventually, but for now I'm driving the work from here.
388 lines
11 KiB
Python
388 lines
11 KiB
Python
from parser import Assoc, Grammar, Nothing, Token, rule, seq
|
|
|
|
ARROW = Token("Arrow")
|
|
AS = Token("As")
|
|
BAR = Token("Bar")
|
|
CLASS = Token("Class")
|
|
COLON = Token("Colon")
|
|
ELSE = Token("Else")
|
|
FOR = Token("For")
|
|
FUN = Token("Fun")
|
|
IDENTIFIER = Token("Identifier")
|
|
IF = Token("If")
|
|
IMPORT = Token("Import")
|
|
IN = Token("In")
|
|
LCURLY = Token("LeftBrace")
|
|
LET = Token("Let")
|
|
RCURLY = Token("RightBrace")
|
|
RETURN = Token("Return")
|
|
SEMICOLON = Token("Semicolon")
|
|
STRING = Token("String")
|
|
WHILE = Token("While")
|
|
EQUAL = Token("Equal")
|
|
LPAREN = Token("LeftParen")
|
|
RPAREN = Token("RightParen")
|
|
COMMA = Token("Comma")
|
|
SELF = Token("Selff")
|
|
OR = Token("Or")
|
|
IS = Token("Is")
|
|
AND = Token("And")
|
|
EQUALEQUAL = Token("EqualEqual")
|
|
BANGEQUAL = Token("BangEqual")
|
|
LESS = Token("Less")
|
|
GREATER = Token("Greater")
|
|
LESSEQUAL = Token("LessEqual")
|
|
GREATEREQUAL = Token("GreaterEqual")
|
|
PLUS = Token("Plus")
|
|
MINUS = Token("Minus")
|
|
STAR = Token("Star")
|
|
SLASH = Token("Slash")
|
|
NUMBER = Token("Number")
|
|
TRUE = Token("True")
|
|
FALSE = Token("False")
|
|
BANG = Token("Bang")
|
|
DOT = Token("Dot")
|
|
MATCH = Token("Match")
|
|
EXPORT = Token("Export")
|
|
UNDERSCORE = Token("Underscore")
|
|
NEW = Token("New")
|
|
LSQUARE = Token("LeftBracket")
|
|
RSQUARE = Token("RightBracket")
|
|
|
|
|
|
class FineGrammar(Grammar):
|
|
def __init__(self):
|
|
super().__init__(
|
|
precedence=[
|
|
(Assoc.RIGHT, [EQUAL]),
|
|
(Assoc.LEFT, [OR]),
|
|
(Assoc.LEFT, [IS]),
|
|
(Assoc.LEFT, [AND]),
|
|
(Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]),
|
|
(Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
|
|
(Assoc.LEFT, [PLUS, MINUS]),
|
|
(Assoc.LEFT, [STAR, SLASH]),
|
|
(Assoc.LEFT, [self.primary_expression]),
|
|
(Assoc.LEFT, [LPAREN]),
|
|
(Assoc.LEFT, [DOT]),
|
|
#
|
|
# If there's a confusion about whether to make an IF
|
|
# statement or an expression, prefer the statement.
|
|
#
|
|
(Assoc.NONE, [self.if_statement]),
|
|
]
|
|
)
|
|
|
|
@rule
|
|
def file(self):
|
|
return self.file_statement_list
|
|
|
|
@rule
|
|
def file_statement_list(self):
|
|
return self.file_statement | (self.file_statement_list + self.file_statement)
|
|
|
|
@rule
|
|
def file_statement(self):
|
|
return (
|
|
self.import_statement | self.class_declaration | self.export_statement | self.statement
|
|
)
|
|
|
|
@rule
|
|
def import_statement(self):
|
|
return seq(IMPORT, STRING, AS, IDENTIFIER, SEMICOLON)
|
|
|
|
@rule
|
|
def class_declaration(self):
|
|
return seq(CLASS, IDENTIFIER, self.class_body)
|
|
|
|
@rule
|
|
def class_body(self):
|
|
return seq(LCURLY, RCURLY) | seq(LCURLY, self.class_members, RCURLY)
|
|
|
|
@rule
|
|
def class_members(self):
|
|
return self.class_member | seq(self.class_members, self.class_member)
|
|
|
|
@rule
|
|
def class_member(self):
|
|
return self.field_declaration | self.function_declaration
|
|
|
|
@rule
|
|
def field_declaration(self):
|
|
return seq(IDENTIFIER, COLON, self.type_expression, SEMICOLON)
|
|
|
|
# Types
|
|
@rule
|
|
def type_expression(self):
|
|
return self.alternate_type | self.type_identifier
|
|
|
|
@rule
|
|
def alternate_type(self):
|
|
return seq(self.type_expression, BAR, self.type_identifier)
|
|
|
|
@rule
|
|
def type_identifier(self):
|
|
return IDENTIFIER
|
|
|
|
@rule
|
|
def export_statement(self):
|
|
return (
|
|
seq(EXPORT, self.class_declaration)
|
|
| seq(EXPORT, self.function_declaration)
|
|
| seq(EXPORT, self.let_statement)
|
|
| seq(EXPORT, self.export_list, SEMICOLON)
|
|
)
|
|
|
|
@rule
|
|
def export_list(self):
|
|
return Nothing | IDENTIFIER | seq(IDENTIFIER, COMMA, self.export_list)
|
|
|
|
# Functions
|
|
@rule
|
|
def function_declaration(self):
|
|
return seq(FUN, IDENTIFIER, self.function_parameters, self.block) | seq(
|
|
FUN, IDENTIFIER, self.function_parameters, ARROW, self.type_expression, self.block
|
|
)
|
|
|
|
@rule
|
|
def function_parameters(self):
|
|
return (
|
|
seq(LPAREN, RPAREN)
|
|
| seq(LPAREN, self.first_parameter, RPAREN)
|
|
| seq(LPAREN, self.first_parameter, COMMA, self.parameter_list, RPAREN)
|
|
)
|
|
|
|
@rule
|
|
def first_parameter(self):
|
|
return SELF | self.parameter
|
|
|
|
@rule
|
|
def parameter_list(self):
|
|
return Nothing | self.parameter | seq(self.parameter, COMMA, self.parameter_list)
|
|
|
|
@rule
|
|
def parameter(self):
|
|
return seq(IDENTIFIER, COLON, self.type_expression)
|
|
|
|
# Block
|
|
@rule
|
|
def block(self):
|
|
return (
|
|
seq(LCURLY, RCURLY)
|
|
| seq(LCURLY, self.statement_list, RCURLY)
|
|
| seq(LCURLY, self.statement_list, self.expression, RCURLY)
|
|
)
|
|
|
|
@rule
|
|
def statement_list(self):
|
|
return self.statement | seq(self.statement_list, self.statement)
|
|
|
|
@rule
|
|
def statement(self):
|
|
return (
|
|
self.function_declaration
|
|
| self.let_statement
|
|
| self.return_statement
|
|
| self.for_statement
|
|
| self.if_statement
|
|
| self.while_statement
|
|
| self.expression_statement
|
|
)
|
|
|
|
@rule
|
|
def let_statement(self):
|
|
return seq(LET, IDENTIFIER, EQUAL, self.expression, SEMICOLON)
|
|
|
|
@rule
|
|
def return_statement(self):
|
|
return seq(RETURN, self.expression, SEMICOLON)
|
|
|
|
@rule
|
|
def for_statement(self):
|
|
return seq(FOR, self.iterator_variable, IN, self.expression, self.block)
|
|
|
|
@rule
|
|
def iterator_variable(self):
|
|
return IDENTIFIER
|
|
|
|
@rule
|
|
def if_statement(self):
|
|
return self.conditional_expression
|
|
|
|
@rule
|
|
def while_statement(self):
|
|
return seq(WHILE, self.expression, self.block)
|
|
|
|
@rule
|
|
def expression_statement(self):
|
|
return seq(self.expression, SEMICOLON)
|
|
|
|
# Expressions
|
|
@rule
|
|
def expression(self):
|
|
return self.assignment_expression
|
|
|
|
@rule
|
|
def assignment_expression(self):
|
|
return seq(self.or_expression, EQUAL, self.assignment_expression) | self.or_expression
|
|
|
|
@rule
|
|
def or_expression(self):
|
|
return seq(self.or_expression, OR, self.is_expression) | self.is_expression
|
|
|
|
@rule
|
|
def is_expression(self):
|
|
return seq(self.is_expression, IS, self.pattern) | self.and_expression
|
|
|
|
@rule
|
|
def and_expression(self):
|
|
return seq(self.and_expression, AND, self.equality_expression) | self.equality_expression
|
|
|
|
@rule
|
|
def equality_expression(self):
|
|
return (
|
|
seq(self.equality_expression, EQUALEQUAL, self.relation_expression)
|
|
| seq(self.equality_expression, BANGEQUAL, self.relation_expression)
|
|
| self.relation_expression
|
|
)
|
|
|
|
@rule
|
|
def relation_expression(self):
|
|
return (
|
|
seq(self.relation_expression, LESS, self.additive_expression)
|
|
| seq(self.relation_expression, LESSEQUAL, self.additive_expression)
|
|
| seq(self.relation_expression, GREATER, self.additive_expression)
|
|
| seq(self.relation_expression, GREATEREQUAL, self.additive_expression)
|
|
)
|
|
|
|
@rule
|
|
def additive_expression(self):
|
|
return (
|
|
seq(self.additive_expression, PLUS, self.multiplication_expression)
|
|
| seq(self.additive_expression, MINUS, self.multiplication_expression)
|
|
| self.multiplication_expression
|
|
)
|
|
|
|
@rule
|
|
def multiplication_expression(self):
|
|
return (
|
|
seq(self.multiplication_expression, STAR, self.primary_expression)
|
|
| seq(self.multiplication_expression, SLASH, self.primary_expression)
|
|
| self.primary_expression
|
|
)
|
|
|
|
@rule
|
|
def primary_expression(self):
|
|
return (
|
|
IDENTIFIER
|
|
| SELF
|
|
| NUMBER
|
|
| STRING
|
|
| TRUE
|
|
| FALSE
|
|
| seq(BANG, self.primary_expression)
|
|
| seq(MINUS, self.primary_expression)
|
|
| self.block
|
|
| self.conditional_expression
|
|
| self.list_constructor_expression
|
|
| self.object_constructor_expression
|
|
| self.match_expression
|
|
| seq(self.primary_expression, LPAREN, self.expression_list, RPAREN)
|
|
| seq(self.primary_expression, DOT, IDENTIFIER)
|
|
| seq(LPAREN, self.expression, RPAREN)
|
|
)
|
|
|
|
@rule
|
|
def conditional_expression(self):
|
|
return (
|
|
seq(IF, self.expression, self.block)
|
|
| seq(IF, self.expression, self.block, ELSE, self.conditional_expression)
|
|
| seq(IF, self.expression, self.block, ELSE, self.block)
|
|
)
|
|
|
|
@rule
|
|
def list_constructor_expression(self):
|
|
return seq(LSQUARE, RSQUARE) | seq(LSQUARE, self.expression_list, RSQUARE)
|
|
|
|
@rule
|
|
def expression_list(self):
|
|
return (
|
|
self.expression
|
|
| seq(self.expression, COMMA)
|
|
| seq(self.expression, COMMA, self.expression_list)
|
|
)
|
|
|
|
@rule
|
|
def match_expression(self):
|
|
return seq(MATCH, self.match_body)
|
|
|
|
@rule
|
|
def match_body(self):
|
|
return seq(LCURLY, RCURLY) | seq(LCURLY, self.match_arms, RCURLY)
|
|
|
|
@rule
|
|
def match_arms(self):
|
|
return (
|
|
self.match_arm
|
|
| seq(self.match_arm, COMMA)
|
|
| seq(self.match_arm, COMMA, self.match_arms)
|
|
)
|
|
|
|
@rule
|
|
def match_arm(self):
|
|
return seq(self.pattern, ARROW, self.expression)
|
|
|
|
@rule
|
|
def pattern(self):
|
|
return (
|
|
seq(self.variable_binding, self.pattern_core, AND, self.and_expression)
|
|
| seq(self.variable_binding, self.pattern_core)
|
|
| seq(self.pattern_core, AND, self.and_expression)
|
|
| self.pattern_core
|
|
)
|
|
|
|
@rule
|
|
def pattern_core(self):
|
|
return self.type_expression | self.wildcard_pattern
|
|
|
|
@rule
|
|
def wildcard_pattern(self):
|
|
return UNDERSCORE
|
|
|
|
@rule
|
|
def variable_binding(self):
|
|
return seq(IDENTIFIER, COLON)
|
|
|
|
@rule
|
|
def object_constructor_expression(self):
|
|
return seq(NEW, self.type_identifier, self.field_list)
|
|
|
|
@rule
|
|
def field_list(self):
|
|
return seq(LCURLY, RCURLY) | seq(LCURLY, self.field_values, RCURLY)
|
|
|
|
@rule
|
|
def field_values(self):
|
|
return (
|
|
self.field_value
|
|
| seq(self.field_value, COMMA)
|
|
| seq(self.field_value, COMMA, self.field_values)
|
|
)
|
|
|
|
@rule
|
|
def field_value(self):
|
|
return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
|
|
|
|
|
|
grammar = FineGrammar()
|
|
table = grammar.build_table(start="file")
|
|
|
|
print(f"{len(table)} states")
|
|
|
|
average_entries = sum(len(row) for row in table) / len(table)
|
|
max_entries = max(len(row) for row in table)
|
|
print(f"{average_entries} average, {max_entries} max")
|
|
|
|
# print(parser_faster.format_table(gen, table))
|
|
# print()
|
|
# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])
|