diff --git a/dingus/worker.js b/dingus/worker.js index 8a5e66d..f37a034 100644 --- a/dingus/worker.js +++ b/dingus/worker.js @@ -1,4 +1,4 @@ -const PARSER_PACKAGE = "./wheel/lrparsers-0.8-py3-none-any.whl" +const PARSER_PACKAGE = "./wheel/lrparsers-0.7.9-py3-none-any.whl" // Load the whole pyodide thingy. @@ -108,6 +108,9 @@ def eval_grammar(code): grammar = None for key, value in grammar_globals.items(): + if isinstance(value, type) and issubclass(value, parser.Grammar) and value is not parser.Grammar: + value = value() + if isinstance(value, parser.Grammar): if grammar is None: grammar = value @@ -115,7 +118,7 @@ def eval_grammar(code): raise Exception("More than one Grammar found in the file") if grammar is None: - raise Exception("No grammar definition, make an instance of parser.Grammar") + raise Exception("No grammar definition, define or instantiate a class that inherits from parser.Grammar") GRAMMAR = grammar diff --git a/grammar.py b/grammar.py index 0981e73..aee5f78 100644 --- a/grammar.py +++ b/grammar.py @@ -20,498 +20,503 @@ from parser import ( sp, ) -@rule("File") -def file() -> Rule: - return _file_statement_list -@rule -def _file_statement_list() -> Rule: - return alt( - _file_statement, - _file_statement_list + nl + _file_statement, - ) +class FineGrammar(Grammar): + # generator = parser.GenerateLR1 + # generator = parser.GeneratePager + start = "File" -@rule -def _file_statement() -> Rule: - return ( - import_statement | class_declaration | export_statement | _statement - ) + trivia = ["BLANKS", "LINE_BREAK", "COMMENT"] -@rule -def import_statement() -> Rule: - return group( - IMPORT, sp, STRING, sp, AS, sp, IDENTIFIER, sp, SEMICOLON - ) + pretty_indent = " " -@rule("ClassDeclaration") -def class_declaration() -> Rule: - return seq( - group( - CLASS, - sp, - mark(IDENTIFIER, field="name", highlight=highlight.entity.name.type), - sp, - LCURLY, - ), - indent(nl, mark(opt(class_body), field="body")), - nl, - RCURLY, - nl, # Extra newline at the end of the class - ) + def __init__(self): + super().__init__( + precedence=[ + (Assoc.RIGHT, [self.EQUAL]), + (Assoc.LEFT, [self.OR]), + (Assoc.LEFT, [self.IS]), + (Assoc.LEFT, [self.AND]), + (Assoc.LEFT, [self.EQUALEQUAL, self.BANGEQUAL]), + (Assoc.LEFT, [self.LESS, self.GREATER, self.GREATEREQUAL, self.LESSEQUAL]), + (Assoc.LEFT, [self.PLUS, self.MINUS]), + (Assoc.LEFT, [self.STAR, self.SLASH]), + (Assoc.LEFT, [self.primary_expression]), + (Assoc.LEFT, [self.LPAREN]), + (Assoc.LEFT, [self.DOT]), + # + # If there's a confusion about whether to make an IF + # statement or an expression, prefer the statement. + # + (Assoc.NONE, [self.if_statement]), + ], + ) -@rule("ClassBody") -def class_body() -> Rule: - return _class_members + @rule("File") + def file(self) -> Rule: + return self._file_statement_list -@rule -def _class_members() -> Rule: - return _class_member | seq(_class_members, nl, _class_member) + @rule + def _file_statement_list(self) -> Rule: + return alt( + self._file_statement, + self._file_statement_list + nl + self._file_statement, + ) -@rule -def _class_member() -> Rule: - return field_declaration | function_declaration + @rule + def _file_statement(self) -> Rule: + return ( + self.import_statement | self.class_declaration | self.export_statement | self._statement + ) -@rule("FieldDecl") -def field_declaration() -> Rule: - return group(IDENTIFIER, COLON, sp, type_expression, SEMICOLON) + @rule + def import_statement(self) -> Rule: + return group( + self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, sp, self.SEMICOLON + ) -# Types -@rule("TypeExpression") -def type_expression() -> Rule: - return alternate_type | type_identifier + @rule("ClassDeclaration") + def class_declaration(self) -> Rule: + return seq( + group( + self.CLASS, + sp, + mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type), + sp, + self.LCURLY, + ), + indent(nl, mark(opt(self.class_body), field="body")), + nl, + self.RCURLY, + nl, # Extra newline at the end of the class + ) -@rule("AlternateType") -def alternate_type() -> Rule: - return group(type_expression, sp, OR, sp, type_identifier) + @rule("ClassBody") + def class_body(self) -> Rule: + return self._class_members -@rule("TypeIdentifier") -def type_identifier() -> Rule: - return mark(IDENTIFIER, field="id", highlight=highlight.entity.name.type) + @rule + def _class_members(self) -> Rule: + return self._class_member | seq(self._class_members, nl, self._class_member) -@rule -def export_statement() -> Rule: - return alt( - group(EXPORT, sp, class_declaration), - group(EXPORT, sp, function_declaration), - group(EXPORT, sp, let_statement), - group(EXPORT, sp, export_list, SEMICOLON), - ) + @rule + def _class_member(self) -> Rule: + return self.field_declaration | self.function_declaration -@rule -def export_list() -> Rule: - return IDENTIFIER | seq(IDENTIFIER, COMMA, sp, export_list) + @rule("FieldDecl") + def field_declaration(self) -> Rule: + return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON) -# Functions -@rule("FunctionDecl") -def function_declaration() -> Rule: - return seq( - group( + # Types + @rule("TypeExpression") + def type_expression(self) -> Rule: + return self.alternate_type | self.type_identifier + + @rule("AlternateType") + def alternate_type(self) -> Rule: + return group(self.type_expression, sp, self.OR, sp, self.type_identifier) + + @rule("TypeIdentifier") + def type_identifier(self) -> Rule: + return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type) + + @rule + def export_statement(self) -> Rule: + return alt( + group(self.EXPORT, sp, self.class_declaration), + group(self.EXPORT, sp, self.function_declaration), + group(self.EXPORT, sp, self.let_statement), + group(self.EXPORT, sp, self.export_list, self.SEMICOLON), + ) + + @rule + def export_list(self) -> Rule: + return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list) + + # Functions + @rule("FunctionDecl") + def function_declaration(self) -> Rule: + return seq( group( group( - FUN, - sp, - mark( - IDENTIFIER, - field="name", - highlight=highlight.entity.name.function, + group( + self.FUN, + sp, + mark( + self.IDENTIFIER, + field="name", + highlight=highlight.entity.name.function, + ), ), + nl, + mark(self.function_parameters, field="parameters"), ), + mark( + opt(indent(sp, group(self.ARROW, sp, self.type_expression))), + field="return_type", + ), + ), + sp, + mark(self.block, field="body"), + nl, + ) + + @rule("ParamList") + def function_parameters(self) -> Rule: + return group( + self.LPAREN, + indent( nl, - mark(function_parameters, field="parameters"), + opt( + self._first_parameter + | seq(self._first_parameter, self.COMMA) + | group(self._first_parameter, self.COMMA, sp, self._parameter_list) + ), ), - mark( - opt(indent(sp, group(ARROW, sp, type_expression))), - field="return_type", - ), - ), - sp, - mark(block, field="body"), - nl, - ) - -@rule("ParamList") -def function_parameters() -> Rule: - return group( - LPAREN, - indent( nl, - opt( - _first_parameter - | seq(_first_parameter, COMMA) - | group(_first_parameter, COMMA, sp, _parameter_list) + self.RPAREN, + ) + + @rule + def _first_parameter(self) -> Rule: + return self.SELF | self.parameter + + @rule + def _parameter_list(self) -> Rule: + return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list) + + @rule("Parameter") + def parameter(self) -> Rule: + return group(self.IDENTIFIER, self.COLON, sp, self.type_expression) + + # Block + @rule("Block") + def block(self) -> Rule: + return alt( + group(self.LCURLY, nl, self.RCURLY), + group(self.LCURLY, indent(br, self.block_body), sp, self.RCURLY), + ) + + @rule("BlockBody") + def block_body(self) -> Rule: + return alt( + self.expression, + self._statement_list, + seq(self._statement_list, br, self.expression), + ) + + @rule + def _statement_list(self) -> Rule: + return self._statement | seq(self._statement_list, br, self._statement) + + @rule + def _statement(self) -> Rule: + return ( + self.function_declaration + | self.let_statement + | self.return_statement + | self.for_statement + | self.if_statement + | self.while_statement + | self.expression_statement + ) + + @rule("LetStatement") + def let_statement(self) -> Rule: + return group( + group( + self.LET, + sp, + self.IDENTIFIER, + sp, + self.EQUAL, ), - ), - nl, - RPAREN, - ) - -@rule -def _first_parameter() -> Rule: - return SELF | parameter - -@rule -def _parameter_list() -> Rule: - return parameter | seq(parameter, COMMA, sp, _parameter_list) - -@rule("Parameter") -def parameter() -> Rule: - return group(IDENTIFIER, COLON, sp, type_expression) - -# Block -@rule("Block") -def block() -> Rule: - return alt( - group(LCURLY, nl, RCURLY), - group(LCURLY, indent(br, block_body), sp, RCURLY), - ) - -@rule("BlockBody") -def block_body() -> Rule: - return alt( - expression, - _statement_list, - seq(_statement_list, br, expression), - ) - -@rule -def _statement_list() -> Rule: - return _statement | seq(_statement_list, br, _statement) - -@rule -def _statement() -> Rule: - return ( - function_declaration - | let_statement - | return_statement - | for_statement - | if_statement - | while_statement - | expression_statement - ) - -@rule("LetStatement") -def let_statement() -> Rule: - return group( - group( - LET, - sp, - IDENTIFIER, - sp, - EQUAL, - ), - indent(sp, expression, SEMICOLON), - ) - -@rule("ReturnStatement") -def return_statement() -> Rule: - return alt( - group(RETURN, indent(sp, group(expression, SEMICOLON))), - group(RETURN, SEMICOLON), - ) - -@rule("ForStatement") -def for_statement() -> Rule: - return group( - group(FOR, sp, iterator_variable, sp, IN, sp, group(expression)), - block, - ) - -@rule("IteratorVariable") -def iterator_variable() -> Rule: - return IDENTIFIER - -@rule("IfStatement") -def if_statement() -> Rule: - return conditional_expression - -@rule -def while_statement() -> Rule: - return group(group(WHILE, sp, expression), sp, block) - -@rule -def expression_statement() -> Rule: - return seq(expression, SEMICOLON) - -# Expressions -@rule(transparent=True) -def expression() -> Rule: - return binary_expression | is_expression | primary_expression - -@rule("BinaryExpression") -def binary_expression() -> Rule: - return alt( - # Assignment gets special indentation. - group(group(expression, sp, EQUAL), indent(sp, expression)), - # Other ones do not. - group(group(expression, sp, OR), sp, expression), - group(group(expression, sp, AND), sp, expression), - group(group(expression, sp, EQUALEQUAL), sp, expression), - group(group(expression, sp, BANGEQUAL), sp, expression), - group(group(expression, sp, LESS), sp, expression), - group(group(expression, sp, LESSEQUAL), sp, expression), - group(group(expression, sp, GREATER), sp, expression), - group(group(expression, sp, GREATEREQUAL), sp, expression), - group(group(expression, sp, PLUS), sp, expression), - group(group(expression, sp, MINUS), sp, expression), - group(group(expression, sp, STAR), sp, expression), - group(group(expression, sp, SLASH), sp, expression), - ) - -@rule("IsExpression") -def is_expression() -> Rule: - return group(expression, sp, IS, indent(sp, pattern)) - -@rule -def primary_expression() -> Rule: - return ( - identifier_expression - | literal_expression - | SELF - | seq(BANG, primary_expression) - | seq(MINUS, primary_expression) - | block - | conditional_expression - | list_constructor_expression - | object_constructor_expression - | match_expression - | seq(primary_expression, LPAREN, RPAREN) - | group( - primary_expression, - LPAREN, - indent(nl, _expression_list), - nl, - RPAREN, + indent(sp, self.expression, self.SEMICOLON), ) - | group(primary_expression, indent(nl, DOT, IDENTIFIER)) - | group(LPAREN, indent(nl, expression), nl, RPAREN) - ) -@rule("IdentifierExpression") -def identifier_expression(): - return IDENTIFIER - -@rule("Literal") -def literal_expression(): - return NUMBER | STRING | TRUE | FALSE - -@rule("ConditionalExpression") -def conditional_expression() -> Rule: - return ( - seq(group(IF, sp, expression), sp, block) - | seq( - group(IF, sp, expression), - sp, - block, - sp, - ELSE, - sp, - conditional_expression, + @rule("ReturnStatement") + def return_statement(self) -> Rule: + return alt( + group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))), + group(self.RETURN, self.SEMICOLON), ) - | seq( - group(IF, sp, expression), sp, block, sp, ELSE, sp, block + + @rule("ForStatement") + def for_statement(self) -> Rule: + return group( + group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)), + self.block, ) + + @rule("IteratorVariable") + def iterator_variable(self) -> Rule: + return self.IDENTIFIER + + @rule("IfStatement") + def if_statement(self) -> Rule: + return self.conditional_expression + + @rule + def while_statement(self) -> Rule: + return group(group(self.WHILE, sp, self.expression), sp, self.block) + + @rule + def expression_statement(self) -> Rule: + return seq(self.expression, self.SEMICOLON) + + # Expressions + @rule(transparent=True) + def expression(self) -> Rule: + return self.binary_expression | self.is_expression | self.primary_expression + + @rule("BinaryExpression") + def binary_expression(self) -> Rule: + return alt( + # Assignment gets special indentation. + group(group(self.expression, sp, self.EQUAL), indent(sp, self.expression)), + # Other ones do not. + group(group(self.expression, sp, self.OR), sp, self.expression), + group(group(self.expression, sp, self.AND), sp, self.expression), + group(group(self.expression, sp, self.EQUALEQUAL), sp, self.expression), + group(group(self.expression, sp, self.BANGEQUAL), sp, self.expression), + group(group(self.expression, sp, self.LESS), sp, self.expression), + group(group(self.expression, sp, self.LESSEQUAL), sp, self.expression), + group(group(self.expression, sp, self.GREATER), sp, self.expression), + group(group(self.expression, sp, self.GREATEREQUAL), sp, self.expression), + group(group(self.expression, sp, self.PLUS), sp, self.expression), + group(group(self.expression, sp, self.MINUS), sp, self.expression), + group(group(self.expression, sp, self.STAR), sp, self.expression), + group(group(self.expression, sp, self.SLASH), sp, self.expression), + ) + + @rule("IsExpression") + def is_expression(self) -> Rule: + return group(self.expression, sp, self.IS, indent(sp, self.pattern)) + + @rule + def primary_expression(self) -> Rule: + return ( + self.identifier_expression + | self.literal_expression + | self.SELF + | seq(self.BANG, self.primary_expression) + | seq(self.MINUS, self.primary_expression) + | self.block + | self.conditional_expression + | self.list_constructor_expression + | self.object_constructor_expression + | self.match_expression + | seq(self.primary_expression, self.LPAREN, self.RPAREN) + | group( + self.primary_expression, + self.LPAREN, + indent(nl, self._expression_list), + nl, + self.RPAREN, + ) + | group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER)) + | group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN) + ) + + @rule("IdentifierExpression") + def identifier_expression(self): + return self.IDENTIFIER + + @rule("Literal") + def literal_expression(self): + return self.NUMBER | self.STRING | self.TRUE | self.FALSE + + @rule("ConditionalExpression") + def conditional_expression(self) -> Rule: + return ( + seq(group(self.IF, sp, self.expression), sp, self.block) + | seq( + group(self.IF, sp, self.expression), + sp, + self.block, + sp, + self.ELSE, + sp, + self.conditional_expression, + ) + | seq( + group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block + ) + ) + + @rule + def list_constructor_expression(self) -> Rule: + return alt( + group(self.LSQUARE, nl, self.RSQUARE), + group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE), + ) + + @rule + def _expression_list(self) -> Rule: + return ( + self.expression + | seq(self.expression, self.COMMA) + | seq(self.expression, self.COMMA, sp, self._expression_list) + ) + + @rule + def match_expression(self) -> Rule: + return group( + group(self.MATCH, sp, self.expression, sp, self.LCURLY), + indent(sp, self.match_arms), + sp, + self.RCURLY, + ) + + @rule("MatchArms") + def match_arms(self) -> Rule: + return self._match_arms + + @rule + def _match_arms(self) -> Rule: + return ( + self.match_arm + | seq(self.match_arm, self.COMMA) + | seq(self.match_arm, self.COMMA, br, self._match_arms) + ) + + @rule("MatchArm") + def match_arm(self) -> Rule: + return group(self.pattern, sp, self.ARROW, sp, self.expression) + + @rule("Pattern") + def pattern(self) -> Rule: + return ( + group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression) + | group(self.variable_binding, self._pattern_core) + | self._pattern_core + ) + + @rule + def _pattern_core(self) -> Rule: + return self.type_expression | self.wildcard_pattern + + @rule("WildcardPattern") + def wildcard_pattern(self) -> Rule: + return self.UNDERSCORE + + @rule("VariableBinding") + def variable_binding(self) -> Rule: + return seq(self.IDENTIFIER, self.COLON) + + @rule + def object_constructor_expression(self) -> Rule: + return group(self.NEW, sp, self.type_identifier, sp, self.field_list) + + @rule + def field_list(self) -> Rule: + return alt( + seq(self.LCURLY, self.RCURLY), + group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY), + ) + + @rule + def field_values(self) -> Rule: + return ( + self.field_value + | seq(self.field_value, self.COMMA) + | seq(self.field_value, self.COMMA, sp, self.field_values) + ) + + @rule + def field_value(self) -> Rule: + return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression)) + + BLANKS = Terminal(Re.set(" ", "\t").plus()) + LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine) + COMMENT = Terminal( + Re.seq(Re.literal("//"), Re.set("\n").invert().star()), + highlight=highlight.comment.line, + trivia_mode=TriviaMode.LineComment, ) -@rule -def list_constructor_expression() -> Rule: - return alt( - group(LSQUARE, nl, RSQUARE), - group(LSQUARE, indent(nl, _expression_list), nl, RSQUARE), - ) - -@rule -def _expression_list() -> Rule: - return ( - expression - | seq(expression, COMMA) - | seq(expression, COMMA, sp, _expression_list) - ) - -@rule -def match_expression() -> Rule: - return group( - group(MATCH, sp, expression, sp, LCURLY), - indent(sp, match_arms), - sp, - RCURLY, - ) - -@rule("MatchArms") -def match_arms() -> Rule: - return _match_arms - -@rule -def _match_arms() -> Rule: - return ( - match_arm - | seq(match_arm, COMMA) - | seq(match_arm, COMMA, br, _match_arms) - ) - -@rule("MatchArm") -def match_arm() -> Rule: - return group(pattern, sp, ARROW, sp, expression) - -@rule("Pattern") -def pattern() -> Rule: - return ( - group(variable_binding, _pattern_core, sp, AND, sp, expression) - | group(variable_binding, _pattern_core) - | _pattern_core - ) - -@rule -def _pattern_core() -> Rule: - return type_expression | wildcard_pattern - -@rule("WildcardPattern") -def wildcard_pattern() -> Rule: - return UNDERSCORE - -@rule("VariableBinding") -def variable_binding() -> Rule: - return seq(IDENTIFIER, COLON) - -@rule -def object_constructor_expression() -> Rule: - return group(NEW, sp, type_identifier, sp, field_list) - -@rule -def field_list() -> Rule: - return alt( - seq(LCURLY, RCURLY), - group(LCURLY, indent(nl, field_values), nl, RCURLY), - ) - -@rule -def field_values() -> Rule: - return ( - field_value - | seq(field_value, COMMA) - | seq(field_value, COMMA, sp, field_values) - ) - -@rule -def field_value() -> Rule: - return IDENTIFIER | group(IDENTIFIER, COLON, indent(sp, expression)) - -BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus()) -LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine) -COMMENT = Terminal( - "COMMENT", - Re.seq(Re.literal("//"), Re.set("\n").invert().star()), - highlight=highlight.comment.line, - trivia_mode=TriviaMode.LineComment, -) - -ARROW = Terminal("ARROW", "->", highlight=highlight.keyword.operator) -AS = Terminal("AS", "as", highlight=highlight.keyword.operator.expression) -BAR = Terminal("BAR", "|", highlight=highlight.keyword.operator.expression) -CLASS = Terminal("CLASS", "class", highlight=highlight.storage.type.klass) -COLON = Terminal("COLON", ":", highlight=highlight.punctuation.separator) -ELSE = Terminal("ELSE", "else", highlight=highlight.keyword.control.conditional) -FOR = Terminal("FOR", "for", highlight=highlight.keyword.control) -FUN = Terminal("FUN", "fun", highlight=highlight.storage.type.function) -IDENTIFIER = Terminal( - "IDENTIFIER", - Re.seq( - Re.set(("a", "z"), ("A", "Z"), "_"), - Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), - ), -) -IF = Terminal("IF", "if", highlight=highlight.keyword.control.conditional) -IMPORT = Terminal("IMPORT", "import", highlight=highlight.keyword.other) -IN = Terminal("IN", "in", highlight=highlight.keyword.operator) -LCURLY = Terminal("LCURLY", "{", highlight=highlight.punctuation.curly_brace.open) -RCURLY = Terminal("RCURLY", "}", highlight=highlight.punctuation.curly_brace.close) -LET = Terminal("LET", "let", highlight=highlight.keyword.other) -RETURN = Terminal("RETURN", "return", highlight=highlight.keyword.control) -SEMICOLON = Terminal("SEMICOLON", ";", highlight=highlight.punctuation.separator) -STRING = Terminal( - "STRING", - # Double-quoted string. - Re.seq( - Re.literal('"'), - (~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(), - Re.literal('"'), - ) - # Single-quoted string. - | Re.seq( - Re.literal("'"), - (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(), - Re.literal("'"), - ), - highlight=highlight.string.quoted, -) -WHILE = Terminal("WHILE", "while", highlight=highlight.keyword.control) -EQUAL = Terminal("EQUAL", "=", highlight=highlight.keyword.operator.expression) -LPAREN = Terminal("LPAREN", "(", highlight=highlight.punctuation.parenthesis.open) -RPAREN = Terminal("RPAREN", ")", highlight=highlight.punctuation.parenthesis.close) -COMMA = Terminal("COMMA", ",", highlight=highlight.punctuation.separator) -SELF = Terminal("SELFF", "self", highlight=highlight.variable.language) -OR = Terminal("OR", "or", highlight=highlight.keyword.operator.expression) -IS = Terminal("IS", "is", highlight=highlight.keyword.operator.expression) -AND = Terminal("AND", "and", highlight=highlight.keyword.operator.expression) -EQUALEQUAL = Terminal("EQUALEQUAL", "==", highlight=highlight.keyword.operator.expression) -BANGEQUAL = Terminal("BANGEQUAL", "!=", highlight=highlight.keyword.operator.expression) -LESS = Terminal("LESS", "<", highlight=highlight.keyword.operator.expression) -GREATER = Terminal("GREATER", ">", highlight=highlight.keyword.operator.expression) -LESSEQUAL = Terminal("LESSEQUAL", "<=", highlight=highlight.keyword.operator.expression) -GREATEREQUAL = Terminal("GREATEREQUAL", ">=", highlight=highlight.keyword.operator.expression) -PLUS = Terminal("PLUS", "+", highlight=highlight.keyword.operator.expression) -MINUS = Terminal("MINUS", "-", highlight=highlight.keyword.operator.expression) -STAR = Terminal("STAR", "*", highlight=highlight.keyword.operator.expression) -SLASH = Terminal("SLASH", "/", highlight=highlight.keyword.operator.expression) -NUMBER = Terminal( - "NUMBER", - Re.seq( - Re.set(("0", "9")).plus(), + ARROW = Terminal("->", highlight=highlight.keyword.operator) + AS = Terminal("as", highlight=highlight.keyword.operator.expression) + BAR = Terminal("|", highlight=highlight.keyword.operator.expression) + CLASS = Terminal("class", highlight=highlight.storage.type.klass) + COLON = Terminal(":", highlight=highlight.punctuation.separator) + ELSE = Terminal("else", highlight=highlight.keyword.control.conditional) + FOR = Terminal("for", highlight=highlight.keyword.control) + FUN = Terminal("fun", highlight=highlight.storage.type.function) + IDENTIFIER = Terminal( Re.seq( - Re.literal("."), - Re.set(("0", "9")).plus(), - ).question(), + Re.set(("a", "z"), ("A", "Z"), "_"), + Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), + ), + ) + IF = Terminal("if", highlight=highlight.keyword.control.conditional) + IMPORT = Terminal("import", highlight=highlight.keyword.other) + IN = Terminal("in", highlight=highlight.keyword.operator) + LCURLY = Terminal("{", highlight=highlight.punctuation.curly_brace.open) + RCURLY = Terminal("}", highlight=highlight.punctuation.curly_brace.close) + LET = Terminal("let", highlight=highlight.keyword.other) + RETURN = Terminal("return", highlight=highlight.keyword.control) + SEMICOLON = Terminal(";", highlight=highlight.punctuation.separator) + STRING = Terminal( + # Double-quoted string. + Re.seq( + Re.literal('"'), + (~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(), + Re.literal('"'), + ) + # Single-quoted string. + | Re.seq( + Re.literal("'"), + (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(), + Re.literal("'"), + ), + highlight=highlight.string.quoted, + ) + WHILE = Terminal("while", highlight=highlight.keyword.control) + EQUAL = Terminal("=", highlight=highlight.keyword.operator.expression) + LPAREN = Terminal("(", highlight=highlight.punctuation.parenthesis.open) + RPAREN = Terminal(")", highlight=highlight.punctuation.parenthesis.close) + COMMA = Terminal(",", highlight=highlight.punctuation.separator) + SELF = Terminal("self", name="SELFF", highlight=highlight.variable.language) + OR = Terminal("or", highlight=highlight.keyword.operator.expression) + IS = Terminal("is", highlight=highlight.keyword.operator.expression) + AND = Terminal("and", highlight=highlight.keyword.operator.expression) + EQUALEQUAL = Terminal("==", highlight=highlight.keyword.operator.expression) + BANGEQUAL = Terminal("!=", highlight=highlight.keyword.operator.expression) + LESS = Terminal("<", highlight=highlight.keyword.operator.expression) + GREATER = Terminal(">", highlight=highlight.keyword.operator.expression) + LESSEQUAL = Terminal("<=", highlight=highlight.keyword.operator.expression) + GREATEREQUAL = Terminal(">=", highlight=highlight.keyword.operator.expression) + PLUS = Terminal("+", highlight=highlight.keyword.operator.expression) + MINUS = Terminal("-", highlight=highlight.keyword.operator.expression) + STAR = Terminal("*", highlight=highlight.keyword.operator.expression) + SLASH = Terminal("/", highlight=highlight.keyword.operator.expression) + NUMBER = Terminal( Re.seq( - Re.set("e", "E"), - Re.set("+", "-").question(), Re.set(("0", "9")).plus(), - ).question(), - ), - highlight=highlight.constant.numeric, -) -TRUE = Terminal("TRUE", "true", highlight=highlight.constant.language) -FALSE = Terminal("FALSE", "false", highlight=highlight.constant.language) -BANG = Terminal("BANG", "!", highlight=highlight.keyword.operator.expression) -DOT = Terminal("DOT", ".", highlight=highlight.punctuation.separator) -MATCH = Terminal("MATCH", "match", highlight=highlight.keyword.other) -EXPORT = Terminal("EXPORT", "export", highlight=highlight.keyword.other) -UNDERSCORE = Terminal("UNDERSCORE", "_", highlight=highlight.variable.language) -NEW = Terminal("NEW", "new", highlight=highlight.keyword.operator) -LSQUARE = Terminal("LSQUARE", "[", highlight=highlight.punctuation.square_bracket.open) -RSQUARE = Terminal("RSQUARE", "]", highlight=highlight.punctuation.square_bracket.close) + Re.seq( + Re.literal("."), + Re.set(("0", "9")).plus(), + ).question(), + Re.seq( + Re.set("e", "E"), + Re.set("+", "-").question(), + Re.set(("0", "9")).plus(), + ).question(), + ), + highlight=highlight.constant.numeric, + ) + TRUE = Terminal("true", highlight=highlight.constant.language) + FALSE = Terminal("false", highlight=highlight.constant.language) + BANG = Terminal("!", highlight=highlight.keyword.operator.expression) + DOT = Terminal(".", highlight=highlight.punctuation.separator) + MATCH = Terminal("match", highlight=highlight.keyword.other) + EXPORT = Terminal("export", highlight=highlight.keyword.other) + UNDERSCORE = Terminal("_", highlight=highlight.variable.language) + NEW = Terminal("new", highlight=highlight.keyword.operator) + LSQUARE = Terminal("[", highlight=highlight.punctuation.square_bracket.open) + RSQUARE = Terminal("]", highlight=highlight.punctuation.square_bracket.close) -FineGrammar=Grammar( - start=file, - trivia=[BLANKS, LINE_BREAK, COMMENT], - pretty_indent=" ", - precedence=[ - (Assoc.RIGHT, [EQUAL]), - (Assoc.LEFT, [OR]), - (Assoc.LEFT, [IS]), - (Assoc.LEFT, [AND]), - (Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]), - (Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]), - (Assoc.LEFT, [PLUS, MINUS]), - (Assoc.LEFT, [STAR, SLASH]), - (Assoc.LEFT, [primary_expression]), - (Assoc.LEFT, [LPAREN]), - (Assoc.LEFT, [DOT]), - # - # If there's a confusion about whether to make an IF - # statement or an expression, prefer the statement. - # - (Assoc.NONE, [if_statement]), - ], -) if __name__ == "__main__": from pathlib import Path @@ -520,7 +525,7 @@ if __name__ == "__main__": from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries # TODO: Actually generate a lexer/parser for some runtime. - grammar = FineGrammar + grammar = FineGrammar() table = grammar.build_table() # print(table.format()) diff --git a/lrparser.mk b/lrparser.mk index 1f8ae98..1c10d26 100644 --- a/lrparser.mk +++ b/lrparser.mk @@ -1,5 +1,5 @@ # This file generated by makedep.py -VERSION=0.8 +VERSION=0.7.9 PYTHON_SOURCES=\ parser/tree_sitter.py \ parser/runtime.py \ diff --git a/parser/emacs.py b/parser/emacs.py index 6ab3036..1a73d88 100644 --- a/parser/emacs.py +++ b/parser/emacs.py @@ -25,6 +25,8 @@ class FaceQuery: def gather_faces(grammar: parser.Grammar): + nts = {nt.name: nt for nt in grammar.non_terminals()} + def scoop(node: str, input: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]: parts = [] for item in input: @@ -50,12 +52,13 @@ def gather_faces(grammar: parser.Grammar): ) ) - elif isinstance(item, parser.NonTerminal): - if item.transparent: - if item.name in visited: + elif isinstance(item, str): + nt = nts[item] + if nt.transparent: + if nt.name in visited: continue - visited.add(item.name) - body = item.definition + visited.add(nt.name) + body = nt.fn(grammar) for production in body.flatten(with_metadata=True): parts.extend(scoop(node, production, visited)) @@ -66,7 +69,7 @@ def gather_faces(grammar: parser.Grammar): if rule.transparent: continue - body = rule.definition + body = rule.fn(grammar) for production in body.flatten(with_metadata=True): queries.extend(scoop(rule.name, production, set())) diff --git a/parser/parser.py b/parser/parser.py index a54da0f..320ce79 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -17,24 +17,25 @@ the thing that processes the tables. ## Making Grammars -Define a series of terminals (with `Terminal`) and rules (as functions decorated -with `@rule`), and then pass the starting rule to the constructor of a `Grammar` -object: +To get started, create a grammar that derives from the `Grammar` class. Create +one method per nonterminal, decorated with the `rule` decorator. Here's an +example: - @rule - def expression(self): - return seq(self.expression, self.PLUS, self.term) | self.term - @rule - def term(self): - return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID + class SimpleGrammar(Grammar): + @rule + def expression(self): + return seq(self.expression, self.PLUS, self.term) | self.term - PLUS = Terminal('+') - LPAREN = Terminal('(') - RPAREN = Terminal(')') - ID = Terminal('id') + @rule + def term(self): + return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID + + PLUS = Terminal('+') + LPAREN = Terminal('(') + RPAREN = Terminal(')') + ID = Terminal('id') - grammar = Grammar(start=expression) ## Using grammars @@ -1532,9 +1533,7 @@ class ParserGenerator: return builder.flush(config_sets) -FlattenedWithMetadata = list[ - "NonTerminal|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]" -] +FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"] ############################################################################### @@ -1579,32 +1578,26 @@ class Rule: class Terminal(Rule): """A token, or terminal symbol in the grammar.""" - name: str + name: str | None pattern: "str | Re" meta: dict[str, typing.Any] regex: bool error_name: str | None - definition_location: str def __init__( self, - name: str, pattern: "str|Re", *, + name: str | None = None, error_name: str | None = None, **kwargs, ): - # TODO: Consider identifying the name from some kind of globals - # dictionary or something if necessary. self.name = name self.pattern = pattern self.meta = kwargs self.regex = isinstance(pattern, Re) self.error_name = error_name - caller = inspect.stack()[1] - self.definition_location = f"{caller.filename}:{caller.lineno}" - def flatten( self, with_metadata: bool = False ) -> typing.Generator[FlattenedWithMetadata, None, None]: @@ -1624,17 +1617,14 @@ class NonTerminal(Rule): grammar class. """ - fn: typing.Callable[[], Rule] + fn: typing.Callable[["Grammar"], Rule] name: str transparent: bool error_name: str | None - definition_location: str - _definition: Rule | None - _body: "list[list[NonTerminal | Terminal]] | None" def __init__( self, - fn: typing.Callable[[], Rule], + fn: typing.Callable[["Grammar"], Rule], name: str | None = None, transparent: bool = False, error_name: str | None = None, @@ -1655,37 +1645,22 @@ class NonTerminal(Rule): self.name = name or fn.__name__ self.transparent = transparent self.error_name = error_name - self._definition = None - self._body = None - caller = inspect.stack()[1] - self.definition_location = f"{caller.filename}:{caller.lineno}" + def generate_body(self, grammar) -> list[list[str | Terminal]]: + """Generate the body of the non-terminal. - @property - def definition(self) -> Rule: - """The rule that is the definition of this nonterminal. - - (As opposed this rule itself, which is... itself.) - """ - if self._definition is None: - self._definition = self.fn() - return self._definition - - @property - def body(self) -> "list[list[NonTerminal | Terminal]]": - """The flattened body of the nonterminal: a list of productions where - each production is a sequence of Terminals and NonTerminals. + We do this by first calling the associated function in order to get a + Rule, and then flattening the Rule into the associated set of + productions. We strip the metadata from the flattened result to make + life a little easier for the caller. """ - def without_metadata(result: FlattenedWithMetadata) -> list[NonTerminal | Terminal]: + def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]: for item in result: assert not isinstance(item, tuple) - return typing.cast(list[NonTerminal | Terminal], result) + return typing.cast(list[str | Terminal], result) - if self._body is None: - self._body = [without_metadata(rule) for rule in self.fn().flatten(with_metadata=False)] - - return self._body + return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)] def flatten( self, with_metadata: bool = False @@ -1694,7 +1669,7 @@ class NonTerminal(Rule): # the context of some other production. Yield ourselves, and trust that # in time we will be asked to generate our body. del with_metadata - yield [self] + yield [self.name] class AlternativeRule(Rule): @@ -1800,7 +1775,7 @@ def mark(rule: Rule, **kwargs) -> Rule: @typing.overload -def rule(f: typing.Callable, /) -> NonTerminal: ... +def rule(f: typing.Callable, /) -> Rule: ... @typing.overload @@ -1808,15 +1783,16 @@ def rule( name: str | None = None, transparent: bool | None = None, error_name: str | None = None, -) -> typing.Callable[[typing.Callable[[], Rule]], NonTerminal]: ... +) -> typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]: ... def rule( name: str | None | typing.Callable = None, transparent: bool | None = None, error_name: str | None = None, -) -> NonTerminal | typing.Callable[[typing.Callable[[], Rule]], NonTerminal]: - """The decorator that marks a function as a nonterminal rule. +) -> Rule | typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]: + """The decorator that marks a method in a Grammar object as a nonterminal + rule. As with all the best decorators, it can be called with or without arguments. If called with one argument, that argument is a name that overrides the name @@ -1825,7 +1801,7 @@ def rule( if callable(name): return rule()(name) - def wrapper(f: typing.Callable[[], Rule]): + def wrapper(f: typing.Callable[[typing.Any], Rule]): nonlocal name nonlocal transparent nonlocal error_name @@ -2770,150 +2746,145 @@ class TriviaMode(enum.Enum): ############################################################################### -# Finally, the grammar class. +# Finally, the base class for grammars ############################################################################### -PrecedenceList = list[typing.Tuple[Assoc, list[Terminal|NonTerminal]]] - -def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str,NonTerminal], dict[str,Terminal]]: - """Starting from the given NonTerminal, gather all of the symbols - (NonTerminals and Terminals) that make up the grammar. - """ - # NOTE: We use a dummy dictionary here to preserve insertion order. - # That way the first element in named_rules is always the start - # symbol! - rules: dict[NonTerminal, int] = {} - terminals: dict[Terminal, int] = {} - - # STEP 1 is to just gather all of the symbols that we can find. - queue: list[NonTerminal] = [start] - while len(queue) > 0: - nt = queue.pop() - if nt in rules: - continue - - # TODO: Here we can track modules (via the funcitons that make up - # nonterminals, maybe) and maybe use that to infer terminal - # names. - rules[nt] = len(rules) - - for rule in nt.body: - for symbol in rule: - if isinstance(symbol, NonTerminal): - if symbol not in rules: - queue.append(symbol) - - elif isinstance(symbol, Terminal): - terminals[symbol] = len(terminals) - - else: - typing.assert_never(symbol) - - # (Terminals are also reachable!) - for symbol in trivia: - terminals[symbol] = len(terminals) - - # Step 2 is to organize all of these things and check them for errors. - named_rules: dict[str, NonTerminal] = {} - for rule in rules: - existing = named_rules.get(rule.name) - if existing is not None: - # TODO TEST - raise ValueError(f"""Found more than one rule named {rule.name}: -- {existing.definition_location} -- {rule.definition_location}""") - named_rules[rule.name] = rule - - named_terminals: dict[str, Terminal] = {} - for terminal in terminals: - existing = named_terminals.get(terminal.name) - if existing is not None: - # TODO TEST - raise ValueError(f"""Found more than one terminal named {terminal.name}: -- {existing.definition_location} -- {terminal.definition_location}""") - - existing_rule = named_rules.get(terminal.name) - if existing_rule is not None: - # TODO TEST - raise ValueError(f"""Found a terminal and a rule both named {terminal.name}: -- The rule was defined at {existing_rule.definition_location} -- The terminal was defined at {terminal.definition_location}""") - - named_terminals[terminal.name] = terminal - - return (named_rules, named_terminals) +PrecedenceList = list[typing.Tuple[Assoc, list[Rule | str]]] class Grammar: - """A container that holds all the terminals and nonterminals for a - given grammar. The terminals and nonterminals are defined elsewhere; - provide the starting rule and this object will build the grammar from - everything accessible. + """The base class for defining a grammar. + + Inherit from this, and and define members for your nonterminals, and then + use the `build_table` method to construct the parse tables. + Here's an example of a simple grammar: - @rule - def expression(self): - return seq(self.expression, self.PLUS, self.term) | self.term + class SimpleGrammar(Grammar): + @rule + def expression(self): + return seq(self.expression, self.PLUS, self.term) | self.term - @rule - def term(self): - return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID + @rule + def term(self): + return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID - PLUS = Terminal('+') - LPAREN = Terminal('(') - RPAREN = Terminal(')') - ID = Terminal('id') + PLUS = Terminal('+') + LPAREN = Terminal('(') + RPAREN = Terminal(')') + ID = Terminal('id') - grammar = Grammar(start=expression) Not very exciting, perhaps, but it's something. """ - start: NonTerminal - name: str - pretty_indent: str | None + _precedence: dict[str, typing.Tuple[Assoc, int]] + _generator: type[ParserGenerator] _terminals: dict[str, Terminal] _nonterminals: dict[str, NonTerminal] _trivia: list[Terminal] - _precedence: dict[str, typing.Tuple[Assoc, int]] def __init__( self, - start: NonTerminal, + start: str | NonTerminal | None = None, precedence: PrecedenceList | None = None, - trivia: list[Terminal] | None = None, + generator: type[ParserGenerator] | None = None, + trivia: list[str | Terminal] | None = None, name: str | None = None, - pretty_indent: str | None = None, ): - if start.transparent: - # TODO: TEST - raise ValueError("The start rule cannot be transparent") + if start is None: + start = getattr(self, "start", None) + if start is None: + raise ValueError( + "The default start rule must either be specified in the constructor or as an " + "attribute in the class." + ) + if isinstance(start, NonTerminal): + start = start.name if precedence is None: - precedence = [] + precedence = getattr(self, "precedence", []) assert precedence is not None + if generator is None: + generator = getattr(self, "generator", ParserGenerator) + assert generator is not None + if trivia is None: - trivia = [] + trivia = getattr(self, "trivia", []) assert trivia is not None + # Fixup terminal names with the name of the member that declared it. + terminals = {} + for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)): + if t.name is None: + t.name = n + + if n in terminals: + raise ValueError(f"More than one terminal has the name '{n}'") + terminals[n] = t + + # Get the nonterminals. + nonterminals = {} + for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)): + if nt.name in nonterminals: + raise ValueError(f"More than one nonterminal found with the name '{nt.name}'") + + if nt.name in terminals: + raise ValueError( + f"'{nt.name}' is the name of both a Terminal and a NonTerminal rule" + ) + + nonterminals[nt.name] = nt + + # Resolve the trivia declarations correctly. + resolved_trivia: list[Terminal] = [] + for t in trivia: + if isinstance(t, str): + resolved = terminals.get(t) + if resolved is None: + raise ValueError(f"The trivia '{t}' is not a terminal name") + resolved_trivia.append(resolved) + elif isinstance(t, Terminal): + resolved_trivia.append(t) + else: + raise ValueError(f"{t} must be either a terminal name or literally a terminal") + # Fix up the precedence table. precedence_table = {} for prec, (associativity, symbols) in enumerate(precedence): for symbol in symbols: - precedence_table[symbol.name] = (associativity, prec + 1) + key = None + if isinstance(symbol, Terminal): + key = symbol.name + if key is None: + raise ValueError(f"{symbol} is a terminal that has not had a name set yet") + elif isinstance(symbol, NonTerminal): + key = symbol.name + elif isinstance(symbol, str): + if symbol in terminals or symbol in nonterminals: + key = symbol + + if key is None: + raise ValueError( + f"{symbol} must be either a Token or a NonTerminal, or the name of one" + ) + + precedence_table[key] = (associativity, prec + 1) if name is None: - name = "unknown" + name = getattr(self, "name", None) + if name is None: + name = self.__class__.__name__.removesuffix("Grammar").lower() - self.start = start - self.name = name - self._nonterminals, self._terminals = gather_grammar(start, trivia) - self._trivia = trivia self._precedence = precedence_table - self.pretty_indent = pretty_indent + self.start = start + self._generator = generator + self._terminals = terminals + self._nonterminals = nonterminals + self._trivia = resolved_trivia + self.name = name def terminals(self) -> list[Terminal]: return list(self._terminals.values()) @@ -2927,7 +2898,55 @@ class Grammar: def get_precedence(self, name: str) -> None | tuple[Assoc, int]: return self._precedence.get(name) - def desugar(self) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]: + # TODO: The flattened form should retain NonTerminal, not just str. + def generate_nonterminal_dict( + self, start: str | None = None + ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]: + """Convert the rules into a dictionary of productions, and a set of + the names of transparent nonterminals. + + Our table generators work on a very flat set of productions. This is the + first step in flattening the productions from the members: walk the rules + starting from the given start rule and flatten them, one by one, into a + dictionary that maps nonterminal rule name to its associated list of + productions. + """ + if start is None: + start = self.start + + nonterminals = self._nonterminals + transparents = {rule.name for rule in nonterminals.values() if rule.transparent} + + grammar = {} + + rule = nonterminals.get(start) + if rule is None: + raise ValueError(f"Cannot find a rule named '{start}'") + if rule.transparent: + raise ValueError("The start rule cannot be transparent") + queue = [rule] + while len(queue) > 0: + rule = queue.pop() + if rule.name in grammar: + continue + + body = rule.generate_body(self) + for clause in body: + for symbol in clause: + if not isinstance(symbol, Terminal): + assert isinstance(symbol, str) + nonterminal = nonterminals.get(symbol) + if nonterminal is None: + raise ValueError(f"While processing {rule.name}: cannot find {symbol}") + queue.append(nonterminal) + + grammar[rule.name] = body + + return (grammar, transparents) + + def desugar( + self, start: str | None = None + ) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]: """Convert the rules into a flat list of productions. Our table generators work from a very flat set of productions. The form @@ -2935,27 +2954,37 @@ class Grammar: generate_nonterminal_dict- less useful to people, probably, but it is the input form needed by the Generator. """ - grammar: list[tuple[str,list[str]]] = [ - (rule.name, [s.name for s in production]) - for rule in self._nonterminals.values() - for production in rule.body - ] - assert grammar[0][0] == self.start.name + temp_grammar, transparents = self.generate_nonterminal_dict(start) - transparents = {name for name, rule in self._nonterminals.items() if rule.transparent} + grammar = [] + for rule_name, clauses in temp_grammar.items(): + for clause in clauses: + new_clause = [] + for symbol in clause: + if isinstance(symbol, Terminal): + if symbol.name in temp_grammar: + raise ValueError( + f"'{symbol.name}' is the name of both a Terminal and a NonTerminal rule. This will cause problems." + ) + new_clause.append(symbol.name) + else: + new_clause.append(symbol) + + grammar.append((rule_name, new_clause)) return grammar, transparents - def build_table(self) -> ParseTable: - """Construct a parse table for this grammar.""" - desugared, transparents = self.desugar() + def build_table(self, start: str | None = None, generator=None) -> ParseTable: + """Construct a parse table for this grammar, starting at the named + nonterminal rule. + """ + if start is None: + start = self.start + desugared, transparents = self.desugar(start) - gen = ParserGenerator( - self.start.name, - desugared, - precedence=self._precedence, - transparents=transparents, - ) + if generator is None: + generator = self._generator + gen = generator(start, desugared, precedence=self._precedence, transparents=transparents) table = gen.gen_table() for t in self._trivia: diff --git a/parser/tree_sitter.py b/parser/tree_sitter.py index 683ea16..7f9d231 100644 --- a/parser/tree_sitter.py +++ b/parser/tree_sitter.py @@ -263,7 +263,8 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str): if rule.transparent: rule_name = "_" + rule_name - rule_definition = convert_to_tree_sitter(rule.definition, grammar) + body = rule.fn(grammar) + rule_definition = convert_to_tree_sitter(body, grammar) if rule_definition is None: raise Exception(f"Tree-sitter does not support the empty rule {rule_name}") rule_definition = apply_precedence(rule_definition, rule.name, grammar) @@ -282,6 +283,7 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str): def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str): + nts = {nt.name: nt for nt in grammar.non_terminals()} scope_suffix = "." + grammar.name def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]: @@ -298,12 +300,13 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str): raise Exception("Highlight must come with a field name") # TODO parts.append(f"{field_name}: _ @{highlight.scope}{scope_suffix}") - elif isinstance(item, parser.NonTerminal): - if item.transparent: - if item.name in visited: + elif isinstance(item, str): + nt = nts[item] + if nt.transparent: + if nt.name in visited: continue - visited.add(item.name) - body = item.definition + visited.add(nt.name) + body = nt.fn(grammar) for production in body.flatten(with_metadata=True): parts.extend(scoop(production, visited)) @@ -314,7 +317,7 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str): if rule.transparent: continue - body = rule.definition + body = rule.fn(grammar) patterns = set() for production in body.flatten(with_metadata=True): # Scoop up the meta... diff --git a/parser/wadler/builder.py b/parser/wadler/builder.py index 1d2ea95..fa2e23c 100644 --- a/parser/wadler/builder.py +++ b/parser/wadler/builder.py @@ -79,7 +79,11 @@ class MatcherTable: newline_replace: dict[str, str] -def _compile_nonterminal_matcher(rule: parser.NonTerminal) -> MatcherTable: +def _compile_nonterminal_matcher( + grammar: parser.Grammar, + nonterminals: dict[str, parser.NonTerminal], + rule: parser.NonTerminal, +) -> MatcherTable: """Generate a matcher table for a single nonterminal. See the docs for [MatcherTable] to understand the result. @@ -107,7 +111,7 @@ def _compile_nonterminal_matcher(rule: parser.NonTerminal) -> MatcherTable: def compile_nonterminal(name: str, rule: parser.NonTerminal): if name not in visited: visited.add(name) - for production in rule.fn().flatten(with_metadata=True): + for production in rule.fn(grammar).flatten(with_metadata=True): trans_prod = compile_production(production) generated_grammar.append((name, trans_prod)) @@ -122,18 +126,19 @@ def _compile_nonterminal_matcher(rule: parser.NonTerminal) -> MatcherTable: result = [] for item in production: - if isinstance(item, parser.NonTerminal): - if item.transparent: + if isinstance(item, str): + nt = nonterminals[item] + if nt.transparent: # If it's transparent then we make a new set of # productions that covers the contents of the # transparent nonterminal. - name = "xxx_" + item.name - compile_nonterminal(name, item) + name = "xxx_" + nt.name + compile_nonterminal(name, nt) result.append(name) else: # Otherwise it's a "token" in our input, named # "tree_{whatever}". - result.append(f"tree_{item.name}") + result.append(f"tree_{item}") elif isinstance(item, parser.Terminal): # If it's a terminal it will appear in our input as @@ -252,7 +257,7 @@ def _compile_nonterminal_matcher(rule: parser.NonTerminal) -> MatcherTable: start_name = f"yyy_{rule.name}" compile_nonterminal(start_name, rule) - gen = parser.ParserGenerator(start_name, generated_grammar) + gen = grammar._generator(start_name, generated_grammar) parse_table = gen.gen_table() for (_, replacement), rule_name in newlines.items(): @@ -291,7 +296,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) -> matchers = {} if indent is None: - indent = grammar.pretty_indent + indent = getattr(grammar, "pretty_indent", None) if indent is None: indent = " " @@ -302,7 +307,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) -> trivia_mode[t.name] = mode for name, rule in nonterminals.items(): - matchers[name] = _compile_nonterminal_matcher(rule) + matchers[name] = _compile_nonterminal_matcher(grammar, nonterminals, rule) return PrettyTable( indent, diff --git a/pyproject.toml b/pyproject.toml index c4b4207..860dc74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ classifiers = [ "Private :: Do Not Upload", # Probably. "License :: OSI Approved :: MIT License", ] -version = "0.8" +version = "0.7.9" dependencies = [] requires-python = ">=3.12" readme = "README.md" diff --git a/sql.py b/sql.py index f849631..4ed749d 100644 --- a/sql.py +++ b/sql.py @@ -2,7 +2,6 @@ from parser import * NAME = Terminal( - "NAME", Re.seq( Re.set(("a", "z"), ("A", "Z"), "_"), Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), @@ -10,7 +9,6 @@ NAME = Terminal( ) STRING = Terminal( - "STRING", Re.seq( Re.literal("'"), (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(), @@ -20,7 +18,6 @@ STRING = Terminal( ) NUMBER = Terminal( - "NUMBER", Re.seq( Re.set(("0", "9")).plus(), Re.seq( @@ -36,118 +33,118 @@ NUMBER = Terminal( highlight=highlight.constant.numeric, ) -OR = Terminal("OR", "or") -AND = Terminal("AND", "and") -NOT = Terminal("NOT", "not") +OR = Terminal("or") +AND = Terminal("and") +NOT = Terminal("not") COMPARISON = Terminal( - "COMPARISON", Re.literal("=") | Re.literal("<>") | Re.literal("<") | Re.literal(">") | Re.literal("<=") - | Re.literal(">="), + | Re.literal(">=") ) -PLUS = Terminal("PLUS", "+") -MINUS = Terminal("MINUS", "-") -STAR = Terminal("STAR", "*") -SLASH = Terminal("SLASH", "/") +PLUS = Terminal("+") +MINUS = Terminal("-") +STAR = Terminal("*") +SLASH = Terminal("/") -ALL = Terminal("ALL", "all") -AMMSC = Terminal("AMMSC", "ammsc") -ANY = Terminal("ANY", "any") -AS = Terminal("AS", "as") -ASC = Terminal("ASC", "asc") -AUTHORIZATION = Terminal("AUTHORIZATION", "authorization") -BETWEEN = Terminal("BETWEEN", "between") -BY = Terminal("BY", "by") -CHARACTER = Terminal("CHARACTER", "character") -CHECK = Terminal("CHECK", "check") -CLOSE = Terminal("CLOSE", "close") -COMMIT = Terminal("COMMIT", "commit") -CONTINUE = Terminal("CONTINUE", "continue") -CREATE = Terminal("CREATE", "create") -CURRENT = Terminal("CURRENT", "current") -CURSOR = Terminal("CURSOR", "cursor") -DECIMAL = Terminal("DECIMAL", "decimal") -DECLARE = Terminal("DECLARE", "declare") -DEFAULT = Terminal("DEFAULT", "default") -DELETE = Terminal("DELETE", "delete") -DESC = Terminal("DESC", "desc") -DISTINCT = Terminal("DISTINCT", "distinct") -DOUBLE = Terminal("DOUBLE", "double") -ESCAPE = Terminal("ESCAPE", "escape") -EXISTS = Terminal("EXISTS", "exists") -FETCH = Terminal("FETCH", "fetch") -FLOAT = Terminal("FLOAT", "float") -FOR = Terminal("FOR", "for") -FOREIGN = Terminal("FOREIGN", "foreign") -FOUND = Terminal("FOUND", "found") -FROM = Terminal("FROM", "from") -GOTO = Terminal("GOTO", "goto") -GRANT = Terminal("GRANT", "grant") -GROUP = Terminal("GROUP", "group") -HAVING = Terminal("HAVING", "having") -IN = Terminal("IN", "in") -INDICATOR = Terminal("INDICATOR", "indicator") -INSERT = Terminal("INSERT", "insert") -INTEGER = Terminal("INTEGER", "integer") -INTO = Terminal("INTO", "into") -IS = Terminal("IS", "is") -KEY = Terminal("KEY", "key") -LANGUAGE = Terminal("LANGUAGE", "language") -LIKE = Terminal("LIKE", "like") -NULL = Terminal("NULL", "null") -NUMERIC = Terminal("NUMERIC", "numeric") -OF = Terminal("OF", "of") -ON = Terminal("ON", "on") -OPEN = Terminal("OPEN", "open") -OPTION = Terminal("OPTION", "option") -ORDER = Terminal("ORDER", "order") -PARAMETER = Terminal("PARAMETER", "parameter") -PRECISION = Terminal("PRECISION", "precision") -PRIMARY = Terminal("PRIMARY", "primary") -PRIVILEGES = Terminal("PRIVILEGES", "privileges") -PROCEDURE = Terminal("PROCEDURE", "procedure") -PUBLIC = Terminal("PUBLIC", "public") -REAL = Terminal("REAL", "real") -REFERENCES = Terminal("REFERENCES", "references") -ROLLBACK = Terminal("ROLLBACK", "rollback") -SCHEMA = Terminal("SCHEMA", "schema") -SELECT = Terminal("SELECT", "select") -SET = Terminal("SET", "set") -SMALLINT = Terminal("SMALLINT", "smallint") -SOME = Terminal("SOME", "some") -SQLCODE = Terminal("SQLCODE", "sqlcode") -SQLERROR = Terminal("SQLERROR", "sqlerror") -TABLE = Terminal("TABLE", "table") -TO = Terminal("TO", "to") -UNION = Terminal("UNION", "union") -UNIQUE = Terminal("UNIQUE", "unique") -UPDATE = Terminal("UPDATE", "update") -USER = Terminal("USER", "user") -VALUES = Terminal("VALUES", "values") -VIEW = Terminal("VIEW", "view") -WHENEVER = Terminal("WHENEVER", "whenever") -WHERE = Terminal("WHERE", "where") -WITH = Terminal("WITH", "with") -WORK = Terminal("WORK", "work") +precedence = [ + (Assoc.LEFT, ["OR"]), + (Assoc.LEFT, ["AND"]), + (Assoc.LEFT, ["NOT"]), + (Assoc.LEFT, ["COMPARISON"]), + (Assoc.LEFT, ["PLUS", "MINUS"]), + (Assoc.LEFT, ["STAR", "SLASH"]), + # TODO: Unary minus +] -SEMICOLON = Terminal("SEMICOLON", ";") -LPAREN = Terminal("LPAREN", "(") -RPAREN = Terminal("RPAREN", ")") -COMMA = Terminal("COMMA", ",") -EQUAL = Terminal("EQUAL", "=") -DOT = Terminal("DOT", ".") +ALL = Terminal("all") +AMMSC = Terminal("ammsc") +ANY = Terminal("any") +ASC = Terminal("asc") +AUTHORIZATION = Terminal("authorization") +BETWEEN = Terminal("between") +BY = Terminal("by") +CHARACTER = Terminal("character") +CHECK = Terminal("check") +CLOSE = Terminal("close") +COMMIT = Terminal("commit") +CONTINUE = Terminal("continue") +CREATE = Terminal("create") +CURRENT = Terminal("current") +CURSOR = Terminal("cursor") +DECIMAL = Terminal("decimal") +DECLARE = Terminal("declare") +DEFAULT = Terminal("default") +DELETE = Terminal("delete") +DESC = Terminal("desc") +DISTINCT = Terminal("distinct") +DOUBLE = Terminal("double") +ESCAPE = Terminal("escape") +EXISTS = Terminal("exists") +FETCH = Terminal("fetch") +FLOAT = Terminal("float") +FOR = Terminal("for") +FOREIGN = Terminal("foreign") +FOUND = Terminal("found") +FROM = Terminal("from") +GOTO = Terminal("goto") +GRANT = Terminal("grant") +GROUP = Terminal("group") +HAVING = Terminal("having") +IN = Terminal("in") +INDICATOR = Terminal("indicator") +INSERT = Terminal("insert") +INTEGER = Terminal("integer") +INTO = Terminal("into") +IS = Terminal("is") +KEY = Terminal("key") +LANGUAGE = Terminal("language") +LIKE = Terminal("like") +NULL = Terminal("null") +NUMERIC = Terminal("numeric") +OF = Terminal("of") +ON = Terminal("on") +OPEN = Terminal("open") +OPTION = Terminal("option") +ORDER = Terminal("order") +PARAMETER = Terminal("parameter") +PRECISION = Terminal("precision") +PRIMARY = Terminal("primary") +PRIVILEGES = Terminal("privileges") +PROCEDURE = Terminal("procedure") +PUBLIC = Terminal("public") +REAL = Terminal("real") +REFERENCES = Terminal("references") +ROLLBACK = Terminal("rollback") +SCHEMA = Terminal("schema") +SELECT = Terminal("select") +SET = Terminal("set") +SMALLINT = Terminal("smallint") +SOME = Terminal("some") +SQLCODE = Terminal("sqlcode") +SQLERROR = Terminal("sqlerror") +TABLE = Terminal("table") +TO = Terminal("to") +UNION = Terminal("union") +UNIQUE = Terminal("unique") +UPDATE = Terminal("update") +USER = Terminal("user") +VALUES = Terminal("values") +VIEW = Terminal("view") +WHENEVER = Terminal("whenever") +WHERE = Terminal("where") +WITH = Terminal("with") +WORK = Terminal("work") -BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus()) -LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine) -COMMENT = Terminal( - "COMMENT", - Re.seq(Re.literal("--"), Re.set("\n").invert().star()), - highlight=highlight.comment.line, - trivia_mode=TriviaMode.LineComment, -) +SEMICOLON = Terminal(";") +LPAREN = Terminal("(") +RPAREN = Terminal(")") +COMMA = Terminal(",") +EQUAL = Terminal("=") +DOT = Terminal(".") +AS = Terminal("as") @rule @@ -743,19 +740,3 @@ def user(): @rule def when_action(): return (GOTO + NAME) | CONTINUE - - -SQL = Grammar( - start=sql_list, - precedence=[ - (Assoc.LEFT, [OR]), - (Assoc.LEFT, [AND]), - (Assoc.LEFT, [NOT]), - (Assoc.LEFT, [COMPARISON]), - (Assoc.LEFT, [PLUS, MINUS]), - (Assoc.LEFT, [STAR, SLASH]), - # TODO: Unary minus - ], - trivia=[BLANKS, COMMENT, LINE_BREAK], - name="SQL", -) diff --git a/tests/test_error_recovery.py b/tests/test_error_recovery.py index dbb254a..96b5c49 100644 --- a/tests/test_error_recovery.py +++ b/tests/test_error_recovery.py @@ -11,141 +11,138 @@ import parser.runtime as runtime # Tests based on # https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html +class LGrammar(Grammar): + start = "File" + trivia = ["BLANKS"] -BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus()) - -TRUE = Terminal("TRUE", "true") -FALSE = Terminal("FALSE", "false") -INT = Terminal("INT", Re.set(("0", "9")).plus()) -FN = Terminal("FN", "fn") -ARROW = Terminal("ARROW", "->") -COMMA = Terminal("COMMA", ",") -LPAREN = Terminal("LPAREN", "(") -RPAREN = Terminal("RPAREN", ")") -LCURLY = Terminal("LCURLY", "{") -RCURLY = Terminal("RCURLY", "}") -COLON = Terminal("COLON", ":") -SEMICOLON = Terminal("SEMICOLON", ";") -LET = Terminal("LET", "let") -EQUAL = Terminal("EQUAL", "=") -RETURN = Terminal("RETURN", "return") -PLUS = Terminal("PLUS", "+") -MINUS = Terminal("MINUS", "-") -STAR = Terminal("STAR", "*") -SLASH = Terminal("SLASH", "/") - -NAME = Terminal( - "NAME", - Re.seq( - Re.set(("a", "z"), ("A", "Z"), "_"), - Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), - ), -) - - -@rule -def File(): - # TODO: Make lists easier - return _functions - -@rule -def _functions(): - return Function | (_functions + Function) - -@rule -def Function(): - return FN + NAME + ParamList + opt(ARROW + TypeExpr) + Block - -@rule -def ParamList(): - return LPAREN + opt(_parameters) + RPAREN - -@rule -def _parameters(): - # NOTE: The ungrammar in the reference does not talk about commas - # required between parameters so this massages it to make them - # required. Commas are in the list not the param, which is more - # awkward for processing but not terminally so. - return (Param + opt(COMMA)) | (Param + COMMA + _parameters) - -@rule -def Param(): - return NAME + COLON + TypeExpr - -@rule -def TypeExpr(): - return NAME - -@rule -def Block(): - return LCURLY + opt(_statements) + RCURLY - -@rule -def _statements(): - return Stmt | _statements + Stmt - -@rule -def Stmt(): - return StmtExpr | StmtLet | StmtReturn - -@rule -def StmtExpr(): - return Expr + SEMICOLON - -@rule -def StmtLet(): - return LET + NAME + EQUAL + Expr + SEMICOLON - -@rule -def StmtReturn(): - return RETURN + Expr + SEMICOLON - -@rule -def Expr(): - return ExprLiteral | ExprName | ExprParen | ExprBinary | ExprCall - -@rule -def ExprLiteral(): - return INT | TRUE | FALSE - -@rule -def ExprName(): - return NAME - -@rule -def ExprParen(): - return LPAREN + Expr + RPAREN - -@rule -def ExprBinary(): - return Expr + (PLUS | MINUS | STAR | SLASH) + Expr - -@rule -def ExprCall(): - return Expr + ArgList - -@rule -def ArgList(): - return LPAREN + opt(_arg_star) + RPAREN - -@rule -def _arg_star(): - # Again, a deviation from the original. See _parameters. - return (Expr + opt(COMMA)) | (Expr + COMMA + _arg_star) - -LGrammar = Grammar( - start=File, - trivia=[BLANKS], # Need a little bit of disambiguation for the symbol involved. precedence = [ - (Assoc.LEFT, [PLUS, MINUS]), - (Assoc.LEFT, [STAR, SLASH]), - (Assoc.LEFT, [LPAREN]), - ], -) + (Assoc.LEFT, ["PLUS", "MINUS"]), + (Assoc.LEFT, ["STAR", "SLASH"]), + (Assoc.LEFT, ["LPAREN"]), + ] -L_PARSE_TABLE = LGrammar.build_table() -L_LEXER_TABLE = LGrammar.compile_lexer() + @rule + def File(self): + # TODO: Make lists easier + return self._functions + + @rule + def _functions(self): + return self.Function | (self._functions + self.Function) + + @rule + def Function(self): + return self.FN + self.NAME + self.ParamList + opt(self.ARROW + self.TypeExpr) + self.Block + + @rule + def ParamList(self): + return self.LPAREN + opt(self._parameters) + self.RPAREN + + @rule + def _parameters(self): + # NOTE: The ungrammar in the reference does not talk about commas required between parameters + # so this massages it to make them required. Commas are in the list not the param, which + # is more awkward for processing but not terminally so. + return (self.Param + opt(self.COMMA)) | (self.Param + self.COMMA + self._parameters) + + @rule + def Param(self): + return self.NAME + self.COLON + self.TypeExpr + + @rule + def TypeExpr(self): + return self.NAME + + @rule + def Block(self): + return self.LCURLY + opt(self._statements) + self.RCURLY + + @rule + def _statements(self): + return self.Stmt | self._statements + self.Stmt + + @rule + def Stmt(self): + return self.StmtExpr | self.StmtLet | self.StmtReturn + + @rule + def StmtExpr(self): + return self.Expr + self.SEMICOLON + + @rule + def StmtLet(self): + return self.LET + self.NAME + self.EQUAL + self.Expr + self.SEMICOLON + + @rule + def StmtReturn(self): + return self.RETURN + self.Expr + self.SEMICOLON + + @rule + def Expr(self): + return self.ExprLiteral | self.ExprName | self.ExprParen | self.ExprBinary | self.ExprCall + + @rule + def ExprLiteral(self): + return self.INT | self.TRUE | self.FALSE + + @rule + def ExprName(self): + return self.NAME + + @rule + def ExprParen(self): + return self.LPAREN + self.Expr + self.RPAREN + + @rule + def ExprBinary(self): + return self.Expr + (self.PLUS | self.MINUS | self.STAR | self.SLASH) + self.Expr + + @rule + def ExprCall(self): + return self.Expr + self.ArgList + + @rule + def ArgList(self): + return self.LPAREN + opt(self._arg_star) + self.RPAREN + + @rule + def _arg_star(self): + # Again, a deviation from the original. See _parameters. + return (self.Expr + opt(self.COMMA)) | (self.Expr + self.COMMA + self._arg_star) + + BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) + + TRUE = Terminal("true") + FALSE = Terminal("false") + INT = Terminal(Re.set(("0", "9")).plus()) + FN = Terminal("fn") + ARROW = Terminal("->") + COMMA = Terminal(",") + LPAREN = Terminal("(") + RPAREN = Terminal(")") + LCURLY = Terminal("{") + RCURLY = Terminal("}") + COLON = Terminal(":") + SEMICOLON = Terminal(";") + LET = Terminal("let") + EQUAL = Terminal("=") + RETURN = Terminal("return") + PLUS = Terminal("+") + MINUS = Terminal("-") + STAR = Terminal("*") + SLASH = Terminal("/") + + NAME = Terminal( + Re.seq( + Re.set(("a", "z"), ("A", "Z"), "_"), + Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), + ), + ) + + +L_PARSE_TABLE = LGrammar().build_table() +L_LEXER_TABLE = LGrammar().compile_lexer() def test_matklad_one(): diff --git a/tests/test_grammar.py b/tests/test_grammar.py index 398b416..c12380b 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -1,5 +1,6 @@ import pytest +import parser import parser.runtime as runtime from parser import Grammar, seq, rule, Terminal @@ -39,68 +40,117 @@ def _tree(treeform, count=0) -> runtime.Tree | runtime.TokenValue: def test_lr0_lr0(): """An LR0 grammar should work with an LR0 generator.""" - PLUS = Terminal("+", "+") - LPAREN = Terminal("(", "(") - RPAREN = Terminal(")", ")") - IDENTIFIER = Terminal("id", "id") + class G(Grammar): + start = "E" + # generator = parser.GenerateLR0 - @rule - def E(): - return seq(E, PLUS, T) | T + @rule + def E(self): + return seq(self.E, self.PLUS, self.T) | self.T - @rule - def T(): - return seq(LPAREN, E, RPAREN) | IDENTIFIER + @rule + def T(self): + return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER - G = Grammar(start=E) + PLUS = Terminal("+", name="+") + LPAREN = Terminal("(", name="(") + RPAREN = Terminal(")", name=")") + IDENTIFIER = Terminal("id", name="id") - table = G.build_table() - tree, errors = runtime.Parser(table).parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN)) + table = G().build_table() + tree, errors = runtime.Parser(table).parse( + Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN) + ) assert errors == [] assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")"))) +def test_all_generators(): + """This grammar should work with everything honestly.""" + + class G(Grammar): + start = "E" + + @rule + def E(self): + return seq(self.E, self.PLUS, self.T) | self.T + + @rule + def T(self): + return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER + + PLUS = Terminal("+", name="+") + LPAREN = Terminal("(", name="(") + RPAREN = Terminal(")", name=")") + IDENTIFIER = Terminal("id", name="id") + + GENERATORS = [ + # parser.GenerateLR0, + # parser.GeneratePager, + parser.ParserGenerator, + ] + for generator in GENERATORS: + table = G().build_table(generator=generator) + tree, errors = runtime.Parser(table).parse( + Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN) + ) + + print("\n") + print(generator) + print(f"{table.format()}") + + assert errors == [] + assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")"))) + def test_grammar_aho_ullman_2(): - @rule - def S(): - return seq(X, X) + class TestGrammar(Grammar): + start = "S" - @rule - def X(): - return seq(A, X) | B + @rule + def S(self): + return seq(self.X, self.X) - A = Terminal("A", "a") - B = Terminal("B", "b") + @rule + def X(self): + return seq(self.A, self.X) | self.B - Grammar(start=S).build_table() + A = Terminal("a") + B = Terminal("b") + + TestGrammar().build_table(generator=parser.ParserGenerator) + # TestGrammar().build_table(generator=parser.GeneratePager) def test_fun_lalr(): - @rule - def S(): - return seq(V, E) - @rule - def E(): - return F | seq(E, PLUS, F) + class TestGrammar(Grammar): + start = "S" - @rule - def F(): - return V | INT | seq(LPAREN, E, RPAREN) + @rule + def S(self): + return seq(self.V, self.E) - @rule - def V(): - return ID + @rule + def E(self): + return self.F | seq(self.E, self.PLUS, self.F) - PLUS = Terminal("PLUS", "+") - INT = Terminal("INT", "int") - ID = Terminal("ID", "id") - LPAREN = Terminal("LPAREN", "(") - RPAREN = Terminal("RPAREN", ")") + @rule + def F(self): + return self.V | self.INT | seq(self.LPAREN, self.E, self.RPAREN) - Grammar(start=S).build_table() + @rule + def V(self): + return self.ID + + PLUS = Terminal("+") + INT = Terminal("int") + ID = Terminal("id") + LPAREN = Terminal("(") + RPAREN = Terminal(")") + + TestGrammar().build_table() def test_conflicting_names(): @@ -117,28 +167,43 @@ def test_conflicting_names(): to understand. """ - @rule("IDENTIFIER") - def identifier(): - return IDENTIFIER + class TestGrammar(Grammar): + start = "IDENTIFIER" - IDENTIFIER = Terminal("IDENTIFIER", "Identifier") + @rule("IDENTIFIER") + def identifier(self): + return self.IDENTIFIER + + IDENTIFIER = Terminal("Identifier") with pytest.raises(ValueError): - Grammar(start=identifier).build_table() + TestGrammar().build_table() def test_grammar_ignore_trivia(): - @rule - def sentence(): - return WORD | seq(sentence, WORD) + class G(Grammar): + start = "sentence" - WORD = Terminal("WORD", "blah") - BLANK = Terminal("BLANK", " ") + trivia = ["BLANK"] - table = Grammar(start=sentence, trivia=[BLANK]).build_table() + @rule + def sentence(self): + return self.WORD | seq(self.sentence, self.WORD) + + WORD = Terminal("blah") + BLANK = Terminal(" ") + + table = G().build_table() assert "BLANK" in table.trivia - tree, errors = runtime.Parser(table).parse(Tokens(WORD, BLANK, WORD, BLANK)) + tree, errors = runtime.Parser(table).parse( + Tokens( + G.WORD, + G.BLANK, + G.WORD, + G.BLANK, + ) + ) assert errors == [] assert tree == runtime.Tree( @@ -169,3 +234,135 @@ def test_grammar_ignore_trivia(): ), ), ) + + +def test_grammar_unknown_trivia(): + class G(Grammar): + start = "sentence" + + trivia = ["BLANK"] + + @rule + def sentence(self): + return self.WORD | seq(self.sentence, self.WORD) + + WORD = Terminal("blah") + + with pytest.raises(ValueError): + G().build_table() + + +def test_grammar_trivia_symbol(): + class G(Grammar): + start = "sentence" + + @rule + def sentence(self): + return self.WORD | seq(self.sentence, self.WORD) + + WORD = Terminal("blah") + BLANK = Terminal(" ") + + trivia = [BLANK] + + table = G().build_table() + assert "BLANK" in table.trivia + + +def test_grammar_trivia_constructor(): + class G(Grammar): + start = "sentence" + + def __init__(self): + super().__init__(trivia=[self.BLANK]) + + @rule + def sentence(self): + return self.WORD | seq(self.sentence, self.WORD) + + WORD = Terminal("blah") + BLANK = Terminal(" ") + + table = G().build_table() + assert "BLANK" in table.trivia + + +def test_grammar_trivia_constructor_string(): + class G(Grammar): + start = "sentence" + + def __init__(self): + super().__init__(trivia=["BLANK"]) + + @rule + def sentence(self): + return self.WORD | seq(self.sentence, self.WORD) + + WORD = Terminal("blah") + BLANK = Terminal(" ") + + table = G().build_table() + assert "BLANK" in table.trivia + + +def test_grammar_trivia_constructor_string_unknown(): + class G(Grammar): + start = "sentence" + + def __init__(self): + super().__init__(trivia=["BLANK"]) + + @rule + def sentence(self): + return self.WORD | seq(self.sentence, self.WORD) + + WORD = Terminal("blah") + + with pytest.raises(ValueError): + G().build_table() + + +def test_grammar_name_implicit(): + class FooGrammar(Grammar): + start = "x" + + @rule + def x(self): + return self.WORD + + WORD = Terminal("blah") + + assert FooGrammar().name == "foo" + + +def test_grammar_name_explicit_member(): + class FooGrammar(Grammar): + start = "x" + + name = "bar" + + @rule + def x(self): + return self.WORD + + WORD = Terminal("blah") + + assert FooGrammar().name == "bar" + + +def test_grammar_name_explicit_constructor(): + class FooGrammar(Grammar): + start = "x" + + name = "bar" + + def __init__(self): + super().__init__(name="baz") + + @rule + def x(self): + return self.WORD + + WORD = Terminal("blah") + + assert FooGrammar().name == "baz" diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 79fa499..ffff192 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -354,33 +354,32 @@ def test_edge_list_always_sorted(points: list[tuple[int, int]]): def test_lexer_compile(): - @rule - def foo(): - # NOTE: This is a hack to ensure the terminals are reachable. :P - return IS | AS | IDENTIFIER + class LexTest(Grammar): + @rule + def foo(self): + return self.IS - IS = Terminal("IS", "is") - AS = Terminal("AS", "as") - IDENTIFIER = Terminal( - "IDENTIFIER", - Re.seq( - Re.set(("a", "z"), ("A", "Z"), "_"), - Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), + start = "foo" + + IS = Terminal("is") + AS = Terminal("as") + IDENTIFIER = Terminal( + Re.seq( + Re.set(("a", "z"), ("A", "Z"), "_"), + Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), + ) ) - ) - BLANKS = Terminal("BLANKS", Re.set("\r", "\n", "\t", " ").plus()) + BLANKS = Terminal(Re.set("\r", "\n", "\t", " ").plus()) - - LexTest = Grammar(start=foo, trivia=[BLANKS]) - lexer = LexTest.compile_lexer() + lexer = LexTest().compile_lexer() dump_lexer_table(lexer) tokens = list(generic_tokenize("xy is ass", lexer)) assert tokens == [ - (IDENTIFIER, 0, 2), - (BLANKS, 2, 1), - (IS, 3, 2), - (BLANKS, 5, 1), - (IDENTIFIER, 6, 3), + (LexTest.IDENTIFIER, 0, 2), + (LexTest.BLANKS, 2, 1), + (LexTest.IS, 3, 2), + (LexTest.BLANKS, 5, 1), + (LexTest.IDENTIFIER, 6, 3), ] @@ -388,35 +387,34 @@ def test_lexer_compile(): def test_lexer_numbers(n: float): assume(math.isfinite(n)) - @rule - def number(): - return NUMBER + class LexTest(Grammar): + @rule + def number(self): + return self.NUMBER - NUMBER = Terminal( - "NUMBER", - Re.seq( - Re.set(("0", "9")).plus(), + start = "number" + + NUMBER = Terminal( Re.seq( - Re.literal("."), Re.set(("0", "9")).plus(), - ).question(), - Re.seq( - Re.set("e", "E"), - Re.set("+", "-").question(), - Re.set(("0", "9")).plus(), - ).question(), + Re.seq( + Re.literal("."), + Re.set(("0", "9")).plus(), + ).question(), + Re.seq( + Re.set("e", "E"), + Re.set("+", "-").question(), + Re.set(("0", "9")).plus(), + ).question(), + ) ) - ) - - LexTest = Grammar(start=number) - - lexer = LexTest.compile_lexer() + lexer = LexTest().compile_lexer() dump_lexer_table(lexer) number_string = str(n) tokens = list(generic_tokenize(number_string, lexer)) assert tokens == [ - (NUMBER, 0, len(number_string)), + (LexTest.NUMBER, 0, len(number_string)), ] diff --git a/tests/test_wadler.py b/tests/test_wadler.py index bf52824..e66c29d 100644 --- a/tests/test_wadler.py +++ b/tests/test_wadler.py @@ -23,66 +23,69 @@ import parser.wadler.builder as builder import parser.wadler.runtime as runtime -def make_json_grammar(): +class JsonGrammar(Grammar): + start = "root" + + trivia = ["BLANKS"] + @rule - def root(): - return value + def root(self): + return self.value @rule(transparent=True) - def value(): + def value(self): return ( - object - | array - | NUMBER - | TRUE - | FALSE - | NULL - | STRING + self.object + | self.array + | self.NUMBER + | self.TRUE + | self.FALSE + | self.NULL + | self.STRING ) @rule - def object(): + def object(self): return group( - LCURLY + opt(indent(newline() + _object_pairs)) + newline() + RCURLY + self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY ) @rule - def _object_pairs(): + def _object_pairs(self): return alt( - object_pair, - object_pair + COMMA + newline(" ") + _object_pairs, + self.object_pair, + self.object_pair + self.COMMA + newline(" ") + self._object_pairs, ) @rule - def object_pair(): - return group(STRING + COLON + indent(newline(" ") + value)) + def object_pair(self): + return group(self.STRING + self.COLON + indent(newline(" ") + self.value)) @rule - def array(): + def array(self): return group( - LSQUARE + opt(indent(newline() + _array_items)) + newline() + RSQUARE + self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE ) @rule - def _array_items(): + def _array_items(self): return alt( - value, - value + COMMA + newline(" ") + _array_items, + self.value, + self.value + self.COMMA + newline(" ") + self._array_items, ) - BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus()) + BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) - LCURLY = Terminal("LCURLY", "{") - RCURLY = Terminal("RCURLY", "}") - COMMA = Terminal("COMMA", ",") - COLON = Terminal("COLON", ":") - LSQUARE = Terminal("LSQUARE", "[") - RSQUARE = Terminal("RSQUARE", "]") - TRUE = Terminal("TRUE", "true") - FALSE = Terminal("FALSE", "false") - NULL = Terminal("NULL", "null") + LCURLY = Terminal("{") + RCURLY = Terminal("}") + COMMA = Terminal(",") + COLON = Terminal(":") + LSQUARE = Terminal("[") + RSQUARE = Terminal("]") + TRUE = Terminal("true") + FALSE = Terminal("false") + NULL = Terminal("null") NUMBER = Terminal( - "NUMBER", Re.seq( Re.set(("0", "9")).plus(), Re.seq( @@ -97,7 +100,6 @@ def make_json_grammar(): ), ) STRING = Terminal( - "STRING", Re.seq( Re.literal('"'), (~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(), @@ -105,9 +107,8 @@ def make_json_grammar(): ) ) - return Grammar(start=root, trivia=[BLANKS]) -JSON = make_json_grammar() +JSON = JsonGrammar() JSON_PARSER = JSON.build_table() JSON_LEXER = JSON.compile_lexer() @@ -227,49 +228,47 @@ def test_layout_basic(): ) -def make_test_grammar(): - @rule - def root(): - return _expression +class TG(Grammar): + start = "root" + trivia = ["BLANKS", "LINE_BREAK", "COMMENT"] @rule - def _expression(): - return word | list + def root(self): + return self._expression @rule - def list(): - return group(LPAREN, indent(nl, _expressions), nl, RPAREN) + def _expression(self): + return self.word | self.list @rule - def _expressions(): - return _expression | seq(_expressions, sp, _expression) + def list(self): + return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN) @rule - def word(): - return OK | seq(BREAK, br, BREAK) + def _expressions(self): + return self._expression | seq(self._expressions, sp, self._expression) - LPAREN = Terminal("LPAREN", "(") - RPAREN = Terminal("RPAREN", ")") - OK = Terminal("OK", "ok") - BREAK = Terminal("BREAK", "break") + @rule + def word(self): + return self.OK | seq(self.BREAK, br, self.BREAK) - BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus()) - LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine) + LPAREN = Terminal("(") + RPAREN = Terminal(")") + OK = Terminal("ok") + BREAK = Terminal("break") + + BLANKS = Terminal(Re.set(" ", "\t").plus()) + LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine) COMMENT = Terminal( - "COMMENT", Re.seq(Re.literal(";"), Re.set("\n").invert().star()), trivia_mode=TriviaMode.LineComment, ) - return Grammar(start=root, trivia=[BLANKS, LINE_BREAK, COMMENT], pretty_indent=" ") - -TG = make_test_grammar() - - def test_forced_break(): - g_lexer = TG.compile_lexer() - g_parser = TG.build_table() + g = TG() + g_lexer = g.compile_lexer() + g_parser = g.build_table() text = "((ok ok) (ok break break ok) (ok ok ok ok))" @@ -277,28 +276,29 @@ def test_forced_break(): assert errors == [] assert tree is not None - printer = runtime.Printer(builder.compile_pretty_table(TG)) + printer = runtime.Printer(builder.compile_pretty_table(g)) result = printer.format_tree(tree, text, 200).apply_to_source(text) assert result == _output( """ ( - (ok ok) - ( - ok - break - break - ok - ) - (ok ok ok ok) + (ok ok) + ( + ok + break + break + ok + ) + (ok ok ok ok) ) """ ) def test_maintaining_line_breaks(): - g_lexer = TG.compile_lexer() - g_parser = TG.build_table() + g = TG() + g_lexer = g.compile_lexer() + g_parser = g.build_table() text = """((ok ok) ; Don't break here. @@ -316,29 +316,30 @@ def test_maintaining_line_breaks(): assert errors == [] assert tree is not None - printer = runtime.Printer(builder.compile_pretty_table(TG)) + printer = runtime.Printer(builder.compile_pretty_table(g)) result = printer.format_tree(tree, text, 200).apply_to_source(text) assert result == _output( """ ( - (ok ok) - ; Don't break here. - (ok) -*SPACE**SPACE* - ; ^ Do keep this break though. - (ok) -*SPACE**SPACE* - ; ^ This should only be one break. - (ok) + (ok ok) + ; Don't break here. + (ok) +*SPACE* + ; ^ Do keep this break though. + (ok) +*SPACE* + ; ^ This should only be one break. + (ok) ) """ ) def test_trailing_trivia(): - g_lexer = TG.compile_lexer() - g_parser = TG.build_table() + g = TG() + g_lexer = g.compile_lexer() + g_parser = g.build_table() text = """((ok ok)); Don't lose this! @@ -349,7 +350,7 @@ def test_trailing_trivia(): assert errors == [] assert tree is not None - printer = runtime.Printer(builder.compile_pretty_table(TG)) + printer = runtime.Printer(builder.compile_pretty_table(g)) result = printer.format_tree(tree, text, 200).apply_to_source(text) assert result == _output( @@ -362,8 +363,9 @@ def test_trailing_trivia(): def test_trailing_trivia_two(): - g_lexer = TG.compile_lexer() - g_parser = TG.build_table() + g = TG() + g_lexer = g.compile_lexer() + g_parser = g.build_table() text = """((ok ok)) @@ -374,7 +376,7 @@ def test_trailing_trivia_two(): assert errors == [] assert tree is not None - printer = runtime.Printer(builder.compile_pretty_table(TG)) + printer = runtime.Printer(builder.compile_pretty_table(g)) result = printer.format_tree(tree, text, 200).apply_to_source(text) assert result == _output( @@ -387,8 +389,9 @@ def test_trailing_trivia_two(): def test_trailing_trivia_split(): - g_lexer = TG.compile_lexer() - g_parser = TG.build_table() + g = TG() + g_lexer = g.compile_lexer() + g_parser = g.build_table() text = """((ok ok)); Don't lose this! @@ -429,7 +432,7 @@ def test_trailing_trivia_split(): print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}") trivia_doc = runtime.Matcher( - builder.MatcherTable(ParseTable([], [], set(), {}), {}, {}), + builder.MatcherTable(ParseTable([], [], set()), {}, {}), TRIVIA_MODES, ).apply_post_trivia( token.post_trivia,