[all] A whole new style for grammars
Say goodbye to the sea of `self.`!
This commit is contained in:
parent
d6f1e7aba1
commit
5064a768e7
10 changed files with 1097 additions and 1318 deletions
635
grammar.py
635
grammar.py
|
|
@ -20,444 +20,415 @@ from parser import (
|
|||
sp,
|
||||
)
|
||||
|
||||
@rule("File")
|
||||
def file() -> Rule:
|
||||
return _file_statement_list
|
||||
|
||||
class FineGrammar(Grammar):
|
||||
# generator = parser.GenerateLR1
|
||||
# generator = parser.GeneratePager
|
||||
start = "File"
|
||||
|
||||
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
|
||||
|
||||
pretty_indent = " "
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
precedence=[
|
||||
(Assoc.RIGHT, [self.EQUAL]),
|
||||
(Assoc.LEFT, [self.OR]),
|
||||
(Assoc.LEFT, [self.IS]),
|
||||
(Assoc.LEFT, [self.AND]),
|
||||
(Assoc.LEFT, [self.EQUALEQUAL, self.BANGEQUAL]),
|
||||
(Assoc.LEFT, [self.LESS, self.GREATER, self.GREATEREQUAL, self.LESSEQUAL]),
|
||||
(Assoc.LEFT, [self.PLUS, self.MINUS]),
|
||||
(Assoc.LEFT, [self.STAR, self.SLASH]),
|
||||
(Assoc.LEFT, [self.primary_expression]),
|
||||
(Assoc.LEFT, [self.LPAREN]),
|
||||
(Assoc.LEFT, [self.DOT]),
|
||||
#
|
||||
# If there's a confusion about whether to make an IF
|
||||
# statement or an expression, prefer the statement.
|
||||
#
|
||||
(Assoc.NONE, [self.if_statement]),
|
||||
],
|
||||
)
|
||||
|
||||
@rule("File")
|
||||
def file(self) -> Rule:
|
||||
return self._file_statement_list
|
||||
|
||||
@rule
|
||||
def _file_statement_list(self) -> Rule:
|
||||
@rule
|
||||
def _file_statement_list() -> Rule:
|
||||
return alt(
|
||||
self._file_statement,
|
||||
self._file_statement_list + nl + self._file_statement,
|
||||
_file_statement,
|
||||
_file_statement_list + nl + _file_statement,
|
||||
)
|
||||
|
||||
@rule
|
||||
def _file_statement(self) -> Rule:
|
||||
@rule
|
||||
def _file_statement() -> Rule:
|
||||
return (
|
||||
self.import_statement | self.class_declaration | self.export_statement | self._statement
|
||||
import_statement | class_declaration | export_statement | _statement
|
||||
)
|
||||
|
||||
@rule
|
||||
def import_statement(self) -> Rule:
|
||||
@rule
|
||||
def import_statement() -> Rule:
|
||||
return group(
|
||||
self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, sp, self.SEMICOLON
|
||||
IMPORT, sp, STRING, sp, AS, sp, IDENTIFIER, sp, SEMICOLON
|
||||
)
|
||||
|
||||
@rule("ClassDeclaration")
|
||||
def class_declaration(self) -> Rule:
|
||||
@rule("ClassDeclaration")
|
||||
def class_declaration() -> Rule:
|
||||
return seq(
|
||||
group(
|
||||
self.CLASS,
|
||||
CLASS,
|
||||
sp,
|
||||
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
|
||||
mark(IDENTIFIER, field="name", highlight=highlight.entity.name.type),
|
||||
sp,
|
||||
self.LCURLY,
|
||||
LCURLY,
|
||||
),
|
||||
indent(nl, mark(opt(self.class_body), field="body")),
|
||||
indent(nl, mark(opt(class_body), field="body")),
|
||||
nl,
|
||||
self.RCURLY,
|
||||
RCURLY,
|
||||
nl, # Extra newline at the end of the class
|
||||
)
|
||||
|
||||
@rule("ClassBody")
|
||||
def class_body(self) -> Rule:
|
||||
return self._class_members
|
||||
@rule("ClassBody")
|
||||
def class_body() -> Rule:
|
||||
return _class_members
|
||||
|
||||
@rule
|
||||
def _class_members(self) -> Rule:
|
||||
return self._class_member | seq(self._class_members, nl, self._class_member)
|
||||
@rule
|
||||
def _class_members() -> Rule:
|
||||
return _class_member | seq(_class_members, nl, _class_member)
|
||||
|
||||
@rule
|
||||
def _class_member(self) -> Rule:
|
||||
return self.field_declaration | self.function_declaration
|
||||
@rule
|
||||
def _class_member() -> Rule:
|
||||
return field_declaration | function_declaration
|
||||
|
||||
@rule("FieldDecl")
|
||||
def field_declaration(self) -> Rule:
|
||||
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON)
|
||||
@rule("FieldDecl")
|
||||
def field_declaration() -> Rule:
|
||||
return group(IDENTIFIER, COLON, sp, type_expression, SEMICOLON)
|
||||
|
||||
# Types
|
||||
@rule("TypeExpression")
|
||||
def type_expression(self) -> Rule:
|
||||
return self.alternate_type | self.type_identifier
|
||||
# Types
|
||||
@rule("TypeExpression")
|
||||
def type_expression() -> Rule:
|
||||
return alternate_type | type_identifier
|
||||
|
||||
@rule("AlternateType")
|
||||
def alternate_type(self) -> Rule:
|
||||
return group(self.type_expression, sp, self.OR, sp, self.type_identifier)
|
||||
@rule("AlternateType")
|
||||
def alternate_type() -> Rule:
|
||||
return group(type_expression, sp, OR, sp, type_identifier)
|
||||
|
||||
@rule("TypeIdentifier")
|
||||
def type_identifier(self) -> Rule:
|
||||
return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)
|
||||
@rule("TypeIdentifier")
|
||||
def type_identifier() -> Rule:
|
||||
return mark(IDENTIFIER, field="id", highlight=highlight.entity.name.type)
|
||||
|
||||
@rule
|
||||
def export_statement(self) -> Rule:
|
||||
@rule
|
||||
def export_statement() -> Rule:
|
||||
return alt(
|
||||
group(self.EXPORT, sp, self.class_declaration),
|
||||
group(self.EXPORT, sp, self.function_declaration),
|
||||
group(self.EXPORT, sp, self.let_statement),
|
||||
group(self.EXPORT, sp, self.export_list, self.SEMICOLON),
|
||||
group(EXPORT, sp, class_declaration),
|
||||
group(EXPORT, sp, function_declaration),
|
||||
group(EXPORT, sp, let_statement),
|
||||
group(EXPORT, sp, export_list, SEMICOLON),
|
||||
)
|
||||
|
||||
@rule
|
||||
def export_list(self) -> Rule:
|
||||
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list)
|
||||
@rule
|
||||
def export_list() -> Rule:
|
||||
return IDENTIFIER | seq(IDENTIFIER, COMMA, sp, export_list)
|
||||
|
||||
# Functions
|
||||
@rule("FunctionDecl")
|
||||
def function_declaration(self) -> Rule:
|
||||
# Functions
|
||||
@rule("FunctionDecl")
|
||||
def function_declaration() -> Rule:
|
||||
return seq(
|
||||
group(
|
||||
group(
|
||||
group(
|
||||
self.FUN,
|
||||
FUN,
|
||||
sp,
|
||||
mark(
|
||||
self.IDENTIFIER,
|
||||
IDENTIFIER,
|
||||
field="name",
|
||||
highlight=highlight.entity.name.function,
|
||||
),
|
||||
),
|
||||
nl,
|
||||
mark(self.function_parameters, field="parameters"),
|
||||
mark(function_parameters, field="parameters"),
|
||||
),
|
||||
mark(
|
||||
opt(indent(sp, group(self.ARROW, sp, self.type_expression))),
|
||||
opt(indent(sp, group(ARROW, sp, type_expression))),
|
||||
field="return_type",
|
||||
),
|
||||
),
|
||||
sp,
|
||||
mark(self.block, field="body"),
|
||||
mark(block, field="body"),
|
||||
nl,
|
||||
)
|
||||
|
||||
@rule("ParamList")
|
||||
def function_parameters(self) -> Rule:
|
||||
@rule("ParamList")
|
||||
def function_parameters() -> Rule:
|
||||
return group(
|
||||
self.LPAREN,
|
||||
LPAREN,
|
||||
indent(
|
||||
nl,
|
||||
opt(
|
||||
self._first_parameter
|
||||
| seq(self._first_parameter, self.COMMA)
|
||||
| group(self._first_parameter, self.COMMA, sp, self._parameter_list)
|
||||
_first_parameter
|
||||
| seq(_first_parameter, COMMA)
|
||||
| group(_first_parameter, COMMA, sp, _parameter_list)
|
||||
),
|
||||
),
|
||||
nl,
|
||||
self.RPAREN,
|
||||
RPAREN,
|
||||
)
|
||||
|
||||
@rule
|
||||
def _first_parameter(self) -> Rule:
|
||||
return self.SELF | self.parameter
|
||||
@rule
|
||||
def _first_parameter() -> Rule:
|
||||
return SELF | parameter
|
||||
|
||||
@rule
|
||||
def _parameter_list(self) -> Rule:
|
||||
return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list)
|
||||
@rule
|
||||
def _parameter_list() -> Rule:
|
||||
return parameter | seq(parameter, COMMA, sp, _parameter_list)
|
||||
|
||||
@rule("Parameter")
|
||||
def parameter(self) -> Rule:
|
||||
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression)
|
||||
@rule("Parameter")
|
||||
def parameter() -> Rule:
|
||||
return group(IDENTIFIER, COLON, sp, type_expression)
|
||||
|
||||
# Block
|
||||
@rule("Block")
|
||||
def block(self) -> Rule:
|
||||
# Block
|
||||
@rule("Block")
|
||||
def block() -> Rule:
|
||||
return alt(
|
||||
group(self.LCURLY, nl, self.RCURLY),
|
||||
group(self.LCURLY, indent(br, self.block_body), sp, self.RCURLY),
|
||||
group(LCURLY, nl, RCURLY),
|
||||
group(LCURLY, indent(br, block_body), sp, RCURLY),
|
||||
)
|
||||
|
||||
@rule("BlockBody")
|
||||
def block_body(self) -> Rule:
|
||||
@rule("BlockBody")
|
||||
def block_body() -> Rule:
|
||||
return alt(
|
||||
self.expression,
|
||||
self._statement_list,
|
||||
seq(self._statement_list, br, self.expression),
|
||||
expression,
|
||||
_statement_list,
|
||||
seq(_statement_list, br, expression),
|
||||
)
|
||||
|
||||
@rule
|
||||
def _statement_list(self) -> Rule:
|
||||
return self._statement | seq(self._statement_list, br, self._statement)
|
||||
@rule
|
||||
def _statement_list() -> Rule:
|
||||
return _statement | seq(_statement_list, br, _statement)
|
||||
|
||||
@rule
|
||||
def _statement(self) -> Rule:
|
||||
@rule
|
||||
def _statement() -> Rule:
|
||||
return (
|
||||
self.function_declaration
|
||||
| self.let_statement
|
||||
| self.return_statement
|
||||
| self.for_statement
|
||||
| self.if_statement
|
||||
| self.while_statement
|
||||
| self.expression_statement
|
||||
function_declaration
|
||||
| let_statement
|
||||
| return_statement
|
||||
| for_statement
|
||||
| if_statement
|
||||
| while_statement
|
||||
| expression_statement
|
||||
)
|
||||
|
||||
@rule("LetStatement")
|
||||
def let_statement(self) -> Rule:
|
||||
@rule("LetStatement")
|
||||
def let_statement() -> Rule:
|
||||
return group(
|
||||
group(
|
||||
self.LET,
|
||||
LET,
|
||||
sp,
|
||||
self.IDENTIFIER,
|
||||
IDENTIFIER,
|
||||
sp,
|
||||
self.EQUAL,
|
||||
EQUAL,
|
||||
),
|
||||
indent(sp, self.expression, self.SEMICOLON),
|
||||
indent(sp, expression, SEMICOLON),
|
||||
)
|
||||
|
||||
@rule("ReturnStatement")
|
||||
def return_statement(self) -> Rule:
|
||||
@rule("ReturnStatement")
|
||||
def return_statement() -> Rule:
|
||||
return alt(
|
||||
group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))),
|
||||
group(self.RETURN, self.SEMICOLON),
|
||||
group(RETURN, indent(sp, group(expression, SEMICOLON))),
|
||||
group(RETURN, SEMICOLON),
|
||||
)
|
||||
|
||||
@rule("ForStatement")
|
||||
def for_statement(self) -> Rule:
|
||||
@rule("ForStatement")
|
||||
def for_statement() -> Rule:
|
||||
return group(
|
||||
group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)),
|
||||
self.block,
|
||||
group(FOR, sp, iterator_variable, sp, IN, sp, group(expression)),
|
||||
block,
|
||||
)
|
||||
|
||||
@rule("IteratorVariable")
|
||||
def iterator_variable(self) -> Rule:
|
||||
return self.IDENTIFIER
|
||||
@rule("IteratorVariable")
|
||||
def iterator_variable() -> Rule:
|
||||
return IDENTIFIER
|
||||
|
||||
@rule("IfStatement")
|
||||
def if_statement(self) -> Rule:
|
||||
return self.conditional_expression
|
||||
@rule("IfStatement")
|
||||
def if_statement() -> Rule:
|
||||
return conditional_expression
|
||||
|
||||
@rule
|
||||
def while_statement(self) -> Rule:
|
||||
return group(group(self.WHILE, sp, self.expression), sp, self.block)
|
||||
@rule
|
||||
def while_statement() -> Rule:
|
||||
return group(group(WHILE, sp, expression), sp, block)
|
||||
|
||||
@rule
|
||||
def expression_statement(self) -> Rule:
|
||||
return seq(self.expression, self.SEMICOLON)
|
||||
@rule
|
||||
def expression_statement() -> Rule:
|
||||
return seq(expression, SEMICOLON)
|
||||
|
||||
# Expressions
|
||||
@rule(transparent=True)
|
||||
def expression(self) -> Rule:
|
||||
return self.binary_expression | self.is_expression | self.primary_expression
|
||||
# Expressions
|
||||
@rule(transparent=True)
|
||||
def expression() -> Rule:
|
||||
return binary_expression | is_expression | primary_expression
|
||||
|
||||
@rule("BinaryExpression")
|
||||
def binary_expression(self) -> Rule:
|
||||
@rule("BinaryExpression")
|
||||
def binary_expression() -> Rule:
|
||||
return alt(
|
||||
# Assignment gets special indentation.
|
||||
group(group(self.expression, sp, self.EQUAL), indent(sp, self.expression)),
|
||||
group(group(expression, sp, EQUAL), indent(sp, expression)),
|
||||
# Other ones do not.
|
||||
group(group(self.expression, sp, self.OR), sp, self.expression),
|
||||
group(group(self.expression, sp, self.AND), sp, self.expression),
|
||||
group(group(self.expression, sp, self.EQUALEQUAL), sp, self.expression),
|
||||
group(group(self.expression, sp, self.BANGEQUAL), sp, self.expression),
|
||||
group(group(self.expression, sp, self.LESS), sp, self.expression),
|
||||
group(group(self.expression, sp, self.LESSEQUAL), sp, self.expression),
|
||||
group(group(self.expression, sp, self.GREATER), sp, self.expression),
|
||||
group(group(self.expression, sp, self.GREATEREQUAL), sp, self.expression),
|
||||
group(group(self.expression, sp, self.PLUS), sp, self.expression),
|
||||
group(group(self.expression, sp, self.MINUS), sp, self.expression),
|
||||
group(group(self.expression, sp, self.STAR), sp, self.expression),
|
||||
group(group(self.expression, sp, self.SLASH), sp, self.expression),
|
||||
group(group(expression, sp, OR), sp, expression),
|
||||
group(group(expression, sp, AND), sp, expression),
|
||||
group(group(expression, sp, EQUALEQUAL), sp, expression),
|
||||
group(group(expression, sp, BANGEQUAL), sp, expression),
|
||||
group(group(expression, sp, LESS), sp, expression),
|
||||
group(group(expression, sp, LESSEQUAL), sp, expression),
|
||||
group(group(expression, sp, GREATER), sp, expression),
|
||||
group(group(expression, sp, GREATEREQUAL), sp, expression),
|
||||
group(group(expression, sp, PLUS), sp, expression),
|
||||
group(group(expression, sp, MINUS), sp, expression),
|
||||
group(group(expression, sp, STAR), sp, expression),
|
||||
group(group(expression, sp, SLASH), sp, expression),
|
||||
)
|
||||
|
||||
@rule("IsExpression")
|
||||
def is_expression(self) -> Rule:
|
||||
return group(self.expression, sp, self.IS, indent(sp, self.pattern))
|
||||
@rule("IsExpression")
|
||||
def is_expression() -> Rule:
|
||||
return group(expression, sp, IS, indent(sp, pattern))
|
||||
|
||||
@rule
|
||||
def primary_expression(self) -> Rule:
|
||||
@rule
|
||||
def primary_expression() -> Rule:
|
||||
return (
|
||||
self.identifier_expression
|
||||
| self.literal_expression
|
||||
| self.SELF
|
||||
| seq(self.BANG, self.primary_expression)
|
||||
| seq(self.MINUS, self.primary_expression)
|
||||
| self.block
|
||||
| self.conditional_expression
|
||||
| self.list_constructor_expression
|
||||
| self.object_constructor_expression
|
||||
| self.match_expression
|
||||
| seq(self.primary_expression, self.LPAREN, self.RPAREN)
|
||||
identifier_expression
|
||||
| literal_expression
|
||||
| SELF
|
||||
| seq(BANG, primary_expression)
|
||||
| seq(MINUS, primary_expression)
|
||||
| block
|
||||
| conditional_expression
|
||||
| list_constructor_expression
|
||||
| object_constructor_expression
|
||||
| match_expression
|
||||
| seq(primary_expression, LPAREN, RPAREN)
|
||||
| group(
|
||||
self.primary_expression,
|
||||
self.LPAREN,
|
||||
indent(nl, self._expression_list),
|
||||
primary_expression,
|
||||
LPAREN,
|
||||
indent(nl, _expression_list),
|
||||
nl,
|
||||
self.RPAREN,
|
||||
RPAREN,
|
||||
)
|
||||
| group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER))
|
||||
| group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN)
|
||||
| group(primary_expression, indent(nl, DOT, IDENTIFIER))
|
||||
| group(LPAREN, indent(nl, expression), nl, RPAREN)
|
||||
)
|
||||
|
||||
@rule("IdentifierExpression")
|
||||
def identifier_expression(self):
|
||||
return self.IDENTIFIER
|
||||
@rule("IdentifierExpression")
|
||||
def identifier_expression():
|
||||
return IDENTIFIER
|
||||
|
||||
@rule("Literal")
|
||||
def literal_expression(self):
|
||||
return self.NUMBER | self.STRING | self.TRUE | self.FALSE
|
||||
@rule("Literal")
|
||||
def literal_expression():
|
||||
return NUMBER | STRING | TRUE | FALSE
|
||||
|
||||
@rule("ConditionalExpression")
|
||||
def conditional_expression(self) -> Rule:
|
||||
@rule("ConditionalExpression")
|
||||
def conditional_expression() -> Rule:
|
||||
return (
|
||||
seq(group(self.IF, sp, self.expression), sp, self.block)
|
||||
seq(group(IF, sp, expression), sp, block)
|
||||
| seq(
|
||||
group(self.IF, sp, self.expression),
|
||||
group(IF, sp, expression),
|
||||
sp,
|
||||
self.block,
|
||||
block,
|
||||
sp,
|
||||
self.ELSE,
|
||||
ELSE,
|
||||
sp,
|
||||
self.conditional_expression,
|
||||
conditional_expression,
|
||||
)
|
||||
| seq(
|
||||
group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block
|
||||
group(IF, sp, expression), sp, block, sp, ELSE, sp, block
|
||||
)
|
||||
)
|
||||
|
||||
@rule
|
||||
def list_constructor_expression(self) -> Rule:
|
||||
@rule
|
||||
def list_constructor_expression() -> Rule:
|
||||
return alt(
|
||||
group(self.LSQUARE, nl, self.RSQUARE),
|
||||
group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE),
|
||||
group(LSQUARE, nl, RSQUARE),
|
||||
group(LSQUARE, indent(nl, _expression_list), nl, RSQUARE),
|
||||
)
|
||||
|
||||
@rule
|
||||
def _expression_list(self) -> Rule:
|
||||
@rule
|
||||
def _expression_list() -> Rule:
|
||||
return (
|
||||
self.expression
|
||||
| seq(self.expression, self.COMMA)
|
||||
| seq(self.expression, self.COMMA, sp, self._expression_list)
|
||||
expression
|
||||
| seq(expression, COMMA)
|
||||
| seq(expression, COMMA, sp, _expression_list)
|
||||
)
|
||||
|
||||
@rule
|
||||
def match_expression(self) -> Rule:
|
||||
@rule
|
||||
def match_expression() -> Rule:
|
||||
return group(
|
||||
group(self.MATCH, sp, self.expression, sp, self.LCURLY),
|
||||
indent(sp, self.match_arms),
|
||||
group(MATCH, sp, expression, sp, LCURLY),
|
||||
indent(sp, match_arms),
|
||||
sp,
|
||||
self.RCURLY,
|
||||
RCURLY,
|
||||
)
|
||||
|
||||
@rule("MatchArms")
|
||||
def match_arms(self) -> Rule:
|
||||
return self._match_arms
|
||||
@rule("MatchArms")
|
||||
def match_arms() -> Rule:
|
||||
return _match_arms
|
||||
|
||||
@rule
|
||||
def _match_arms(self) -> Rule:
|
||||
@rule
|
||||
def _match_arms() -> Rule:
|
||||
return (
|
||||
self.match_arm
|
||||
| seq(self.match_arm, self.COMMA)
|
||||
| seq(self.match_arm, self.COMMA, br, self._match_arms)
|
||||
match_arm
|
||||
| seq(match_arm, COMMA)
|
||||
| seq(match_arm, COMMA, br, _match_arms)
|
||||
)
|
||||
|
||||
@rule("MatchArm")
|
||||
def match_arm(self) -> Rule:
|
||||
return group(self.pattern, sp, self.ARROW, sp, self.expression)
|
||||
@rule("MatchArm")
|
||||
def match_arm() -> Rule:
|
||||
return group(pattern, sp, ARROW, sp, expression)
|
||||
|
||||
@rule("Pattern")
|
||||
def pattern(self) -> Rule:
|
||||
@rule("Pattern")
|
||||
def pattern() -> Rule:
|
||||
return (
|
||||
group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression)
|
||||
| group(self.variable_binding, self._pattern_core)
|
||||
| self._pattern_core
|
||||
group(variable_binding, _pattern_core, sp, AND, sp, expression)
|
||||
| group(variable_binding, _pattern_core)
|
||||
| _pattern_core
|
||||
)
|
||||
|
||||
@rule
|
||||
def _pattern_core(self) -> Rule:
|
||||
return self.type_expression | self.wildcard_pattern
|
||||
@rule
|
||||
def _pattern_core() -> Rule:
|
||||
return type_expression | wildcard_pattern
|
||||
|
||||
@rule("WildcardPattern")
|
||||
def wildcard_pattern(self) -> Rule:
|
||||
return self.UNDERSCORE
|
||||
@rule("WildcardPattern")
|
||||
def wildcard_pattern() -> Rule:
|
||||
return UNDERSCORE
|
||||
|
||||
@rule("VariableBinding")
|
||||
def variable_binding(self) -> Rule:
|
||||
return seq(self.IDENTIFIER, self.COLON)
|
||||
@rule("VariableBinding")
|
||||
def variable_binding() -> Rule:
|
||||
return seq(IDENTIFIER, COLON)
|
||||
|
||||
@rule
|
||||
def object_constructor_expression(self) -> Rule:
|
||||
return group(self.NEW, sp, self.type_identifier, sp, self.field_list)
|
||||
@rule
|
||||
def object_constructor_expression() -> Rule:
|
||||
return group(NEW, sp, type_identifier, sp, field_list)
|
||||
|
||||
@rule
|
||||
def field_list(self) -> Rule:
|
||||
@rule
|
||||
def field_list() -> Rule:
|
||||
return alt(
|
||||
seq(self.LCURLY, self.RCURLY),
|
||||
group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY),
|
||||
seq(LCURLY, RCURLY),
|
||||
group(LCURLY, indent(nl, field_values), nl, RCURLY),
|
||||
)
|
||||
|
||||
@rule
|
||||
def field_values(self) -> Rule:
|
||||
@rule
|
||||
def field_values() -> Rule:
|
||||
return (
|
||||
self.field_value
|
||||
| seq(self.field_value, self.COMMA)
|
||||
| seq(self.field_value, self.COMMA, sp, self.field_values)
|
||||
field_value
|
||||
| seq(field_value, COMMA)
|
||||
| seq(field_value, COMMA, sp, field_values)
|
||||
)
|
||||
|
||||
@rule
|
||||
def field_value(self) -> Rule:
|
||||
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
|
||||
@rule
|
||||
def field_value() -> Rule:
|
||||
return IDENTIFIER | group(IDENTIFIER, COLON, indent(sp, expression))
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
"COMMENT",
|
||||
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
|
||||
highlight=highlight.comment.line,
|
||||
trivia_mode=TriviaMode.LineComment,
|
||||
)
|
||||
)
|
||||
|
||||
ARROW = Terminal("->", highlight=highlight.keyword.operator)
|
||||
AS = Terminal("as", highlight=highlight.keyword.operator.expression)
|
||||
BAR = Terminal("|", highlight=highlight.keyword.operator.expression)
|
||||
CLASS = Terminal("class", highlight=highlight.storage.type.klass)
|
||||
COLON = Terminal(":", highlight=highlight.punctuation.separator)
|
||||
ELSE = Terminal("else", highlight=highlight.keyword.control.conditional)
|
||||
FOR = Terminal("for", highlight=highlight.keyword.control)
|
||||
FUN = Terminal("fun", highlight=highlight.storage.type.function)
|
||||
IDENTIFIER = Terminal(
|
||||
ARROW = Terminal("ARROW", "->", highlight=highlight.keyword.operator)
|
||||
AS = Terminal("AS", "as", highlight=highlight.keyword.operator.expression)
|
||||
BAR = Terminal("BAR", "|", highlight=highlight.keyword.operator.expression)
|
||||
CLASS = Terminal("CLASS", "class", highlight=highlight.storage.type.klass)
|
||||
COLON = Terminal("COLON", ":", highlight=highlight.punctuation.separator)
|
||||
ELSE = Terminal("ELSE", "else", highlight=highlight.keyword.control.conditional)
|
||||
FOR = Terminal("FOR", "for", highlight=highlight.keyword.control)
|
||||
FUN = Terminal("FUN", "fun", highlight=highlight.storage.type.function)
|
||||
IDENTIFIER = Terminal(
|
||||
"IDENTIFIER",
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
),
|
||||
)
|
||||
IF = Terminal("if", highlight=highlight.keyword.control.conditional)
|
||||
IMPORT = Terminal("import", highlight=highlight.keyword.other)
|
||||
IN = Terminal("in", highlight=highlight.keyword.operator)
|
||||
LCURLY = Terminal("{", highlight=highlight.punctuation.curly_brace.open)
|
||||
RCURLY = Terminal("}", highlight=highlight.punctuation.curly_brace.close)
|
||||
LET = Terminal("let", highlight=highlight.keyword.other)
|
||||
RETURN = Terminal("return", highlight=highlight.keyword.control)
|
||||
SEMICOLON = Terminal(";", highlight=highlight.punctuation.separator)
|
||||
STRING = Terminal(
|
||||
)
|
||||
IF = Terminal("IF", "if", highlight=highlight.keyword.control.conditional)
|
||||
IMPORT = Terminal("IMPORT", "import", highlight=highlight.keyword.other)
|
||||
IN = Terminal("IN", "in", highlight=highlight.keyword.operator)
|
||||
LCURLY = Terminal("LCURLY", "{", highlight=highlight.punctuation.curly_brace.open)
|
||||
RCURLY = Terminal("RCURLY", "}", highlight=highlight.punctuation.curly_brace.close)
|
||||
LET = Terminal("LET", "let", highlight=highlight.keyword.other)
|
||||
RETURN = Terminal("RETURN", "return", highlight=highlight.keyword.control)
|
||||
SEMICOLON = Terminal("SEMICOLON", ";", highlight=highlight.punctuation.separator)
|
||||
STRING = Terminal(
|
||||
"STRING",
|
||||
# Double-quoted string.
|
||||
Re.seq(
|
||||
Re.literal('"'),
|
||||
|
|
@ -471,27 +442,28 @@ class FineGrammar(Grammar):
|
|||
Re.literal("'"),
|
||||
),
|
||||
highlight=highlight.string.quoted,
|
||||
)
|
||||
WHILE = Terminal("while", highlight=highlight.keyword.control)
|
||||
EQUAL = Terminal("=", highlight=highlight.keyword.operator.expression)
|
||||
LPAREN = Terminal("(", highlight=highlight.punctuation.parenthesis.open)
|
||||
RPAREN = Terminal(")", highlight=highlight.punctuation.parenthesis.close)
|
||||
COMMA = Terminal(",", highlight=highlight.punctuation.separator)
|
||||
SELF = Terminal("self", name="SELFF", highlight=highlight.variable.language)
|
||||
OR = Terminal("or", highlight=highlight.keyword.operator.expression)
|
||||
IS = Terminal("is", highlight=highlight.keyword.operator.expression)
|
||||
AND = Terminal("and", highlight=highlight.keyword.operator.expression)
|
||||
EQUALEQUAL = Terminal("==", highlight=highlight.keyword.operator.expression)
|
||||
BANGEQUAL = Terminal("!=", highlight=highlight.keyword.operator.expression)
|
||||
LESS = Terminal("<", highlight=highlight.keyword.operator.expression)
|
||||
GREATER = Terminal(">", highlight=highlight.keyword.operator.expression)
|
||||
LESSEQUAL = Terminal("<=", highlight=highlight.keyword.operator.expression)
|
||||
GREATEREQUAL = Terminal(">=", highlight=highlight.keyword.operator.expression)
|
||||
PLUS = Terminal("+", highlight=highlight.keyword.operator.expression)
|
||||
MINUS = Terminal("-", highlight=highlight.keyword.operator.expression)
|
||||
STAR = Terminal("*", highlight=highlight.keyword.operator.expression)
|
||||
SLASH = Terminal("/", highlight=highlight.keyword.operator.expression)
|
||||
NUMBER = Terminal(
|
||||
)
|
||||
WHILE = Terminal("WHILE", "while", highlight=highlight.keyword.control)
|
||||
EQUAL = Terminal("EQUAL", "=", highlight=highlight.keyword.operator.expression)
|
||||
LPAREN = Terminal("LPAREN", "(", highlight=highlight.punctuation.parenthesis.open)
|
||||
RPAREN = Terminal("RPAREN", ")", highlight=highlight.punctuation.parenthesis.close)
|
||||
COMMA = Terminal("COMMA", ",", highlight=highlight.punctuation.separator)
|
||||
SELF = Terminal("SELFF", "self", highlight=highlight.variable.language)
|
||||
OR = Terminal("OR", "or", highlight=highlight.keyword.operator.expression)
|
||||
IS = Terminal("IS", "is", highlight=highlight.keyword.operator.expression)
|
||||
AND = Terminal("AND", "and", highlight=highlight.keyword.operator.expression)
|
||||
EQUALEQUAL = Terminal("EQUALEQUAL", "==", highlight=highlight.keyword.operator.expression)
|
||||
BANGEQUAL = Terminal("BANGEQUAL", "!=", highlight=highlight.keyword.operator.expression)
|
||||
LESS = Terminal("LESS", "<", highlight=highlight.keyword.operator.expression)
|
||||
GREATER = Terminal("GREATER", ">", highlight=highlight.keyword.operator.expression)
|
||||
LESSEQUAL = Terminal("LESSEQUAL", "<=", highlight=highlight.keyword.operator.expression)
|
||||
GREATEREQUAL = Terminal("GREATEREQUAL", ">=", highlight=highlight.keyword.operator.expression)
|
||||
PLUS = Terminal("PLUS", "+", highlight=highlight.keyword.operator.expression)
|
||||
MINUS = Terminal("MINUS", "-", highlight=highlight.keyword.operator.expression)
|
||||
STAR = Terminal("STAR", "*", highlight=highlight.keyword.operator.expression)
|
||||
SLASH = Terminal("SLASH", "/", highlight=highlight.keyword.operator.expression)
|
||||
NUMBER = Terminal(
|
||||
"NUMBER",
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
|
|
@ -505,18 +477,41 @@ class FineGrammar(Grammar):
|
|||
).question(),
|
||||
),
|
||||
highlight=highlight.constant.numeric,
|
||||
)
|
||||
TRUE = Terminal("true", highlight=highlight.constant.language)
|
||||
FALSE = Terminal("false", highlight=highlight.constant.language)
|
||||
BANG = Terminal("!", highlight=highlight.keyword.operator.expression)
|
||||
DOT = Terminal(".", highlight=highlight.punctuation.separator)
|
||||
MATCH = Terminal("match", highlight=highlight.keyword.other)
|
||||
EXPORT = Terminal("export", highlight=highlight.keyword.other)
|
||||
UNDERSCORE = Terminal("_", highlight=highlight.variable.language)
|
||||
NEW = Terminal("new", highlight=highlight.keyword.operator)
|
||||
LSQUARE = Terminal("[", highlight=highlight.punctuation.square_bracket.open)
|
||||
RSQUARE = Terminal("]", highlight=highlight.punctuation.square_bracket.close)
|
||||
)
|
||||
TRUE = Terminal("TRUE", "true", highlight=highlight.constant.language)
|
||||
FALSE = Terminal("FALSE", "false", highlight=highlight.constant.language)
|
||||
BANG = Terminal("BANG", "!", highlight=highlight.keyword.operator.expression)
|
||||
DOT = Terminal("DOT", ".", highlight=highlight.punctuation.separator)
|
||||
MATCH = Terminal("MATCH", "match", highlight=highlight.keyword.other)
|
||||
EXPORT = Terminal("EXPORT", "export", highlight=highlight.keyword.other)
|
||||
UNDERSCORE = Terminal("UNDERSCORE", "_", highlight=highlight.variable.language)
|
||||
NEW = Terminal("NEW", "new", highlight=highlight.keyword.operator)
|
||||
LSQUARE = Terminal("LSQUARE", "[", highlight=highlight.punctuation.square_bracket.open)
|
||||
RSQUARE = Terminal("RSQUARE", "]", highlight=highlight.punctuation.square_bracket.close)
|
||||
|
||||
FineGrammar=Grammar(
|
||||
start=file,
|
||||
trivia=[BLANKS, LINE_BREAK, COMMENT],
|
||||
pretty_indent=" ",
|
||||
precedence=[
|
||||
(Assoc.RIGHT, [EQUAL]),
|
||||
(Assoc.LEFT, [OR]),
|
||||
(Assoc.LEFT, [IS]),
|
||||
(Assoc.LEFT, [AND]),
|
||||
(Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]),
|
||||
(Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
|
||||
(Assoc.LEFT, [PLUS, MINUS]),
|
||||
(Assoc.LEFT, [STAR, SLASH]),
|
||||
(Assoc.LEFT, [primary_expression]),
|
||||
(Assoc.LEFT, [LPAREN]),
|
||||
(Assoc.LEFT, [DOT]),
|
||||
#
|
||||
# If there's a confusion about whether to make an IF
|
||||
# statement or an expression, prefer the statement.
|
||||
#
|
||||
(Assoc.NONE, [if_statement]),
|
||||
],
|
||||
)
|
||||
|
||||
if __name__ == "__main__":
|
||||
from pathlib import Path
|
||||
|
|
@ -525,7 +520,7 @@ if __name__ == "__main__":
|
|||
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
|
||||
|
||||
# TODO: Actually generate a lexer/parser for some runtime.
|
||||
grammar = FineGrammar()
|
||||
grammar = FineGrammar
|
||||
|
||||
table = grammar.build_table()
|
||||
# print(table.format())
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@ class FaceQuery:
|
|||
|
||||
|
||||
def gather_faces(grammar: parser.Grammar):
|
||||
nts = {nt.name: nt for nt in grammar.non_terminals()}
|
||||
|
||||
def scoop(node: str, input: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]:
|
||||
parts = []
|
||||
for item in input:
|
||||
|
|
@ -52,13 +50,12 @@ def gather_faces(grammar: parser.Grammar):
|
|||
)
|
||||
)
|
||||
|
||||
elif isinstance(item, str):
|
||||
nt = nts[item]
|
||||
if nt.transparent:
|
||||
if nt.name in visited:
|
||||
elif isinstance(item, parser.NonTerminal):
|
||||
if item.transparent:
|
||||
if item.name in visited:
|
||||
continue
|
||||
visited.add(nt.name)
|
||||
body = nt.fn(grammar)
|
||||
visited.add(item.name)
|
||||
body = item.definition
|
||||
for production in body.flatten(with_metadata=True):
|
||||
parts.extend(scoop(node, production, visited))
|
||||
|
||||
|
|
@ -69,7 +66,7 @@ def gather_faces(grammar: parser.Grammar):
|
|||
if rule.transparent:
|
||||
continue
|
||||
|
||||
body = rule.fn(grammar)
|
||||
body = rule.definition
|
||||
for production in body.flatten(with_metadata=True):
|
||||
queries.extend(scoop(rule.name, production, set()))
|
||||
|
||||
|
|
|
|||
351
parser/parser.py
351
parser/parser.py
|
|
@ -17,12 +17,10 @@ the thing that processes the tables.
|
|||
|
||||
## Making Grammars
|
||||
|
||||
To get started, create a grammar that derives from the `Grammar` class. Create
|
||||
one method per nonterminal, decorated with the `rule` decorator. Here's an
|
||||
example:
|
||||
Define a series of terminals (with `Terminal`) and rules (as functions decorated
|
||||
with `@rule`), and then pass the starting rule to the constructor of a `Grammar`
|
||||
object:
|
||||
|
||||
|
||||
class SimpleGrammar(Grammar):
|
||||
@rule
|
||||
def expression():
|
||||
return seq(expression, PLUS, term) | term
|
||||
|
|
@ -36,6 +34,7 @@ example:
|
|||
RPAREN = Terminal(')')
|
||||
ID = Terminal('id')
|
||||
|
||||
grammar = Grammar(start=expression)
|
||||
|
||||
## Using grammars
|
||||
|
||||
|
|
@ -1533,7 +1532,9 @@ class ParserGenerator:
|
|||
return builder.flush(config_sets)
|
||||
|
||||
|
||||
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
|
||||
FlattenedWithMetadata = list[
|
||||
"NonTerminal|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"
|
||||
]
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
|
@ -1578,26 +1579,32 @@ class Rule:
|
|||
class Terminal(Rule):
|
||||
"""A token, or terminal symbol in the grammar."""
|
||||
|
||||
name: str | None
|
||||
name: str
|
||||
pattern: "str | Re"
|
||||
meta: dict[str, typing.Any]
|
||||
regex: bool
|
||||
error_name: str | None
|
||||
definition_location: str
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
pattern: "str|Re",
|
||||
*,
|
||||
name: str | None = None,
|
||||
error_name: str | None = None,
|
||||
**kwargs,
|
||||
):
|
||||
# TODO: Consider identifying the name from some kind of globals
|
||||
# dictionary or something if necessary.
|
||||
self.name = name
|
||||
self.pattern = pattern
|
||||
self.meta = kwargs
|
||||
self.regex = isinstance(pattern, Re)
|
||||
self.error_name = error_name
|
||||
|
||||
caller = inspect.stack()[1]
|
||||
self.definition_location = f"{caller.filename}:{caller.lineno}"
|
||||
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
) -> typing.Generator[FlattenedWithMetadata, None, None]:
|
||||
|
|
@ -1617,14 +1624,17 @@ class NonTerminal(Rule):
|
|||
grammar class.
|
||||
"""
|
||||
|
||||
fn: typing.Callable[["Grammar"], Rule]
|
||||
fn: typing.Callable[[], Rule]
|
||||
name: str
|
||||
transparent: bool
|
||||
error_name: str | None
|
||||
definition_location: str
|
||||
_definition: Rule | None
|
||||
_body: "list[list[NonTerminal | Terminal]] | None"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
fn: typing.Callable[["Grammar"], Rule],
|
||||
fn: typing.Callable[[], Rule],
|
||||
name: str | None = None,
|
||||
transparent: bool = False,
|
||||
error_name: str | None = None,
|
||||
|
|
@ -1645,22 +1655,37 @@ class NonTerminal(Rule):
|
|||
self.name = name or fn.__name__
|
||||
self.transparent = transparent
|
||||
self.error_name = error_name
|
||||
self._definition = None
|
||||
self._body = None
|
||||
|
||||
def generate_body(self, grammar) -> list[list[str | Terminal]]:
|
||||
"""Generate the body of the non-terminal.
|
||||
caller = inspect.stack()[1]
|
||||
self.definition_location = f"{caller.filename}:{caller.lineno}"
|
||||
|
||||
We do this by first calling the associated function in order to get a
|
||||
Rule, and then flattening the Rule into the associated set of
|
||||
productions. We strip the metadata from the flattened result to make
|
||||
life a little easier for the caller.
|
||||
@property
|
||||
def definition(self) -> Rule:
|
||||
"""The rule that is the definition of this nonterminal.
|
||||
|
||||
(As opposed to this rule itself, which is... itself.)
|
||||
"""
|
||||
if self._definition is None:
|
||||
self._definition = self.fn()
|
||||
return self._definition
|
||||
|
||||
@property
|
||||
def body(self) -> "list[list[NonTerminal | Terminal]]":
|
||||
"""The flattened body of the nonterminal: a list of productions where
|
||||
each production is a sequence of Terminals and NonTerminals.
|
||||
"""
|
||||
|
||||
def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]:
|
||||
def without_metadata(result: FlattenedWithMetadata) -> list[NonTerminal | Terminal]:
|
||||
for item in result:
|
||||
assert not isinstance(item, tuple)
|
||||
return typing.cast(list[str | Terminal], result)
|
||||
return typing.cast(list[NonTerminal | Terminal], result)
|
||||
|
||||
return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)]
|
||||
if self._body is None:
|
||||
self._body = [without_metadata(rule) for rule in self.fn().flatten(with_metadata=False)]
|
||||
|
||||
return self._body
|
||||
|
||||
def flatten(
|
||||
self, with_metadata: bool = False
|
||||
|
|
@ -1669,7 +1694,7 @@ class NonTerminal(Rule):
|
|||
# the context of some other production. Yield ourselves, and trust that
|
||||
# in time we will be asked to generate our body.
|
||||
del with_metadata
|
||||
yield [self.name]
|
||||
yield [self]
|
||||
|
||||
|
||||
class AlternativeRule(Rule):
|
||||
|
|
@ -1775,7 +1800,7 @@ def mark(rule: Rule, **kwargs) -> Rule:
|
|||
|
||||
|
||||
@typing.overload
|
||||
def rule(f: typing.Callable, /) -> Rule: ...
|
||||
def rule(f: typing.Callable, /) -> NonTerminal: ...
|
||||
|
||||
|
||||
@typing.overload
|
||||
|
|
@ -1783,16 +1808,15 @@ def rule(
|
|||
name: str | None = None,
|
||||
transparent: bool | None = None,
|
||||
error_name: str | None = None,
|
||||
) -> typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]: ...
|
||||
) -> typing.Callable[[typing.Callable[[], Rule]], NonTerminal]: ...
|
||||
|
||||
|
||||
def rule(
|
||||
name: str | None | typing.Callable = None,
|
||||
transparent: bool | None = None,
|
||||
error_name: str | None = None,
|
||||
) -> Rule | typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]:
|
||||
"""The decorator that marks a method in a Grammar object as a nonterminal
|
||||
rule.
|
||||
) -> NonTerminal | typing.Callable[[typing.Callable[[], Rule]], NonTerminal]:
|
||||
"""The decorator that marks a function as a nonterminal rule.
|
||||
|
||||
As with all the best decorators, it can be called with or without arguments.
|
||||
If called with one argument, that argument is a name that overrides the name
|
||||
|
|
@ -1801,7 +1825,7 @@ def rule(
|
|||
if callable(name):
|
||||
return rule()(name)
|
||||
|
||||
def wrapper(f: typing.Callable[[typing.Any], Rule]):
|
||||
def wrapper(f: typing.Callable[[], Rule]):
|
||||
nonlocal name
|
||||
nonlocal transparent
|
||||
nonlocal error_name
|
||||
|
|
@ -2746,22 +2770,89 @@ class TriviaMode(enum.Enum):
|
|||
|
||||
|
||||
###############################################################################
|
||||
# Finally, the base class for grammars
|
||||
# Finally, the grammar class.
|
||||
###############################################################################
|
||||
|
||||
PrecedenceList = list[typing.Tuple[Assoc, list[Rule | str]]]
|
||||
PrecedenceList = list[typing.Tuple[Assoc, list[Terminal|NonTerminal]]]
|
||||
|
||||
def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str,NonTerminal], dict[str,Terminal]]:
|
||||
"""Starting from the given NonTerminal, gather all of the symbols
|
||||
(NonTerminals and Terminals) that make up the grammar.
|
||||
"""
|
||||
# NOTE: We use a dummy dictionary here to preserve insertion order.
|
||||
# That way the first element in named_rules is always the start
|
||||
# symbol!
|
||||
rules: dict[NonTerminal, int] = {}
|
||||
terminals: dict[Terminal, int] = {}
|
||||
|
||||
# STEP 1 is to just gather all of the symbols that we can find.
|
||||
queue: list[NonTerminal] = [start]
|
||||
while len(queue) > 0:
|
||||
nt = queue.pop()
|
||||
if nt in rules:
|
||||
continue
|
||||
|
||||
# TODO: Here we can track modules (via the functions that make up
|
||||
# nonterminals, maybe) and maybe use that to infer terminal
|
||||
# names.
|
||||
rules[nt] = len(rules)
|
||||
|
||||
for rule in nt.body:
|
||||
for symbol in rule:
|
||||
if isinstance(symbol, NonTerminal):
|
||||
if symbol not in rules:
|
||||
queue.append(symbol)
|
||||
|
||||
elif isinstance(symbol, Terminal):
|
||||
terminals[symbol] = len(terminals)
|
||||
|
||||
else:
|
||||
typing.assert_never(symbol)
|
||||
|
||||
# (Trivia terminals count as reachable too!)
|
||||
for symbol in trivia:
|
||||
terminals[symbol] = len(terminals)
|
||||
|
||||
# Step 2 is to organize all of these things and check them for errors.
|
||||
named_rules: dict[str, NonTerminal] = {}
|
||||
for rule in rules:
|
||||
existing = named_rules.get(rule.name)
|
||||
if existing is not None:
|
||||
# TODO TEST
|
||||
raise ValueError(f"""Found more than one rule named {rule.name}:
|
||||
- {existing.definition_location}
|
||||
- {rule.definition_location}""")
|
||||
named_rules[rule.name] = rule
|
||||
|
||||
named_terminals: dict[str, Terminal] = {}
|
||||
for terminal in terminals:
|
||||
existing = named_terminals.get(terminal.name)
|
||||
if existing is not None:
|
||||
# TODO TEST
|
||||
raise ValueError(f"""Found more than one terminal named {terminal.name}:
|
||||
- {existing.definition_location}
|
||||
- {terminal.definition_location}""")
|
||||
|
||||
existing_rule = named_rules.get(terminal.name)
|
||||
if existing_rule is not None:
|
||||
# TODO TEST
|
||||
raise ValueError(f"""Found a terminal and a rule both named {terminal.name}:
|
||||
- The rule was defined at {existing_rule.definition_location}
|
||||
- The terminal was defined at {terminal.definition_location}""")
|
||||
|
||||
named_terminals[terminal.name] = terminal
|
||||
|
||||
return (named_rules, named_terminals)
|
||||
|
||||
|
||||
class Grammar:
|
||||
"""The base class for defining a grammar.
|
||||
|
||||
Inherit from this, and and define members for your nonterminals, and then
|
||||
use the `build_table` method to construct the parse tables.
|
||||
|
||||
"""A container that holds all the terminals and nonterminals for a
|
||||
given grammar. The terminals and nonterminals are defined elsewhere;
|
||||
provide the starting rule and this object will build the grammar from
|
||||
everything accessible.
|
||||
|
||||
Here's an example of a simple grammar:
|
||||
|
||||
class SimpleGrammar(Grammar):
|
||||
@rule
|
||||
def expression():
|
||||
return seq(expression, PLUS, term) | term
|
||||
|
|
@ -2775,116 +2866,54 @@ class Grammar:
|
|||
RPAREN = Terminal(')')
|
||||
ID = Terminal('id')
|
||||
|
||||
grammar = Grammar(start=expression)
|
||||
|
||||
Not very exciting, perhaps, but it's something.
|
||||
"""
|
||||
|
||||
_precedence: dict[str, typing.Tuple[Assoc, int]]
|
||||
_generator: type[ParserGenerator]
|
||||
start: NonTerminal
|
||||
name: str
|
||||
pretty_indent: str | None
|
||||
_terminals: dict[str, Terminal]
|
||||
_nonterminals: dict[str, NonTerminal]
|
||||
_trivia: list[Terminal]
|
||||
_precedence: dict[str, typing.Tuple[Assoc, int]]
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
start: str | NonTerminal | None = None,
|
||||
start: NonTerminal,
|
||||
precedence: PrecedenceList | None = None,
|
||||
generator: type[ParserGenerator] | None = None,
|
||||
trivia: list[str | Terminal] | None = None,
|
||||
trivia: list[Terminal] | None = None,
|
||||
name: str | None = None,
|
||||
pretty_indent: str | None = None,
|
||||
):
|
||||
if start is None:
|
||||
start = getattr(self, "start", None)
|
||||
if start is None:
|
||||
raise ValueError(
|
||||
"The default start rule must either be specified in the constructor or as an "
|
||||
"attribute in the class."
|
||||
)
|
||||
if isinstance(start, NonTerminal):
|
||||
start = start.name
|
||||
if start.transparent:
|
||||
# TODO: TEST
|
||||
raise ValueError("The start rule cannot be transparent")
|
||||
|
||||
if precedence is None:
|
||||
precedence = getattr(self, "precedence", [])
|
||||
precedence = []
|
||||
assert precedence is not None
|
||||
|
||||
if generator is None:
|
||||
generator = getattr(self, "generator", ParserGenerator)
|
||||
assert generator is not None
|
||||
|
||||
if trivia is None:
|
||||
trivia = getattr(self, "trivia", [])
|
||||
trivia = []
|
||||
assert trivia is not None
|
||||
|
||||
# Fixup terminal names with the name of the member that declared it.
|
||||
terminals = {}
|
||||
for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)):
|
||||
if t.name is None:
|
||||
t.name = n
|
||||
|
||||
if n in terminals:
|
||||
raise ValueError(f"More than one terminal has the name '{n}'")
|
||||
terminals[n] = t
|
||||
|
||||
# Get the nonterminals.
|
||||
nonterminals = {}
|
||||
for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)):
|
||||
if nt.name in nonterminals:
|
||||
raise ValueError(f"More than one nonterminal found with the name '{nt.name}'")
|
||||
|
||||
if nt.name in terminals:
|
||||
raise ValueError(
|
||||
f"'{nt.name}' is the name of both a Terminal and a NonTerminal rule"
|
||||
)
|
||||
|
||||
nonterminals[nt.name] = nt
|
||||
|
||||
# Resolve the trivia declarations correctly.
|
||||
resolved_trivia: list[Terminal] = []
|
||||
for t in trivia:
|
||||
if isinstance(t, str):
|
||||
resolved = terminals.get(t)
|
||||
if resolved is None:
|
||||
raise ValueError(f"The trivia '{t}' is not a terminal name")
|
||||
resolved_trivia.append(resolved)
|
||||
elif isinstance(t, Terminal):
|
||||
resolved_trivia.append(t)
|
||||
else:
|
||||
raise ValueError(f"{t} must be either a terminal name or literally a terminal")
|
||||
|
||||
# Fix up the precedence table.
|
||||
precedence_table = {}
|
||||
for prec, (associativity, symbols) in enumerate(precedence):
|
||||
for symbol in symbols:
|
||||
key = None
|
||||
if isinstance(symbol, Terminal):
|
||||
key = symbol.name
|
||||
if key is None:
|
||||
raise ValueError(f"{symbol} is a terminal that has not had a name set yet")
|
||||
elif isinstance(symbol, NonTerminal):
|
||||
key = symbol.name
|
||||
elif isinstance(symbol, str):
|
||||
if symbol in terminals or symbol in nonterminals:
|
||||
key = symbol
|
||||
|
||||
if key is None:
|
||||
raise ValueError(
|
||||
f"{symbol} must be either a Token or a NonTerminal, or the name of one"
|
||||
)
|
||||
|
||||
precedence_table[key] = (associativity, prec + 1)
|
||||
precedence_table[symbol.name] = (associativity, prec + 1)
|
||||
|
||||
if name is None:
|
||||
name = getattr(self, "name", None)
|
||||
if name is None:
|
||||
name = self.__class__.__name__.removesuffix("Grammar").lower()
|
||||
name = "unknown"
|
||||
|
||||
self._precedence = precedence_table
|
||||
self.start = start
|
||||
self._generator = generator
|
||||
self._terminals = terminals
|
||||
self._nonterminals = nonterminals
|
||||
self._trivia = resolved_trivia
|
||||
self.name = name
|
||||
self._nonterminals, self._terminals = gather_grammar(start, trivia)
|
||||
self._trivia = trivia
|
||||
self._precedence = precedence_table
|
||||
self.pretty_indent = pretty_indent
|
||||
|
||||
def terminals(self) -> list[Terminal]:
|
||||
return list(self._terminals.values())
|
||||
|
|
@ -2898,55 +2927,7 @@ class Grammar:
|
|||
def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
|
||||
return self._precedence.get(name)
|
||||
|
||||
# TODO: The flattened form should retain NonTerminal, not just str.
|
||||
def generate_nonterminal_dict(
|
||||
self, start: str | None = None
|
||||
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
|
||||
"""Convert the rules into a dictionary of productions, and a set of
|
||||
the names of transparent nonterminals.
|
||||
|
||||
Our table generators work on a very flat set of productions. This is the
|
||||
first step in flattening the productions from the members: walk the rules
|
||||
starting from the given start rule and flatten them, one by one, into a
|
||||
dictionary that maps nonterminal rule name to its associated list of
|
||||
productions.
|
||||
"""
|
||||
if start is None:
|
||||
start = self.start
|
||||
|
||||
nonterminals = self._nonterminals
|
||||
transparents = {rule.name for rule in nonterminals.values() if rule.transparent}
|
||||
|
||||
grammar = {}
|
||||
|
||||
rule = nonterminals.get(start)
|
||||
if rule is None:
|
||||
raise ValueError(f"Cannot find a rule named '{start}'")
|
||||
if rule.transparent:
|
||||
raise ValueError("The start rule cannot be transparent")
|
||||
queue = [rule]
|
||||
while len(queue) > 0:
|
||||
rule = queue.pop()
|
||||
if rule.name in grammar:
|
||||
continue
|
||||
|
||||
body = rule.generate_body(self)
|
||||
for clause in body:
|
||||
for symbol in clause:
|
||||
if not isinstance(symbol, Terminal):
|
||||
assert isinstance(symbol, str)
|
||||
nonterminal = nonterminals.get(symbol)
|
||||
if nonterminal is None:
|
||||
raise ValueError(f"While processing {rule.name}: cannot find {symbol}")
|
||||
queue.append(nonterminal)
|
||||
|
||||
grammar[rule.name] = body
|
||||
|
||||
return (grammar, transparents)
|
||||
|
||||
def desugar(
|
||||
self, start: str | None = None
|
||||
) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
|
||||
def desugar(self) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
|
||||
"""Convert the rules into a flat list of productions.
|
||||
|
||||
Our table generators work from a very flat set of productions. The form
|
||||
|
|
@ -2954,37 +2935,27 @@ class Grammar:
|
|||
generate_nonterminal_dict- less useful to people, probably, but it is
|
||||
the input form needed by the Generator.
|
||||
"""
|
||||
temp_grammar, transparents = self.generate_nonterminal_dict(start)
|
||||
grammar: list[tuple[str,list[str]]] = [
|
||||
(rule.name, [s.name for s in production])
|
||||
for rule in self._nonterminals.values()
|
||||
for production in rule.body
|
||||
]
|
||||
assert grammar[0][0] == self.start.name
|
||||
|
||||
grammar = []
|
||||
for rule_name, clauses in temp_grammar.items():
|
||||
for clause in clauses:
|
||||
new_clause = []
|
||||
for symbol in clause:
|
||||
if isinstance(symbol, Terminal):
|
||||
if symbol.name in temp_grammar:
|
||||
raise ValueError(
|
||||
f"'{symbol.name}' is the name of both a Terminal and a NonTerminal rule. This will cause problems."
|
||||
)
|
||||
new_clause.append(symbol.name)
|
||||
else:
|
||||
new_clause.append(symbol)
|
||||
|
||||
grammar.append((rule_name, new_clause))
|
||||
transparents = {name for name, rule in self._nonterminals.items() if rule.transparent}
|
||||
|
||||
return grammar, transparents
|
||||
|
||||
def build_table(self, start: str | None = None, generator=None) -> ParseTable:
|
||||
"""Construct a parse table for this grammar, starting at the named
|
||||
nonterminal rule.
|
||||
"""
|
||||
if start is None:
|
||||
start = self.start
|
||||
desugared, transparents = self.desugar(start)
|
||||
def build_table(self) -> ParseTable:
|
||||
"""Construct a parse table for this grammar."""
|
||||
desugared, transparents = self.desugar()
|
||||
|
||||
if generator is None:
|
||||
generator = self._generator
|
||||
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)
|
||||
gen = ParserGenerator(
|
||||
self.start.name,
|
||||
desugared,
|
||||
precedence=self._precedence,
|
||||
transparents=transparents,
|
||||
)
|
||||
table = gen.gen_table()
|
||||
|
||||
for t in self._trivia:
|
||||
|
|
|
|||
|
|
@ -263,8 +263,7 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
|
|||
if rule.transparent:
|
||||
rule_name = "_" + rule_name
|
||||
|
||||
body = rule.fn(grammar)
|
||||
rule_definition = convert_to_tree_sitter(body, grammar)
|
||||
rule_definition = convert_to_tree_sitter(rule.definition, grammar)
|
||||
if rule_definition is None:
|
||||
raise Exception(f"Tree-sitter does not support the empty rule {rule_name}")
|
||||
rule_definition = apply_precedence(rule_definition, rule.name, grammar)
|
||||
|
|
@ -283,7 +282,6 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
|
|||
|
||||
|
||||
def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
|
||||
nts = {nt.name: nt for nt in grammar.non_terminals()}
|
||||
scope_suffix = "." + grammar.name
|
||||
|
||||
def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]:
|
||||
|
|
@ -300,13 +298,12 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
|
|||
raise Exception("Highlight must come with a field name") # TODO
|
||||
parts.append(f"{field_name}: _ @{highlight.scope}{scope_suffix}")
|
||||
|
||||
elif isinstance(item, str):
|
||||
nt = nts[item]
|
||||
if nt.transparent:
|
||||
if nt.name in visited:
|
||||
elif isinstance(item, parser.NonTerminal):
|
||||
if item.transparent:
|
||||
if item.name in visited:
|
||||
continue
|
||||
visited.add(nt.name)
|
||||
body = nt.fn(grammar)
|
||||
visited.add(item.name)
|
||||
body = item.definition
|
||||
for production in body.flatten(with_metadata=True):
|
||||
parts.extend(scoop(production, visited))
|
||||
|
||||
|
|
@ -317,7 +314,7 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
|
|||
if rule.transparent:
|
||||
continue
|
||||
|
||||
body = rule.fn(grammar)
|
||||
body = rule.definition
|
||||
patterns = set()
|
||||
for production in body.flatten(with_metadata=True):
|
||||
# Scoop up the meta...
|
||||
|
|
|
|||
|
|
@ -79,11 +79,7 @@ class MatcherTable:
|
|||
newline_replace: dict[str, str]
|
||||
|
||||
|
||||
def _compile_nonterminal_matcher(
|
||||
grammar: parser.Grammar,
|
||||
nonterminals: dict[str, parser.NonTerminal],
|
||||
rule: parser.NonTerminal,
|
||||
) -> MatcherTable:
|
||||
def _compile_nonterminal_matcher(rule: parser.NonTerminal) -> MatcherTable:
|
||||
"""Generate a matcher table for a single nonterminal.
|
||||
|
||||
See the docs for [MatcherTable] to understand the result.
|
||||
|
|
@ -111,7 +107,7 @@ def _compile_nonterminal_matcher(
|
|||
def compile_nonterminal(name: str, rule: parser.NonTerminal):
|
||||
if name not in visited:
|
||||
visited.add(name)
|
||||
for production in rule.fn(grammar).flatten(with_metadata=True):
|
||||
for production in rule.fn().flatten(with_metadata=True):
|
||||
trans_prod = compile_production(production)
|
||||
generated_grammar.append((name, trans_prod))
|
||||
|
||||
|
|
@ -126,19 +122,18 @@ def _compile_nonterminal_matcher(
|
|||
|
||||
result = []
|
||||
for item in production:
|
||||
if isinstance(item, str):
|
||||
nt = nonterminals[item]
|
||||
if nt.transparent:
|
||||
if isinstance(item, parser.NonTerminal):
|
||||
if item.transparent:
|
||||
# If it's transparent then we make a new set of
|
||||
# productions that covers the contents of the
|
||||
# transparent nonterminal.
|
||||
name = "xxx_" + nt.name
|
||||
compile_nonterminal(name, nt)
|
||||
name = "xxx_" + item.name
|
||||
compile_nonterminal(name, item)
|
||||
result.append(name)
|
||||
else:
|
||||
# Otherwise it's a "token" in our input, named
|
||||
# "tree_{whatever}".
|
||||
result.append(f"tree_{item}")
|
||||
result.append(f"tree_{item.name}")
|
||||
|
||||
elif isinstance(item, parser.Terminal):
|
||||
# If it's a terminal it will appear in our input as
|
||||
|
|
@ -257,7 +252,7 @@ def _compile_nonterminal_matcher(
|
|||
|
||||
start_name = f"yyy_{rule.name}"
|
||||
compile_nonterminal(start_name, rule)
|
||||
gen = grammar._generator(start_name, generated_grammar)
|
||||
gen = parser.ParserGenerator(start_name, generated_grammar)
|
||||
parse_table = gen.gen_table()
|
||||
|
||||
for (_, replacement), rule_name in newlines.items():
|
||||
|
|
@ -296,7 +291,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) ->
|
|||
matchers = {}
|
||||
|
||||
if indent is None:
|
||||
indent = getattr(grammar, "pretty_indent", None)
|
||||
indent = grammar.pretty_indent
|
||||
if indent is None:
|
||||
indent = " "
|
||||
|
||||
|
|
@ -307,7 +302,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) ->
|
|||
trivia_mode[t.name] = mode
|
||||
|
||||
for name, rule in nonterminals.items():
|
||||
matchers[name] = _compile_nonterminal_matcher(grammar, nonterminals, rule)
|
||||
matchers[name] = _compile_nonterminal_matcher(rule)
|
||||
|
||||
return PrettyTable(
|
||||
indent,
|
||||
|
|
|
|||
223
sql.py
223
sql.py
|
|
@ -2,6 +2,7 @@ from parser import *
|
|||
|
||||
|
||||
NAME = Terminal(
|
||||
"NAME",
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
|
|
@ -9,6 +10,7 @@ NAME = Terminal(
|
|||
)
|
||||
|
||||
STRING = Terminal(
|
||||
"STRING",
|
||||
Re.seq(
|
||||
Re.literal("'"),
|
||||
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
|
||||
|
|
@ -18,6 +20,7 @@ STRING = Terminal(
|
|||
)
|
||||
|
||||
NUMBER = Terminal(
|
||||
"NUMBER",
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
|
|
@ -33,118 +36,118 @@ NUMBER = Terminal(
|
|||
highlight=highlight.constant.numeric,
|
||||
)
|
||||
|
||||
OR = Terminal("or")
|
||||
AND = Terminal("and")
|
||||
NOT = Terminal("not")
|
||||
OR = Terminal("OR", "or")
|
||||
AND = Terminal("AND", "and")
|
||||
NOT = Terminal("NOT", "not")
|
||||
COMPARISON = Terminal(
|
||||
"COMPARISON",
|
||||
Re.literal("=")
|
||||
| Re.literal("<>")
|
||||
| Re.literal("<")
|
||||
| Re.literal(">")
|
||||
| Re.literal("<=")
|
||||
| Re.literal(">=")
|
||||
| Re.literal(">="),
|
||||
)
|
||||
PLUS = Terminal("+")
|
||||
MINUS = Terminal("-")
|
||||
STAR = Terminal("*")
|
||||
SLASH = Terminal("/")
|
||||
PLUS = Terminal("PLUS", "+")
|
||||
MINUS = Terminal("MINUS", "-")
|
||||
STAR = Terminal("STAR", "*")
|
||||
SLASH = Terminal("SLASH", "/")
|
||||
|
||||
precedence = [
|
||||
(Assoc.LEFT, ["OR"]),
|
||||
(Assoc.LEFT, ["AND"]),
|
||||
(Assoc.LEFT, ["NOT"]),
|
||||
(Assoc.LEFT, ["COMPARISON"]),
|
||||
(Assoc.LEFT, ["PLUS", "MINUS"]),
|
||||
(Assoc.LEFT, ["STAR", "SLASH"]),
|
||||
# TODO: Unary minus
|
||||
]
|
||||
ALL = Terminal("ALL", "all")
|
||||
AMMSC = Terminal("AMMSC", "ammsc")
|
||||
ANY = Terminal("ANY", "any")
|
||||
AS = Terminal("AS", "as")
|
||||
ASC = Terminal("ASC", "asc")
|
||||
AUTHORIZATION = Terminal("AUTHORIZATION", "authorization")
|
||||
BETWEEN = Terminal("BETWEEN", "between")
|
||||
BY = Terminal("BY", "by")
|
||||
CHARACTER = Terminal("CHARACTER", "character")
|
||||
CHECK = Terminal("CHECK", "check")
|
||||
CLOSE = Terminal("CLOSE", "close")
|
||||
COMMIT = Terminal("COMMIT", "commit")
|
||||
CONTINUE = Terminal("CONTINUE", "continue")
|
||||
CREATE = Terminal("CREATE", "create")
|
||||
CURRENT = Terminal("CURRENT", "current")
|
||||
CURSOR = Terminal("CURSOR", "cursor")
|
||||
DECIMAL = Terminal("DECIMAL", "decimal")
|
||||
DECLARE = Terminal("DECLARE", "declare")
|
||||
DEFAULT = Terminal("DEFAULT", "default")
|
||||
DELETE = Terminal("DELETE", "delete")
|
||||
DESC = Terminal("DESC", "desc")
|
||||
DISTINCT = Terminal("DISTINCT", "distinct")
|
||||
DOUBLE = Terminal("DOUBLE", "double")
|
||||
ESCAPE = Terminal("ESCAPE", "escape")
|
||||
EXISTS = Terminal("EXISTS", "exists")
|
||||
FETCH = Terminal("FETCH", "fetch")
|
||||
FLOAT = Terminal("FLOAT", "float")
|
||||
FOR = Terminal("FOR", "for")
|
||||
FOREIGN = Terminal("FOREIGN", "foreign")
|
||||
FOUND = Terminal("FOUND", "found")
|
||||
FROM = Terminal("FROM", "from")
|
||||
GOTO = Terminal("GOTO", "goto")
|
||||
GRANT = Terminal("GRANT", "grant")
|
||||
GROUP = Terminal("GROUP", "group")
|
||||
HAVING = Terminal("HAVING", "having")
|
||||
IN = Terminal("IN", "in")
|
||||
INDICATOR = Terminal("INDICATOR", "indicator")
|
||||
INSERT = Terminal("INSERT", "insert")
|
||||
INTEGER = Terminal("INTEGER", "integer")
|
||||
INTO = Terminal("INTO", "into")
|
||||
IS = Terminal("IS", "is")
|
||||
KEY = Terminal("KEY", "key")
|
||||
LANGUAGE = Terminal("LANGUAGE", "language")
|
||||
LIKE = Terminal("LIKE", "like")
|
||||
NULL = Terminal("NULL", "null")
|
||||
NUMERIC = Terminal("NUMERIC", "numeric")
|
||||
OF = Terminal("OF", "of")
|
||||
ON = Terminal("ON", "on")
|
||||
OPEN = Terminal("OPEN", "open")
|
||||
OPTION = Terminal("OPTION", "option")
|
||||
ORDER = Terminal("ORDER", "order")
|
||||
PARAMETER = Terminal("PARAMETER", "parameter")
|
||||
PRECISION = Terminal("PRECISION", "precision")
|
||||
PRIMARY = Terminal("PRIMARY", "primary")
|
||||
PRIVILEGES = Terminal("PRIVILEGES", "privileges")
|
||||
PROCEDURE = Terminal("PROCEDURE", "procedure")
|
||||
PUBLIC = Terminal("PUBLIC", "public")
|
||||
REAL = Terminal("REAL", "real")
|
||||
REFERENCES = Terminal("REFERENCES", "references")
|
||||
ROLLBACK = Terminal("ROLLBACK", "rollback")
|
||||
SCHEMA = Terminal("SCHEMA", "schema")
|
||||
SELECT = Terminal("SELECT", "select")
|
||||
SET = Terminal("SET", "set")
|
||||
SMALLINT = Terminal("SMALLINT", "smallint")
|
||||
SOME = Terminal("SOME", "some")
|
||||
SQLCODE = Terminal("SQLCODE", "sqlcode")
|
||||
SQLERROR = Terminal("SQLERROR", "sqlerror")
|
||||
TABLE = Terminal("TABLE", "table")
|
||||
TO = Terminal("TO", "to")
|
||||
UNION = Terminal("UNION", "union")
|
||||
UNIQUE = Terminal("UNIQUE", "unique")
|
||||
UPDATE = Terminal("UPDATE", "update")
|
||||
USER = Terminal("USER", "user")
|
||||
VALUES = Terminal("VALUES", "values")
|
||||
VIEW = Terminal("VIEW", "view")
|
||||
WHENEVER = Terminal("WHENEVER", "whenever")
|
||||
WHERE = Terminal("WHERE", "where")
|
||||
WITH = Terminal("WITH", "with")
|
||||
WORK = Terminal("WORK", "work")
|
||||
|
||||
ALL = Terminal("all")
|
||||
AMMSC = Terminal("ammsc")
|
||||
ANY = Terminal("any")
|
||||
ASC = Terminal("asc")
|
||||
AUTHORIZATION = Terminal("authorization")
|
||||
BETWEEN = Terminal("between")
|
||||
BY = Terminal("by")
|
||||
CHARACTER = Terminal("character")
|
||||
CHECK = Terminal("check")
|
||||
CLOSE = Terminal("close")
|
||||
COMMIT = Terminal("commit")
|
||||
CONTINUE = Terminal("continue")
|
||||
CREATE = Terminal("create")
|
||||
CURRENT = Terminal("current")
|
||||
CURSOR = Terminal("cursor")
|
||||
DECIMAL = Terminal("decimal")
|
||||
DECLARE = Terminal("declare")
|
||||
DEFAULT = Terminal("default")
|
||||
DELETE = Terminal("delete")
|
||||
DESC = Terminal("desc")
|
||||
DISTINCT = Terminal("distinct")
|
||||
DOUBLE = Terminal("double")
|
||||
ESCAPE = Terminal("escape")
|
||||
EXISTS = Terminal("exists")
|
||||
FETCH = Terminal("fetch")
|
||||
FLOAT = Terminal("float")
|
||||
FOR = Terminal("for")
|
||||
FOREIGN = Terminal("foreign")
|
||||
FOUND = Terminal("found")
|
||||
FROM = Terminal("from")
|
||||
GOTO = Terminal("goto")
|
||||
GRANT = Terminal("grant")
|
||||
GROUP = Terminal("group")
|
||||
HAVING = Terminal("having")
|
||||
IN = Terminal("in")
|
||||
INDICATOR = Terminal("indicator")
|
||||
INSERT = Terminal("insert")
|
||||
INTEGER = Terminal("integer")
|
||||
INTO = Terminal("into")
|
||||
IS = Terminal("is")
|
||||
KEY = Terminal("key")
|
||||
LANGUAGE = Terminal("language")
|
||||
LIKE = Terminal("like")
|
||||
NULL = Terminal("null")
|
||||
NUMERIC = Terminal("numeric")
|
||||
OF = Terminal("of")
|
||||
ON = Terminal("on")
|
||||
OPEN = Terminal("open")
|
||||
OPTION = Terminal("option")
|
||||
ORDER = Terminal("order")
|
||||
PARAMETER = Terminal("parameter")
|
||||
PRECISION = Terminal("precision")
|
||||
PRIMARY = Terminal("primary")
|
||||
PRIVILEGES = Terminal("privileges")
|
||||
PROCEDURE = Terminal("procedure")
|
||||
PUBLIC = Terminal("public")
|
||||
REAL = Terminal("real")
|
||||
REFERENCES = Terminal("references")
|
||||
ROLLBACK = Terminal("rollback")
|
||||
SCHEMA = Terminal("schema")
|
||||
SELECT = Terminal("select")
|
||||
SET = Terminal("set")
|
||||
SMALLINT = Terminal("smallint")
|
||||
SOME = Terminal("some")
|
||||
SQLCODE = Terminal("sqlcode")
|
||||
SQLERROR = Terminal("sqlerror")
|
||||
TABLE = Terminal("table")
|
||||
TO = Terminal("to")
|
||||
UNION = Terminal("union")
|
||||
UNIQUE = Terminal("unique")
|
||||
UPDATE = Terminal("update")
|
||||
USER = Terminal("user")
|
||||
VALUES = Terminal("values")
|
||||
VIEW = Terminal("view")
|
||||
WHENEVER = Terminal("whenever")
|
||||
WHERE = Terminal("where")
|
||||
WITH = Terminal("with")
|
||||
WORK = Terminal("work")
|
||||
SEMICOLON = Terminal("SEMICOLON", ";")
|
||||
LPAREN = Terminal("LPAREN", "(")
|
||||
RPAREN = Terminal("RPAREN", ")")
|
||||
COMMA = Terminal("COMMA", ",")
|
||||
EQUAL = Terminal("EQUAL", "=")
|
||||
DOT = Terminal("DOT", ".")
|
||||
|
||||
SEMICOLON = Terminal(";")
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
COMMA = Terminal(",")
|
||||
EQUAL = Terminal("=")
|
||||
DOT = Terminal(".")
|
||||
AS = Terminal("as")
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
"COMMENT",
|
||||
Re.seq(Re.literal("--"), Re.set("\n").invert().star()),
|
||||
highlight=highlight.comment.line,
|
||||
trivia_mode=TriviaMode.LineComment,
|
||||
)
|
||||
|
||||
|
||||
@rule
|
||||
|
|
@ -740,3 +743,19 @@ def user():
|
|||
@rule
|
||||
def when_action():
|
||||
return (GOTO + NAME) | CONTINUE
|
||||
|
||||
|
||||
SQL = Grammar(
|
||||
start=sql_list,
|
||||
precedence=[
|
||||
(Assoc.LEFT, [OR]),
|
||||
(Assoc.LEFT, [AND]),
|
||||
(Assoc.LEFT, [NOT]),
|
||||
(Assoc.LEFT, [COMPARISON]),
|
||||
(Assoc.LEFT, [PLUS, MINUS]),
|
||||
(Assoc.LEFT, [STAR, SLASH]),
|
||||
# TODO: Unary minus
|
||||
],
|
||||
trivia=[BLANKS, COMMENT, LINE_BREAK],
|
||||
name="SQL",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -11,138 +11,141 @@ import parser.runtime as runtime
|
|||
|
||||
# Tests based on
|
||||
# https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
|
||||
class LGrammar(Grammar):
|
||||
start = "File"
|
||||
trivia = ["BLANKS"]
|
||||
|
||||
# Need a little bit of disambiguation for the symbol involved.
|
||||
precedence = [
|
||||
(Assoc.LEFT, ["PLUS", "MINUS"]),
|
||||
(Assoc.LEFT, ["STAR", "SLASH"]),
|
||||
(Assoc.LEFT, ["LPAREN"]),
|
||||
]
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
@rule
|
||||
def File(self):
|
||||
# TODO: Make lists easier
|
||||
return self._functions
|
||||
TRUE = Terminal("TRUE", "true")
|
||||
FALSE = Terminal("FALSE", "false")
|
||||
INT = Terminal("INT", Re.set(("0", "9")).plus())
|
||||
FN = Terminal("FN", "fn")
|
||||
ARROW = Terminal("ARROW", "->")
|
||||
COMMA = Terminal("COMMA", ",")
|
||||
LPAREN = Terminal("LPAREN", "(")
|
||||
RPAREN = Terminal("RPAREN", ")")
|
||||
LCURLY = Terminal("LCURLY", "{")
|
||||
RCURLY = Terminal("RCURLY", "}")
|
||||
COLON = Terminal("COLON", ":")
|
||||
SEMICOLON = Terminal("SEMICOLON", ";")
|
||||
LET = Terminal("LET", "let")
|
||||
EQUAL = Terminal("EQUAL", "=")
|
||||
RETURN = Terminal("RETURN", "return")
|
||||
PLUS = Terminal("PLUS", "+")
|
||||
MINUS = Terminal("MINUS", "-")
|
||||
STAR = Terminal("STAR", "*")
|
||||
SLASH = Terminal("SLASH", "/")
|
||||
|
||||
@rule
|
||||
def _functions(self):
|
||||
return self.Function | (self._functions + self.Function)
|
||||
|
||||
@rule
|
||||
def Function(self):
|
||||
return self.FN + self.NAME + self.ParamList + opt(self.ARROW + self.TypeExpr) + self.Block
|
||||
|
||||
@rule
|
||||
def ParamList(self):
|
||||
return self.LPAREN + opt(self._parameters) + self.RPAREN
|
||||
|
||||
@rule
|
||||
def _parameters(self):
|
||||
# NOTE: The ungrammar in the reference does not talk about commas required between parameters
|
||||
# so this massages it to make them required. Commas are in the list not the param, which
|
||||
# is more awkward for processing but not terminally so.
|
||||
return (self.Param + opt(self.COMMA)) | (self.Param + self.COMMA + self._parameters)
|
||||
|
||||
@rule
|
||||
def Param(self):
|
||||
return self.NAME + self.COLON + self.TypeExpr
|
||||
|
||||
@rule
|
||||
def TypeExpr(self):
|
||||
return self.NAME
|
||||
|
||||
@rule
|
||||
def Block(self):
|
||||
return self.LCURLY + opt(self._statements) + self.RCURLY
|
||||
|
||||
@rule
|
||||
def _statements(self):
|
||||
return self.Stmt | self._statements + self.Stmt
|
||||
|
||||
@rule
|
||||
def Stmt(self):
|
||||
return self.StmtExpr | self.StmtLet | self.StmtReturn
|
||||
|
||||
@rule
|
||||
def StmtExpr(self):
|
||||
return self.Expr + self.SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtLet(self):
|
||||
return self.LET + self.NAME + self.EQUAL + self.Expr + self.SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtReturn(self):
|
||||
return self.RETURN + self.Expr + self.SEMICOLON
|
||||
|
||||
@rule
|
||||
def Expr(self):
|
||||
return self.ExprLiteral | self.ExprName | self.ExprParen | self.ExprBinary | self.ExprCall
|
||||
|
||||
@rule
|
||||
def ExprLiteral(self):
|
||||
return self.INT | self.TRUE | self.FALSE
|
||||
|
||||
@rule
|
||||
def ExprName(self):
|
||||
return self.NAME
|
||||
|
||||
@rule
|
||||
def ExprParen(self):
|
||||
return self.LPAREN + self.Expr + self.RPAREN
|
||||
|
||||
@rule
|
||||
def ExprBinary(self):
|
||||
return self.Expr + (self.PLUS | self.MINUS | self.STAR | self.SLASH) + self.Expr
|
||||
|
||||
@rule
|
||||
def ExprCall(self):
|
||||
return self.Expr + self.ArgList
|
||||
|
||||
@rule
|
||||
def ArgList(self):
|
||||
return self.LPAREN + opt(self._arg_star) + self.RPAREN
|
||||
|
||||
@rule
|
||||
def _arg_star(self):
|
||||
# Again, a deviation from the original. See _parameters.
|
||||
return (self.Expr + opt(self.COMMA)) | (self.Expr + self.COMMA + self._arg_star)
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
TRUE = Terminal("true")
|
||||
FALSE = Terminal("false")
|
||||
INT = Terminal(Re.set(("0", "9")).plus())
|
||||
FN = Terminal("fn")
|
||||
ARROW = Terminal("->")
|
||||
COMMA = Terminal(",")
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
LCURLY = Terminal("{")
|
||||
RCURLY = Terminal("}")
|
||||
COLON = Terminal(":")
|
||||
SEMICOLON = Terminal(";")
|
||||
LET = Terminal("let")
|
||||
EQUAL = Terminal("=")
|
||||
RETURN = Terminal("return")
|
||||
PLUS = Terminal("+")
|
||||
MINUS = Terminal("-")
|
||||
STAR = Terminal("*")
|
||||
SLASH = Terminal("/")
|
||||
|
||||
NAME = Terminal(
|
||||
NAME = Terminal(
|
||||
"NAME",
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
L_PARSE_TABLE = LGrammar().build_table()
|
||||
L_LEXER_TABLE = LGrammar().compile_lexer()
|
||||
@rule
|
||||
def File():
|
||||
# TODO: Make lists easier
|
||||
return _functions
|
||||
|
||||
@rule
|
||||
def _functions():
|
||||
return Function | (_functions + Function)
|
||||
|
||||
@rule
|
||||
def Function():
|
||||
return FN + NAME + ParamList + opt(ARROW + TypeExpr) + Block
|
||||
|
||||
@rule
|
||||
def ParamList():
|
||||
return LPAREN + opt(_parameters) + RPAREN
|
||||
|
||||
@rule
|
||||
def _parameters():
|
||||
# NOTE: The ungrammar in the reference does not talk about commas
|
||||
# required between parameters so this massages it to make them
|
||||
# required. Commas are in the list not the param, which is more
|
||||
# awkward for processing but not terminally so.
|
||||
return (Param + opt(COMMA)) | (Param + COMMA + _parameters)
|
||||
|
||||
@rule
|
||||
def Param():
|
||||
return NAME + COLON + TypeExpr
|
||||
|
||||
@rule
|
||||
def TypeExpr():
|
||||
return NAME
|
||||
|
||||
@rule
|
||||
def Block():
|
||||
return LCURLY + opt(_statements) + RCURLY
|
||||
|
||||
@rule
|
||||
def _statements():
|
||||
return Stmt | _statements + Stmt
|
||||
|
||||
@rule
|
||||
def Stmt():
|
||||
return StmtExpr | StmtLet | StmtReturn
|
||||
|
||||
@rule
|
||||
def StmtExpr():
|
||||
return Expr + SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtLet():
|
||||
return LET + NAME + EQUAL + Expr + SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtReturn():
|
||||
return RETURN + Expr + SEMICOLON
|
||||
|
||||
@rule
|
||||
def Expr():
|
||||
return ExprLiteral | ExprName | ExprParen | ExprBinary | ExprCall
|
||||
|
||||
@rule
|
||||
def ExprLiteral():
|
||||
return INT | TRUE | FALSE
|
||||
|
||||
@rule
|
||||
def ExprName():
|
||||
return NAME
|
||||
|
||||
@rule
|
||||
def ExprParen():
|
||||
return LPAREN + Expr + RPAREN
|
||||
|
||||
@rule
|
||||
def ExprBinary():
|
||||
return Expr + (PLUS | MINUS | STAR | SLASH) + Expr
|
||||
|
||||
@rule
|
||||
def ExprCall():
|
||||
return Expr + ArgList
|
||||
|
||||
@rule
|
||||
def ArgList():
|
||||
return LPAREN + opt(_arg_star) + RPAREN
|
||||
|
||||
@rule
|
||||
def _arg_star():
|
||||
# Again, a deviation from the original. See _parameters.
|
||||
return (Expr + opt(COMMA)) | (Expr + COMMA + _arg_star)
|
||||
|
||||
LGrammar = Grammar(
|
||||
start=File,
|
||||
trivia=[BLANKS],
|
||||
# Need a little bit of disambiguation for the symbol involved.
|
||||
precedence = [
|
||||
(Assoc.LEFT, [PLUS, MINUS]),
|
||||
(Assoc.LEFT, [STAR, SLASH]),
|
||||
(Assoc.LEFT, [LPAREN]),
|
||||
],
|
||||
)
|
||||
|
||||
L_PARSE_TABLE = LGrammar.build_table()
|
||||
L_LEXER_TABLE = LGrammar.compile_lexer()
|
||||
|
||||
|
||||
def test_matklad_one():
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import pytest
|
||||
|
||||
import parser
|
||||
import parser.runtime as runtime
|
||||
|
||||
from parser import Grammar, seq, rule, Terminal
|
||||
|
|
@ -40,117 +39,68 @@ def _tree(treeform, count=0) -> runtime.Tree | runtime.TokenValue:
|
|||
def test_lr0_lr0():
|
||||
"""An LR0 grammar should work with an LR0 generator."""
|
||||
|
||||
class G(Grammar):
|
||||
start = "E"
|
||||
# generator = parser.GenerateLR0
|
||||
PLUS = Terminal("+", "+")
|
||||
LPAREN = Terminal("(", "(")
|
||||
RPAREN = Terminal(")", ")")
|
||||
IDENTIFIER = Terminal("id", "id")
|
||||
|
||||
@rule
|
||||
def E(self):
|
||||
return seq(self.E, self.PLUS, self.T) | self.T
|
||||
def E():
|
||||
return seq(E, PLUS, T) | T
|
||||
|
||||
@rule
|
||||
def T(self):
|
||||
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
|
||||
def T():
|
||||
return seq(LPAREN, E, RPAREN) | IDENTIFIER
|
||||
|
||||
PLUS = Terminal("+", name="+")
|
||||
LPAREN = Terminal("(", name="(")
|
||||
RPAREN = Terminal(")", name=")")
|
||||
IDENTIFIER = Terminal("id", name="id")
|
||||
G = Grammar(start=E)
|
||||
|
||||
table = G().build_table()
|
||||
tree, errors = runtime.Parser(table).parse(
|
||||
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
|
||||
)
|
||||
table = G.build_table()
|
||||
tree, errors = runtime.Parser(table).parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN))
|
||||
|
||||
assert errors == []
|
||||
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
|
||||
|
||||
|
||||
def test_all_generators():
|
||||
"""This grammar should work with everything honestly."""
|
||||
|
||||
class G(Grammar):
|
||||
start = "E"
|
||||
|
||||
@rule
|
||||
def E(self):
|
||||
return seq(self.E, self.PLUS, self.T) | self.T
|
||||
|
||||
@rule
|
||||
def T(self):
|
||||
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
|
||||
|
||||
PLUS = Terminal("+", name="+")
|
||||
LPAREN = Terminal("(", name="(")
|
||||
RPAREN = Terminal(")", name=")")
|
||||
IDENTIFIER = Terminal("id", name="id")
|
||||
|
||||
GENERATORS = [
|
||||
# parser.GenerateLR0,
|
||||
# parser.GeneratePager,
|
||||
parser.ParserGenerator,
|
||||
]
|
||||
for generator in GENERATORS:
|
||||
table = G().build_table(generator=generator)
|
||||
tree, errors = runtime.Parser(table).parse(
|
||||
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
|
||||
)
|
||||
|
||||
print("\n")
|
||||
print(generator)
|
||||
print(f"{table.format()}")
|
||||
|
||||
assert errors == []
|
||||
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
|
||||
|
||||
|
||||
def test_grammar_aho_ullman_2():
|
||||
class TestGrammar(Grammar):
|
||||
start = "S"
|
||||
@rule
|
||||
def S():
|
||||
return seq(X, X)
|
||||
|
||||
@rule
|
||||
def S(self):
|
||||
return seq(self.X, self.X)
|
||||
def X():
|
||||
return seq(A, X) | B
|
||||
|
||||
@rule
|
||||
def X(self):
|
||||
return seq(self.A, self.X) | self.B
|
||||
A = Terminal("A", "a")
|
||||
B = Terminal("B", "b")
|
||||
|
||||
A = Terminal("a")
|
||||
B = Terminal("b")
|
||||
|
||||
TestGrammar().build_table(generator=parser.ParserGenerator)
|
||||
# TestGrammar().build_table(generator=parser.GeneratePager)
|
||||
Grammar(start=S).build_table()
|
||||
|
||||
|
||||
def test_fun_lalr():
|
||||
|
||||
class TestGrammar(Grammar):
|
||||
start = "S"
|
||||
@rule
|
||||
def S():
|
||||
return seq(V, E)
|
||||
|
||||
@rule
|
||||
def S(self):
|
||||
return seq(self.V, self.E)
|
||||
def E():
|
||||
return F | seq(E, PLUS, F)
|
||||
|
||||
@rule
|
||||
def E(self):
|
||||
return self.F | seq(self.E, self.PLUS, self.F)
|
||||
def F():
|
||||
return V | INT | seq(LPAREN, E, RPAREN)
|
||||
|
||||
@rule
|
||||
def F(self):
|
||||
return self.V | self.INT | seq(self.LPAREN, self.E, self.RPAREN)
|
||||
def V():
|
||||
return ID
|
||||
|
||||
@rule
|
||||
def V(self):
|
||||
return self.ID
|
||||
PLUS = Terminal("PLUS", "+")
|
||||
INT = Terminal("INT", "int")
|
||||
ID = Terminal("ID", "id")
|
||||
LPAREN = Terminal("LPAREN", "(")
|
||||
RPAREN = Terminal("RPAREN", ")")
|
||||
|
||||
PLUS = Terminal("+")
|
||||
INT = Terminal("int")
|
||||
ID = Terminal("id")
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
|
||||
TestGrammar().build_table()
|
||||
Grammar(start=S).build_table()
|
||||
|
||||
|
||||
def test_conflicting_names():
|
||||
|
|
@ -167,43 +117,28 @@ def test_conflicting_names():
|
|||
to understand.
|
||||
"""
|
||||
|
||||
class TestGrammar(Grammar):
|
||||
start = "IDENTIFIER"
|
||||
|
||||
@rule("IDENTIFIER")
|
||||
def identifier(self):
|
||||
return self.IDENTIFIER
|
||||
def identifier():
|
||||
return IDENTIFIER
|
||||
|
||||
IDENTIFIER = Terminal("Identifier")
|
||||
IDENTIFIER = Terminal("IDENTIFIER", "Identifier")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
TestGrammar().build_table()
|
||||
Grammar(start=identifier).build_table()
|
||||
|
||||
|
||||
def test_grammar_ignore_trivia():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
trivia = ["BLANK"]
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
def sentence():
|
||||
return WORD | seq(sentence, WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
WORD = Terminal("WORD", "blah")
|
||||
BLANK = Terminal("BLANK", " ")
|
||||
|
||||
table = G().build_table()
|
||||
table = Grammar(start=sentence, trivia=[BLANK]).build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
tree, errors = runtime.Parser(table).parse(
|
||||
Tokens(
|
||||
G.WORD,
|
||||
G.BLANK,
|
||||
G.WORD,
|
||||
G.BLANK,
|
||||
)
|
||||
)
|
||||
tree, errors = runtime.Parser(table).parse(Tokens(WORD, BLANK, WORD, BLANK))
|
||||
|
||||
assert errors == []
|
||||
assert tree == runtime.Tree(
|
||||
|
|
@ -234,135 +169,3 @@ def test_grammar_ignore_trivia():
|
|||
),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def test_grammar_unknown_trivia():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
trivia = ["BLANK"]
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
G().build_table()
|
||||
|
||||
|
||||
def test_grammar_trivia_symbol():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
|
||||
trivia = [BLANK]
|
||||
|
||||
table = G().build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
|
||||
def test_grammar_trivia_constructor():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(trivia=[self.BLANK])
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
|
||||
table = G().build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
|
||||
def test_grammar_trivia_constructor_string():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(trivia=["BLANK"])
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
|
||||
table = G().build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
|
||||
def test_grammar_trivia_constructor_string_unknown():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(trivia=["BLANK"])
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
G().build_table()
|
||||
|
||||
|
||||
def test_grammar_name_implicit():
|
||||
class FooGrammar(Grammar):
|
||||
start = "x"
|
||||
|
||||
@rule
|
||||
def x(self):
|
||||
return self.WORD
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
assert FooGrammar().name == "foo"
|
||||
|
||||
|
||||
def test_grammar_name_explicit_member():
|
||||
class FooGrammar(Grammar):
|
||||
start = "x"
|
||||
|
||||
name = "bar"
|
||||
|
||||
@rule
|
||||
def x(self):
|
||||
return self.WORD
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
assert FooGrammar().name == "bar"
|
||||
|
||||
|
||||
def test_grammar_name_explicit_constructor():
|
||||
class FooGrammar(Grammar):
|
||||
start = "x"
|
||||
|
||||
name = "bar"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(name="baz")
|
||||
|
||||
@rule
|
||||
def x(self):
|
||||
return self.WORD
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
assert FooGrammar().name == "baz"
|
||||
|
|
|
|||
|
|
@ -354,32 +354,33 @@ def test_edge_list_always_sorted(points: list[tuple[int, int]]):
|
|||
|
||||
|
||||
def test_lexer_compile():
|
||||
class LexTest(Grammar):
|
||||
@rule
|
||||
def foo(self):
|
||||
return self.IS
|
||||
def foo():
|
||||
# NOTE: This is a hack to ensure the terminals are reachable. :P
|
||||
return IS | AS | IDENTIFIER
|
||||
|
||||
start = "foo"
|
||||
|
||||
IS = Terminal("is")
|
||||
AS = Terminal("as")
|
||||
IS = Terminal("IS", "is")
|
||||
AS = Terminal("AS", "as")
|
||||
IDENTIFIER = Terminal(
|
||||
"IDENTIFIER",
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
)
|
||||
)
|
||||
BLANKS = Terminal(Re.set("\r", "\n", "\t", " ").plus())
|
||||
BLANKS = Terminal("BLANKS", Re.set("\r", "\n", "\t", " ").plus())
|
||||
|
||||
lexer = LexTest().compile_lexer()
|
||||
|
||||
LexTest = Grammar(start=foo, trivia=[BLANKS])
|
||||
lexer = LexTest.compile_lexer()
|
||||
dump_lexer_table(lexer)
|
||||
tokens = list(generic_tokenize("xy is ass", lexer))
|
||||
assert tokens == [
|
||||
(LexTest.IDENTIFIER, 0, 2),
|
||||
(LexTest.BLANKS, 2, 1),
|
||||
(LexTest.IS, 3, 2),
|
||||
(LexTest.BLANKS, 5, 1),
|
||||
(LexTest.IDENTIFIER, 6, 3),
|
||||
(IDENTIFIER, 0, 2),
|
||||
(BLANKS, 2, 1),
|
||||
(IS, 3, 2),
|
||||
(BLANKS, 5, 1),
|
||||
(IDENTIFIER, 6, 3),
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -387,14 +388,12 @@ def test_lexer_compile():
|
|||
def test_lexer_numbers(n: float):
|
||||
assume(math.isfinite(n))
|
||||
|
||||
class LexTest(Grammar):
|
||||
@rule
|
||||
def number(self):
|
||||
return self.NUMBER
|
||||
|
||||
start = "number"
|
||||
def number():
|
||||
return NUMBER
|
||||
|
||||
NUMBER = Terminal(
|
||||
"NUMBER",
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
|
|
@ -409,12 +408,15 @@ def test_lexer_numbers(n: float):
|
|||
)
|
||||
)
|
||||
|
||||
lexer = LexTest().compile_lexer()
|
||||
|
||||
LexTest = Grammar(start=number)
|
||||
|
||||
lexer = LexTest.compile_lexer()
|
||||
dump_lexer_table(lexer)
|
||||
|
||||
number_string = str(n)
|
||||
|
||||
tokens = list(generic_tokenize(number_string, lexer))
|
||||
assert tokens == [
|
||||
(LexTest.NUMBER, 0, len(number_string)),
|
||||
(NUMBER, 0, len(number_string)),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -23,69 +23,66 @@ import parser.wadler.builder as builder
|
|||
import parser.wadler.runtime as runtime
|
||||
|
||||
|
||||
class JsonGrammar(Grammar):
|
||||
start = "root"
|
||||
|
||||
trivia = ["BLANKS"]
|
||||
|
||||
def make_json_grammar():
|
||||
@rule
|
||||
def root(self):
|
||||
return self.value
|
||||
def root():
|
||||
return value
|
||||
|
||||
@rule(transparent=True)
|
||||
def value(self):
|
||||
def value():
|
||||
return (
|
||||
self.object
|
||||
| self.array
|
||||
| self.NUMBER
|
||||
| self.TRUE
|
||||
| self.FALSE
|
||||
| self.NULL
|
||||
| self.STRING
|
||||
object
|
||||
| array
|
||||
| NUMBER
|
||||
| TRUE
|
||||
| FALSE
|
||||
| NULL
|
||||
| STRING
|
||||
)
|
||||
|
||||
@rule
|
||||
def object(self):
|
||||
def object():
|
||||
return group(
|
||||
self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
|
||||
LCURLY + opt(indent(newline() + _object_pairs)) + newline() + RCURLY
|
||||
)
|
||||
|
||||
@rule
|
||||
def _object_pairs(self):
|
||||
def _object_pairs():
|
||||
return alt(
|
||||
self.object_pair,
|
||||
self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
|
||||
object_pair,
|
||||
object_pair + COMMA + newline(" ") + _object_pairs,
|
||||
)
|
||||
|
||||
@rule
|
||||
def object_pair(self):
|
||||
return group(self.STRING + self.COLON + indent(newline(" ") + self.value))
|
||||
def object_pair():
|
||||
return group(STRING + COLON + indent(newline(" ") + value))
|
||||
|
||||
@rule
|
||||
def array(self):
|
||||
def array():
|
||||
return group(
|
||||
self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
|
||||
LSQUARE + opt(indent(newline() + _array_items)) + newline() + RSQUARE
|
||||
)
|
||||
|
||||
@rule
|
||||
def _array_items(self):
|
||||
def _array_items():
|
||||
return alt(
|
||||
self.value,
|
||||
self.value + self.COMMA + newline(" ") + self._array_items,
|
||||
value,
|
||||
value + COMMA + newline(" ") + _array_items,
|
||||
)
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
LCURLY = Terminal("{")
|
||||
RCURLY = Terminal("}")
|
||||
COMMA = Terminal(",")
|
||||
COLON = Terminal(":")
|
||||
LSQUARE = Terminal("[")
|
||||
RSQUARE = Terminal("]")
|
||||
TRUE = Terminal("true")
|
||||
FALSE = Terminal("false")
|
||||
NULL = Terminal("null")
|
||||
LCURLY = Terminal("LCURLY", "{")
|
||||
RCURLY = Terminal("RCURLY", "}")
|
||||
COMMA = Terminal("COMMA", ",")
|
||||
COLON = Terminal("COLON", ":")
|
||||
LSQUARE = Terminal("LSQUARE", "[")
|
||||
RSQUARE = Terminal("RSQUARE", "]")
|
||||
TRUE = Terminal("TRUE", "true")
|
||||
FALSE = Terminal("FALSE", "false")
|
||||
NULL = Terminal("NULL", "null")
|
||||
NUMBER = Terminal(
|
||||
"NUMBER",
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
|
|
@ -100,6 +97,7 @@ class JsonGrammar(Grammar):
|
|||
),
|
||||
)
|
||||
STRING = Terminal(
|
||||
"STRING",
|
||||
Re.seq(
|
||||
Re.literal('"'),
|
||||
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
|
||||
|
|
@ -107,8 +105,9 @@ class JsonGrammar(Grammar):
|
|||
)
|
||||
)
|
||||
|
||||
return Grammar(start=root, trivia=[BLANKS])
|
||||
|
||||
JSON = JsonGrammar()
|
||||
JSON = make_json_grammar()
|
||||
JSON_PARSER = JSON.build_table()
|
||||
JSON_LEXER = JSON.compile_lexer()
|
||||
|
||||
|
|
@ -228,47 +227,49 @@ def test_layout_basic():
|
|||
)
|
||||
|
||||
|
||||
class TG(Grammar):
|
||||
start = "root"
|
||||
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
|
||||
def make_test_grammar():
|
||||
@rule
|
||||
def root():
|
||||
return _expression
|
||||
|
||||
@rule
|
||||
def root(self):
|
||||
return self._expression
|
||||
def _expression():
|
||||
return word | list
|
||||
|
||||
@rule
|
||||
def _expression(self):
|
||||
return self.word | self.list
|
||||
def list():
|
||||
return group(LPAREN, indent(nl, _expressions), nl, RPAREN)
|
||||
|
||||
@rule
|
||||
def list(self):
|
||||
return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN)
|
||||
def _expressions():
|
||||
return _expression | seq(_expressions, sp, _expression)
|
||||
|
||||
@rule
|
||||
def _expressions(self):
|
||||
return self._expression | seq(self._expressions, sp, self._expression)
|
||||
def word():
|
||||
return OK | seq(BREAK, br, BREAK)
|
||||
|
||||
@rule
|
||||
def word(self):
|
||||
return self.OK | seq(self.BREAK, br, self.BREAK)
|
||||
LPAREN = Terminal("LPAREN", "(")
|
||||
RPAREN = Terminal("RPAREN", ")")
|
||||
OK = Terminal("OK", "ok")
|
||||
BREAK = Terminal("BREAK", "break")
|
||||
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
OK = Terminal("ok")
|
||||
BREAK = Terminal("break")
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
"COMMENT",
|
||||
Re.seq(Re.literal(";"), Re.set("\n").invert().star()),
|
||||
trivia_mode=TriviaMode.LineComment,
|
||||
)
|
||||
|
||||
return Grammar(start=root, trivia=[BLANKS, LINE_BREAK, COMMENT], pretty_indent=" ")
|
||||
|
||||
TG = make_test_grammar()
|
||||
|
||||
|
||||
|
||||
def test_forced_break():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
|
||||
|
||||
|
|
@ -276,7 +277,7 @@ def test_forced_break():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
|
|
@ -296,9 +297,8 @@ def test_forced_break():
|
|||
|
||||
|
||||
def test_maintaining_line_breaks():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok)
|
||||
; Don't break here.
|
||||
|
|
@ -316,7 +316,7 @@ def test_maintaining_line_breaks():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
|
|
@ -325,10 +325,10 @@ def test_maintaining_line_breaks():
|
|||
(ok ok)
|
||||
; Don't break here.
|
||||
(ok)
|
||||
*SPACE*
|
||||
*SPACE**SPACE*
|
||||
; ^ Do keep this break though.
|
||||
(ok)
|
||||
*SPACE*
|
||||
*SPACE**SPACE*
|
||||
; ^ This should only be one break.
|
||||
(ok)
|
||||
)
|
||||
|
|
@ -337,9 +337,8 @@ def test_maintaining_line_breaks():
|
|||
|
||||
|
||||
def test_trailing_trivia():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok)); Don't lose this!
|
||||
|
||||
|
|
@ -350,7 +349,7 @@ def test_trailing_trivia():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
|
|
@ -363,9 +362,8 @@ def test_trailing_trivia():
|
|||
|
||||
|
||||
def test_trailing_trivia_two():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok))
|
||||
|
||||
|
|
@ -376,7 +374,7 @@ def test_trailing_trivia_two():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
|
|
@ -389,9 +387,8 @@ def test_trailing_trivia_two():
|
|||
|
||||
|
||||
def test_trailing_trivia_split():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok)); Don't lose this!
|
||||
|
||||
|
|
@ -432,7 +429,7 @@ def test_trailing_trivia_split():
|
|||
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
|
||||
|
||||
trivia_doc = runtime.Matcher(
|
||||
builder.MatcherTable(ParseTable([], [], set()), {}, {}),
|
||||
builder.MatcherTable(ParseTable([], [], set(), {}), {}, {}),
|
||||
TRIVIA_MODES,
|
||||
).apply_post_trivia(
|
||||
token.post_trivia,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue