[all] A whole new style for grammars

Say goodbye to the sea of `self.`!
This commit is contained in:
John Doty 2024-11-09 11:21:30 -08:00
parent d6f1e7aba1
commit 5064a768e7
10 changed files with 1097 additions and 1318 deletions

View file

@ -20,503 +20,498 @@ from parser import (
sp, sp,
) )
@rule("File")
def file() -> Rule:
return _file_statement_list
class FineGrammar(Grammar): @rule
# generator = parser.GenerateLR1 def _file_statement_list() -> Rule:
# generator = parser.GeneratePager return alt(
start = "File" _file_statement,
_file_statement_list + nl + _file_statement,
)
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"] @rule
def _file_statement() -> Rule:
return (
import_statement | class_declaration | export_statement | _statement
)
pretty_indent = " " @rule
def import_statement() -> Rule:
return group(
IMPORT, sp, STRING, sp, AS, sp, IDENTIFIER, sp, SEMICOLON
)
def __init__(self): @rule("ClassDeclaration")
super().__init__( def class_declaration() -> Rule:
precedence=[ return seq(
(Assoc.RIGHT, [self.EQUAL]), group(
(Assoc.LEFT, [self.OR]), CLASS,
(Assoc.LEFT, [self.IS]), sp,
(Assoc.LEFT, [self.AND]), mark(IDENTIFIER, field="name", highlight=highlight.entity.name.type),
(Assoc.LEFT, [self.EQUALEQUAL, self.BANGEQUAL]), sp,
(Assoc.LEFT, [self.LESS, self.GREATER, self.GREATEREQUAL, self.LESSEQUAL]), LCURLY,
(Assoc.LEFT, [self.PLUS, self.MINUS]), ),
(Assoc.LEFT, [self.STAR, self.SLASH]), indent(nl, mark(opt(class_body), field="body")),
(Assoc.LEFT, [self.primary_expression]), nl,
(Assoc.LEFT, [self.LPAREN]), RCURLY,
(Assoc.LEFT, [self.DOT]), nl, # Extra newline at the end of the class
# )
# If there's a confusion about whether to make an IF
# statement or an expression, prefer the statement.
#
(Assoc.NONE, [self.if_statement]),
],
)
@rule("File") @rule("ClassBody")
def file(self) -> Rule: def class_body() -> Rule:
return self._file_statement_list return _class_members
@rule @rule
def _file_statement_list(self) -> Rule: def _class_members() -> Rule:
return alt( return _class_member | seq(_class_members, nl, _class_member)
self._file_statement,
self._file_statement_list + nl + self._file_statement,
)
@rule @rule
def _file_statement(self) -> Rule: def _class_member() -> Rule:
return ( return field_declaration | function_declaration
self.import_statement | self.class_declaration | self.export_statement | self._statement
)
@rule @rule("FieldDecl")
def import_statement(self) -> Rule: def field_declaration() -> Rule:
return group( return group(IDENTIFIER, COLON, sp, type_expression, SEMICOLON)
self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, sp, self.SEMICOLON
)
@rule("ClassDeclaration") # Types
def class_declaration(self) -> Rule: @rule("TypeExpression")
return seq( def type_expression() -> Rule:
group( return alternate_type | type_identifier
self.CLASS,
sp,
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
sp,
self.LCURLY,
),
indent(nl, mark(opt(self.class_body), field="body")),
nl,
self.RCURLY,
nl, # Extra newline at the end of the class
)
@rule("ClassBody") @rule("AlternateType")
def class_body(self) -> Rule: def alternate_type() -> Rule:
return self._class_members return group(type_expression, sp, OR, sp, type_identifier)
@rule @rule("TypeIdentifier")
def _class_members(self) -> Rule: def type_identifier() -> Rule:
return self._class_member | seq(self._class_members, nl, self._class_member) return mark(IDENTIFIER, field="id", highlight=highlight.entity.name.type)
@rule @rule
def _class_member(self) -> Rule: def export_statement() -> Rule:
return self.field_declaration | self.function_declaration return alt(
group(EXPORT, sp, class_declaration),
group(EXPORT, sp, function_declaration),
group(EXPORT, sp, let_statement),
group(EXPORT, sp, export_list, SEMICOLON),
)
@rule("FieldDecl") @rule
def field_declaration(self) -> Rule: def export_list() -> Rule:
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON) return IDENTIFIER | seq(IDENTIFIER, COMMA, sp, export_list)
# Types # Functions
@rule("TypeExpression") @rule("FunctionDecl")
def type_expression(self) -> Rule: def function_declaration() -> Rule:
return self.alternate_type | self.type_identifier return seq(
group(
@rule("AlternateType")
def alternate_type(self) -> Rule:
return group(self.type_expression, sp, self.OR, sp, self.type_identifier)
@rule("TypeIdentifier")
def type_identifier(self) -> Rule:
return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)
@rule
def export_statement(self) -> Rule:
return alt(
group(self.EXPORT, sp, self.class_declaration),
group(self.EXPORT, sp, self.function_declaration),
group(self.EXPORT, sp, self.let_statement),
group(self.EXPORT, sp, self.export_list, self.SEMICOLON),
)
@rule
def export_list(self) -> Rule:
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list)
# Functions
@rule("FunctionDecl")
def function_declaration(self) -> Rule:
return seq(
group( group(
group( group(
group( FUN,
self.FUN, sp,
sp, mark(
mark( IDENTIFIER,
self.IDENTIFIER, field="name",
field="name", highlight=highlight.entity.name.function,
highlight=highlight.entity.name.function,
),
), ),
nl,
mark(self.function_parameters, field="parameters"),
), ),
mark(
opt(indent(sp, group(self.ARROW, sp, self.type_expression))),
field="return_type",
),
),
sp,
mark(self.block, field="body"),
nl,
)
@rule("ParamList")
def function_parameters(self) -> Rule:
return group(
self.LPAREN,
indent(
nl, nl,
opt( mark(function_parameters, field="parameters"),
self._first_parameter
| seq(self._first_parameter, self.COMMA)
| group(self._first_parameter, self.COMMA, sp, self._parameter_list)
),
), ),
mark(
opt(indent(sp, group(ARROW, sp, type_expression))),
field="return_type",
),
),
sp,
mark(block, field="body"),
nl,
)
@rule("ParamList")
def function_parameters() -> Rule:
return group(
LPAREN,
indent(
nl, nl,
self.RPAREN, opt(
) _first_parameter
| seq(_first_parameter, COMMA)
@rule | group(_first_parameter, COMMA, sp, _parameter_list)
def _first_parameter(self) -> Rule:
return self.SELF | self.parameter
@rule
def _parameter_list(self) -> Rule:
return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list)
@rule("Parameter")
def parameter(self) -> Rule:
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression)
# Block
@rule("Block")
def block(self) -> Rule:
return alt(
group(self.LCURLY, nl, self.RCURLY),
group(self.LCURLY, indent(br, self.block_body), sp, self.RCURLY),
)
@rule("BlockBody")
def block_body(self) -> Rule:
return alt(
self.expression,
self._statement_list,
seq(self._statement_list, br, self.expression),
)
@rule
def _statement_list(self) -> Rule:
return self._statement | seq(self._statement_list, br, self._statement)
@rule
def _statement(self) -> Rule:
return (
self.function_declaration
| self.let_statement
| self.return_statement
| self.for_statement
| self.if_statement
| self.while_statement
| self.expression_statement
)
@rule("LetStatement")
def let_statement(self) -> Rule:
return group(
group(
self.LET,
sp,
self.IDENTIFIER,
sp,
self.EQUAL,
), ),
indent(sp, self.expression, self.SEMICOLON), ),
) nl,
RPAREN,
)
@rule("ReturnStatement") @rule
def return_statement(self) -> Rule: def _first_parameter() -> Rule:
return alt( return SELF | parameter
group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))),
group(self.RETURN, self.SEMICOLON),
)
@rule("ForStatement") @rule
def for_statement(self) -> Rule: def _parameter_list() -> Rule:
return group( return parameter | seq(parameter, COMMA, sp, _parameter_list)
group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)),
self.block,
)
@rule("IteratorVariable") @rule("Parameter")
def iterator_variable(self) -> Rule: def parameter() -> Rule:
return self.IDENTIFIER return group(IDENTIFIER, COLON, sp, type_expression)
@rule("IfStatement") # Block
def if_statement(self) -> Rule: @rule("Block")
return self.conditional_expression def block() -> Rule:
return alt(
group(LCURLY, nl, RCURLY),
group(LCURLY, indent(br, block_body), sp, RCURLY),
)
@rule @rule("BlockBody")
def while_statement(self) -> Rule: def block_body() -> Rule:
return group(group(self.WHILE, sp, self.expression), sp, self.block) return alt(
expression,
_statement_list,
seq(_statement_list, br, expression),
)
@rule @rule
def expression_statement(self) -> Rule: def _statement_list() -> Rule:
return seq(self.expression, self.SEMICOLON) return _statement | seq(_statement_list, br, _statement)
# Expressions @rule
@rule(transparent=True) def _statement() -> Rule:
def expression(self) -> Rule: return (
return self.binary_expression | self.is_expression | self.primary_expression function_declaration
| let_statement
| return_statement
| for_statement
| if_statement
| while_statement
| expression_statement
)
@rule("BinaryExpression") @rule("LetStatement")
def binary_expression(self) -> Rule: def let_statement() -> Rule:
return alt( return group(
# Assignment gets special indentation. group(
group(group(self.expression, sp, self.EQUAL), indent(sp, self.expression)), LET,
# Other ones do not.
group(group(self.expression, sp, self.OR), sp, self.expression),
group(group(self.expression, sp, self.AND), sp, self.expression),
group(group(self.expression, sp, self.EQUALEQUAL), sp, self.expression),
group(group(self.expression, sp, self.BANGEQUAL), sp, self.expression),
group(group(self.expression, sp, self.LESS), sp, self.expression),
group(group(self.expression, sp, self.LESSEQUAL), sp, self.expression),
group(group(self.expression, sp, self.GREATER), sp, self.expression),
group(group(self.expression, sp, self.GREATEREQUAL), sp, self.expression),
group(group(self.expression, sp, self.PLUS), sp, self.expression),
group(group(self.expression, sp, self.MINUS), sp, self.expression),
group(group(self.expression, sp, self.STAR), sp, self.expression),
group(group(self.expression, sp, self.SLASH), sp, self.expression),
)
@rule("IsExpression")
def is_expression(self) -> Rule:
return group(self.expression, sp, self.IS, indent(sp, self.pattern))
@rule
def primary_expression(self) -> Rule:
return (
self.identifier_expression
| self.literal_expression
| self.SELF
| seq(self.BANG, self.primary_expression)
| seq(self.MINUS, self.primary_expression)
| self.block
| self.conditional_expression
| self.list_constructor_expression
| self.object_constructor_expression
| self.match_expression
| seq(self.primary_expression, self.LPAREN, self.RPAREN)
| group(
self.primary_expression,
self.LPAREN,
indent(nl, self._expression_list),
nl,
self.RPAREN,
)
| group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER))
| group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN)
)
@rule("IdentifierExpression")
def identifier_expression(self):
return self.IDENTIFIER
@rule("Literal")
def literal_expression(self):
return self.NUMBER | self.STRING | self.TRUE | self.FALSE
@rule("ConditionalExpression")
def conditional_expression(self) -> Rule:
return (
seq(group(self.IF, sp, self.expression), sp, self.block)
| seq(
group(self.IF, sp, self.expression),
sp,
self.block,
sp,
self.ELSE,
sp,
self.conditional_expression,
)
| seq(
group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block
)
)
@rule
def list_constructor_expression(self) -> Rule:
return alt(
group(self.LSQUARE, nl, self.RSQUARE),
group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE),
)
@rule
def _expression_list(self) -> Rule:
return (
self.expression
| seq(self.expression, self.COMMA)
| seq(self.expression, self.COMMA, sp, self._expression_list)
)
@rule
def match_expression(self) -> Rule:
return group(
group(self.MATCH, sp, self.expression, sp, self.LCURLY),
indent(sp, self.match_arms),
sp, sp,
self.RCURLY, IDENTIFIER,
) sp,
EQUAL,
@rule("MatchArms")
def match_arms(self) -> Rule:
return self._match_arms
@rule
def _match_arms(self) -> Rule:
return (
self.match_arm
| seq(self.match_arm, self.COMMA)
| seq(self.match_arm, self.COMMA, br, self._match_arms)
)
@rule("MatchArm")
def match_arm(self) -> Rule:
return group(self.pattern, sp, self.ARROW, sp, self.expression)
@rule("Pattern")
def pattern(self) -> Rule:
return (
group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression)
| group(self.variable_binding, self._pattern_core)
| self._pattern_core
)
@rule
def _pattern_core(self) -> Rule:
return self.type_expression | self.wildcard_pattern
@rule("WildcardPattern")
def wildcard_pattern(self) -> Rule:
return self.UNDERSCORE
@rule("VariableBinding")
def variable_binding(self) -> Rule:
return seq(self.IDENTIFIER, self.COLON)
@rule
def object_constructor_expression(self) -> Rule:
return group(self.NEW, sp, self.type_identifier, sp, self.field_list)
@rule
def field_list(self) -> Rule:
return alt(
seq(self.LCURLY, self.RCURLY),
group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY),
)
@rule
def field_values(self) -> Rule:
return (
self.field_value
| seq(self.field_value, self.COMMA)
| seq(self.field_value, self.COMMA, sp, self.field_values)
)
@rule
def field_value(self) -> Rule:
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
BLANKS = Terminal(Re.set(" ", "\t").plus())
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal(
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
highlight=highlight.comment.line,
trivia_mode=TriviaMode.LineComment,
)
ARROW = Terminal("->", highlight=highlight.keyword.operator)
AS = Terminal("as", highlight=highlight.keyword.operator.expression)
BAR = Terminal("|", highlight=highlight.keyword.operator.expression)
CLASS = Terminal("class", highlight=highlight.storage.type.klass)
COLON = Terminal(":", highlight=highlight.punctuation.separator)
ELSE = Terminal("else", highlight=highlight.keyword.control.conditional)
FOR = Terminal("for", highlight=highlight.keyword.control)
FUN = Terminal("fun", highlight=highlight.storage.type.function)
IDENTIFIER = Terminal(
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
), ),
indent(sp, expression, SEMICOLON),
) )
IF = Terminal("if", highlight=highlight.keyword.control.conditional)
IMPORT = Terminal("import", highlight=highlight.keyword.other) @rule("ReturnStatement")
IN = Terminal("in", highlight=highlight.keyword.operator) def return_statement() -> Rule:
LCURLY = Terminal("{", highlight=highlight.punctuation.curly_brace.open) return alt(
RCURLY = Terminal("}", highlight=highlight.punctuation.curly_brace.close) group(RETURN, indent(sp, group(expression, SEMICOLON))),
LET = Terminal("let", highlight=highlight.keyword.other) group(RETURN, SEMICOLON),
RETURN = Terminal("return", highlight=highlight.keyword.control) )
SEMICOLON = Terminal(";", highlight=highlight.punctuation.separator)
STRING = Terminal( @rule("ForStatement")
# Double-quoted string. def for_statement() -> Rule:
Re.seq( return group(
Re.literal('"'), group(FOR, sp, iterator_variable, sp, IN, sp, group(expression)),
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(), block,
Re.literal('"'), )
@rule("IteratorVariable")
def iterator_variable() -> Rule:
return IDENTIFIER
@rule("IfStatement")
def if_statement() -> Rule:
return conditional_expression
@rule
def while_statement() -> Rule:
return group(group(WHILE, sp, expression), sp, block)
@rule
def expression_statement() -> Rule:
return seq(expression, SEMICOLON)
# Expressions
@rule(transparent=True)
def expression() -> Rule:
return binary_expression | is_expression | primary_expression
@rule("BinaryExpression")
def binary_expression() -> Rule:
return alt(
# Assignment gets special indentation.
group(group(expression, sp, EQUAL), indent(sp, expression)),
# Other ones do not.
group(group(expression, sp, OR), sp, expression),
group(group(expression, sp, AND), sp, expression),
group(group(expression, sp, EQUALEQUAL), sp, expression),
group(group(expression, sp, BANGEQUAL), sp, expression),
group(group(expression, sp, LESS), sp, expression),
group(group(expression, sp, LESSEQUAL), sp, expression),
group(group(expression, sp, GREATER), sp, expression),
group(group(expression, sp, GREATEREQUAL), sp, expression),
group(group(expression, sp, PLUS), sp, expression),
group(group(expression, sp, MINUS), sp, expression),
group(group(expression, sp, STAR), sp, expression),
group(group(expression, sp, SLASH), sp, expression),
)
@rule("IsExpression")
def is_expression() -> Rule:
return group(expression, sp, IS, indent(sp, pattern))
@rule
def primary_expression() -> Rule:
return (
identifier_expression
| literal_expression
| SELF
| seq(BANG, primary_expression)
| seq(MINUS, primary_expression)
| block
| conditional_expression
| list_constructor_expression
| object_constructor_expression
| match_expression
| seq(primary_expression, LPAREN, RPAREN)
| group(
primary_expression,
LPAREN,
indent(nl, _expression_list),
nl,
RPAREN,
) )
# Single-quoted string. | group(primary_expression, indent(nl, DOT, IDENTIFIER))
| Re.seq( | group(LPAREN, indent(nl, expression), nl, RPAREN)
Re.literal("'"),
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
Re.literal("'"),
),
highlight=highlight.string.quoted,
) )
WHILE = Terminal("while", highlight=highlight.keyword.control)
EQUAL = Terminal("=", highlight=highlight.keyword.operator.expression) @rule("IdentifierExpression")
LPAREN = Terminal("(", highlight=highlight.punctuation.parenthesis.open) def identifier_expression():
RPAREN = Terminal(")", highlight=highlight.punctuation.parenthesis.close) return IDENTIFIER
COMMA = Terminal(",", highlight=highlight.punctuation.separator)
SELF = Terminal("self", name="SELFF", highlight=highlight.variable.language) @rule("Literal")
OR = Terminal("or", highlight=highlight.keyword.operator.expression) def literal_expression():
IS = Terminal("is", highlight=highlight.keyword.operator.expression) return NUMBER | STRING | TRUE | FALSE
AND = Terminal("and", highlight=highlight.keyword.operator.expression)
EQUALEQUAL = Terminal("==", highlight=highlight.keyword.operator.expression) @rule("ConditionalExpression")
BANGEQUAL = Terminal("!=", highlight=highlight.keyword.operator.expression) def conditional_expression() -> Rule:
LESS = Terminal("<", highlight=highlight.keyword.operator.expression) return (
GREATER = Terminal(">", highlight=highlight.keyword.operator.expression) seq(group(IF, sp, expression), sp, block)
LESSEQUAL = Terminal("<=", highlight=highlight.keyword.operator.expression) | seq(
GREATEREQUAL = Terminal(">=", highlight=highlight.keyword.operator.expression) group(IF, sp, expression),
PLUS = Terminal("+", highlight=highlight.keyword.operator.expression) sp,
MINUS = Terminal("-", highlight=highlight.keyword.operator.expression) block,
STAR = Terminal("*", highlight=highlight.keyword.operator.expression) sp,
SLASH = Terminal("/", highlight=highlight.keyword.operator.expression) ELSE,
NUMBER = Terminal( sp,
conditional_expression,
)
| seq(
group(IF, sp, expression), sp, block, sp, ELSE, sp, block
)
)
@rule
def list_constructor_expression() -> Rule:
return alt(
group(LSQUARE, nl, RSQUARE),
group(LSQUARE, indent(nl, _expression_list), nl, RSQUARE),
)
@rule
def _expression_list() -> Rule:
return (
expression
| seq(expression, COMMA)
| seq(expression, COMMA, sp, _expression_list)
)
@rule
def match_expression() -> Rule:
return group(
group(MATCH, sp, expression, sp, LCURLY),
indent(sp, match_arms),
sp,
RCURLY,
)
@rule("MatchArms")
def match_arms() -> Rule:
return _match_arms
@rule
def _match_arms() -> Rule:
return (
match_arm
| seq(match_arm, COMMA)
| seq(match_arm, COMMA, br, _match_arms)
)
@rule("MatchArm")
def match_arm() -> Rule:
return group(pattern, sp, ARROW, sp, expression)
@rule("Pattern")
def pattern() -> Rule:
return (
group(variable_binding, _pattern_core, sp, AND, sp, expression)
| group(variable_binding, _pattern_core)
| _pattern_core
)
@rule
def _pattern_core() -> Rule:
return type_expression | wildcard_pattern
@rule("WildcardPattern")
def wildcard_pattern() -> Rule:
return UNDERSCORE
@rule("VariableBinding")
def variable_binding() -> Rule:
return seq(IDENTIFIER, COLON)
@rule
def object_constructor_expression() -> Rule:
return group(NEW, sp, type_identifier, sp, field_list)
@rule
def field_list() -> Rule:
return alt(
seq(LCURLY, RCURLY),
group(LCURLY, indent(nl, field_values), nl, RCURLY),
)
@rule
def field_values() -> Rule:
return (
field_value
| seq(field_value, COMMA)
| seq(field_value, COMMA, sp, field_values)
)
@rule
def field_value() -> Rule:
return IDENTIFIER | group(IDENTIFIER, COLON, indent(sp, expression))
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal(
"COMMENT",
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
highlight=highlight.comment.line,
trivia_mode=TriviaMode.LineComment,
)
ARROW = Terminal("ARROW", "->", highlight=highlight.keyword.operator)
AS = Terminal("AS", "as", highlight=highlight.keyword.operator.expression)
BAR = Terminal("BAR", "|", highlight=highlight.keyword.operator.expression)
CLASS = Terminal("CLASS", "class", highlight=highlight.storage.type.klass)
COLON = Terminal("COLON", ":", highlight=highlight.punctuation.separator)
ELSE = Terminal("ELSE", "else", highlight=highlight.keyword.control.conditional)
FOR = Terminal("FOR", "for", highlight=highlight.keyword.control)
FUN = Terminal("FUN", "fun", highlight=highlight.storage.type.function)
IDENTIFIER = Terminal(
"IDENTIFIER",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
IF = Terminal("IF", "if", highlight=highlight.keyword.control.conditional)
IMPORT = Terminal("IMPORT", "import", highlight=highlight.keyword.other)
IN = Terminal("IN", "in", highlight=highlight.keyword.operator)
LCURLY = Terminal("LCURLY", "{", highlight=highlight.punctuation.curly_brace.open)
RCURLY = Terminal("RCURLY", "}", highlight=highlight.punctuation.curly_brace.close)
LET = Terminal("LET", "let", highlight=highlight.keyword.other)
RETURN = Terminal("RETURN", "return", highlight=highlight.keyword.control)
SEMICOLON = Terminal("SEMICOLON", ";", highlight=highlight.punctuation.separator)
STRING = Terminal(
"STRING",
# Double-quoted string.
Re.seq(
Re.literal('"'),
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
Re.literal('"'),
)
# Single-quoted string.
| Re.seq(
Re.literal("'"),
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
Re.literal("'"),
),
highlight=highlight.string.quoted,
)
WHILE = Terminal("WHILE", "while", highlight=highlight.keyword.control)
EQUAL = Terminal("EQUAL", "=", highlight=highlight.keyword.operator.expression)
LPAREN = Terminal("LPAREN", "(", highlight=highlight.punctuation.parenthesis.open)
RPAREN = Terminal("RPAREN", ")", highlight=highlight.punctuation.parenthesis.close)
COMMA = Terminal("COMMA", ",", highlight=highlight.punctuation.separator)
SELF = Terminal("SELFF", "self", highlight=highlight.variable.language)
OR = Terminal("OR", "or", highlight=highlight.keyword.operator.expression)
IS = Terminal("IS", "is", highlight=highlight.keyword.operator.expression)
AND = Terminal("AND", "and", highlight=highlight.keyword.operator.expression)
EQUALEQUAL = Terminal("EQUALEQUAL", "==", highlight=highlight.keyword.operator.expression)
BANGEQUAL = Terminal("BANGEQUAL", "!=", highlight=highlight.keyword.operator.expression)
LESS = Terminal("LESS", "<", highlight=highlight.keyword.operator.expression)
GREATER = Terminal("GREATER", ">", highlight=highlight.keyword.operator.expression)
LESSEQUAL = Terminal("LESSEQUAL", "<=", highlight=highlight.keyword.operator.expression)
GREATEREQUAL = Terminal("GREATEREQUAL", ">=", highlight=highlight.keyword.operator.expression)
PLUS = Terminal("PLUS", "+", highlight=highlight.keyword.operator.expression)
MINUS = Terminal("MINUS", "-", highlight=highlight.keyword.operator.expression)
STAR = Terminal("STAR", "*", highlight=highlight.keyword.operator.expression)
SLASH = Terminal("SLASH", "/", highlight=highlight.keyword.operator.expression)
NUMBER = Terminal(
"NUMBER",
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq( Re.seq(
Re.literal("."),
Re.set(("0", "9")).plus(), Re.set(("0", "9")).plus(),
Re.seq( ).question(),
Re.literal("."), Re.seq(
Re.set(("0", "9")).plus(), Re.set("e", "E"),
).question(), Re.set("+", "-").question(),
Re.seq( Re.set(("0", "9")).plus(),
Re.set("e", "E"), ).question(),
Re.set("+", "-").question(), ),
Re.set(("0", "9")).plus(), highlight=highlight.constant.numeric,
).question(), )
), TRUE = Terminal("TRUE", "true", highlight=highlight.constant.language)
highlight=highlight.constant.numeric, FALSE = Terminal("FALSE", "false", highlight=highlight.constant.language)
) BANG = Terminal("BANG", "!", highlight=highlight.keyword.operator.expression)
TRUE = Terminal("true", highlight=highlight.constant.language) DOT = Terminal("DOT", ".", highlight=highlight.punctuation.separator)
FALSE = Terminal("false", highlight=highlight.constant.language) MATCH = Terminal("MATCH", "match", highlight=highlight.keyword.other)
BANG = Terminal("!", highlight=highlight.keyword.operator.expression) EXPORT = Terminal("EXPORT", "export", highlight=highlight.keyword.other)
DOT = Terminal(".", highlight=highlight.punctuation.separator) UNDERSCORE = Terminal("UNDERSCORE", "_", highlight=highlight.variable.language)
MATCH = Terminal("match", highlight=highlight.keyword.other) NEW = Terminal("NEW", "new", highlight=highlight.keyword.operator)
EXPORT = Terminal("export", highlight=highlight.keyword.other) LSQUARE = Terminal("LSQUARE", "[", highlight=highlight.punctuation.square_bracket.open)
UNDERSCORE = Terminal("_", highlight=highlight.variable.language) RSQUARE = Terminal("RSQUARE", "]", highlight=highlight.punctuation.square_bracket.close)
NEW = Terminal("new", highlight=highlight.keyword.operator)
LSQUARE = Terminal("[", highlight=highlight.punctuation.square_bracket.open)
RSQUARE = Terminal("]", highlight=highlight.punctuation.square_bracket.close)
FineGrammar=Grammar(
start=file,
trivia=[BLANKS, LINE_BREAK, COMMENT],
pretty_indent=" ",
precedence=[
(Assoc.RIGHT, [EQUAL]),
(Assoc.LEFT, [OR]),
(Assoc.LEFT, [IS]),
(Assoc.LEFT, [AND]),
(Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]),
(Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
(Assoc.LEFT, [PLUS, MINUS]),
(Assoc.LEFT, [STAR, SLASH]),
(Assoc.LEFT, [primary_expression]),
(Assoc.LEFT, [LPAREN]),
(Assoc.LEFT, [DOT]),
#
# If there's a confusion about whether to make an IF
# statement or an expression, prefer the statement.
#
(Assoc.NONE, [if_statement]),
],
)
if __name__ == "__main__": if __name__ == "__main__":
from pathlib import Path from pathlib import Path
@ -525,7 +520,7 @@ if __name__ == "__main__":
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
# TODO: Actually generate a lexer/parser for some runtime. # TODO: Actually generate a lexer/parser for some runtime.
grammar = FineGrammar() grammar = FineGrammar
table = grammar.build_table() table = grammar.build_table()
# print(table.format()) # print(table.format())

View file

@ -25,8 +25,6 @@ class FaceQuery:
def gather_faces(grammar: parser.Grammar): def gather_faces(grammar: parser.Grammar):
nts = {nt.name: nt for nt in grammar.non_terminals()}
def scoop(node: str, input: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]: def scoop(node: str, input: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]:
parts = [] parts = []
for item in input: for item in input:
@ -52,13 +50,12 @@ def gather_faces(grammar: parser.Grammar):
) )
) )
elif isinstance(item, str): elif isinstance(item, parser.NonTerminal):
nt = nts[item] if item.transparent:
if nt.transparent: if item.name in visited:
if nt.name in visited:
continue continue
visited.add(nt.name) visited.add(item.name)
body = nt.fn(grammar) body = item.definition
for production in body.flatten(with_metadata=True): for production in body.flatten(with_metadata=True):
parts.extend(scoop(node, production, visited)) parts.extend(scoop(node, production, visited))
@ -69,7 +66,7 @@ def gather_faces(grammar: parser.Grammar):
if rule.transparent: if rule.transparent:
continue continue
body = rule.fn(grammar) body = rule.definition
for production in body.flatten(with_metadata=True): for production in body.flatten(with_metadata=True):
queries.extend(scoop(rule.name, production, set())) queries.extend(scoop(rule.name, production, set()))

View file

@ -17,25 +17,24 @@ the thing that processes the tables.
## Making Grammars ## Making Grammars
To get started, create a grammar that derives from the `Grammar` class. Create Define a series of terminals (with `Terminal`) and rules (as functions decorated
one method per nonterminal, decorated with the `rule` decorator. Here's an with `@rule`), and then pass the starting rule to the constructor of a `Grammar`
example: object:
@rule
def expression(self):
return seq(self.expression, self.PLUS, self.term) | self.term
class SimpleGrammar(Grammar): @rule
@rule def term(self):
def expression(self): return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID
return seq(self.expression, self.PLUS, self.term) | self.term
@rule PLUS = Terminal('+')
def term(self): LPAREN = Terminal('(')
return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID RPAREN = Terminal(')')
ID = Terminal('id')
PLUS = Terminal('+')
LPAREN = Terminal('(')
RPAREN = Terminal(')')
ID = Terminal('id')
grammar = Grammar(start=expression)
## Using grammars ## Using grammars
@ -1533,7 +1532,9 @@ class ParserGenerator:
return builder.flush(config_sets) return builder.flush(config_sets)
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"] FlattenedWithMetadata = list[
"NonTerminal|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"
]
############################################################################### ###############################################################################
@ -1578,26 +1579,32 @@ class Rule:
class Terminal(Rule): class Terminal(Rule):
"""A token, or terminal symbol in the grammar.""" """A token, or terminal symbol in the grammar."""
name: str | None name: str
pattern: "str | Re" pattern: "str | Re"
meta: dict[str, typing.Any] meta: dict[str, typing.Any]
regex: bool regex: bool
error_name: str | None error_name: str | None
definition_location: str
def __init__( def __init__(
self, self,
name: str,
pattern: "str|Re", pattern: "str|Re",
*, *,
name: str | None = None,
error_name: str | None = None, error_name: str | None = None,
**kwargs, **kwargs,
): ):
# TODO: Consider identifying the name from some kind of globals
# dictionary or something if necessary.
self.name = name self.name = name
self.pattern = pattern self.pattern = pattern
self.meta = kwargs self.meta = kwargs
self.regex = isinstance(pattern, Re) self.regex = isinstance(pattern, Re)
self.error_name = error_name self.error_name = error_name
caller = inspect.stack()[1]
self.definition_location = f"{caller.filename}:{caller.lineno}"
def flatten( def flatten(
self, with_metadata: bool = False self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]: ) -> typing.Generator[FlattenedWithMetadata, None, None]:
@ -1617,14 +1624,17 @@ class NonTerminal(Rule):
grammar class. grammar class.
""" """
fn: typing.Callable[["Grammar"], Rule] fn: typing.Callable[[], Rule]
name: str name: str
transparent: bool transparent: bool
error_name: str | None error_name: str | None
definition_location: str
_definition: Rule | None
_body: "list[list[NonTerminal | Terminal]] | None"
def __init__( def __init__(
self, self,
fn: typing.Callable[["Grammar"], Rule], fn: typing.Callable[[], Rule],
name: str | None = None, name: str | None = None,
transparent: bool = False, transparent: bool = False,
error_name: str | None = None, error_name: str | None = None,
@ -1645,22 +1655,37 @@ class NonTerminal(Rule):
self.name = name or fn.__name__ self.name = name or fn.__name__
self.transparent = transparent self.transparent = transparent
self.error_name = error_name self.error_name = error_name
self._definition = None
self._body = None
def generate_body(self, grammar) -> list[list[str | Terminal]]: caller = inspect.stack()[1]
"""Generate the body of the non-terminal. self.definition_location = f"{caller.filename}:{caller.lineno}"
We do this by first calling the associated function in order to get a @property
Rule, and then flattening the Rule into the associated set of def definition(self) -> Rule:
productions. We strip the metadata from the flattened result to make """The rule that is the definition of this nonterminal.
life a little easier for the caller.
(As opposed this rule itself, which is... itself.)
"""
if self._definition is None:
self._definition = self.fn()
return self._definition
@property
def body(self) -> "list[list[NonTerminal | Terminal]]":
"""The flattened body of the nonterminal: a list of productions where
each production is a sequence of Terminals and NonTerminals.
""" """
def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]: def without_metadata(result: FlattenedWithMetadata) -> list[NonTerminal | Terminal]:
for item in result: for item in result:
assert not isinstance(item, tuple) assert not isinstance(item, tuple)
return typing.cast(list[str | Terminal], result) return typing.cast(list[NonTerminal | Terminal], result)
return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)] if self._body is None:
self._body = [without_metadata(rule) for rule in self.fn().flatten(with_metadata=False)]
return self._body
def flatten( def flatten(
self, with_metadata: bool = False self, with_metadata: bool = False
@ -1669,7 +1694,7 @@ class NonTerminal(Rule):
# the context of some other production. Yield ourselves, and trust that # the context of some other production. Yield ourselves, and trust that
# in time we will be asked to generate our body. # in time we will be asked to generate our body.
del with_metadata del with_metadata
yield [self.name] yield [self]
class AlternativeRule(Rule): class AlternativeRule(Rule):
@ -1775,7 +1800,7 @@ def mark(rule: Rule, **kwargs) -> Rule:
@typing.overload @typing.overload
def rule(f: typing.Callable, /) -> Rule: ... def rule(f: typing.Callable, /) -> NonTerminal: ...
@typing.overload @typing.overload
@ -1783,16 +1808,15 @@ def rule(
name: str | None = None, name: str | None = None,
transparent: bool | None = None, transparent: bool | None = None,
error_name: str | None = None, error_name: str | None = None,
) -> typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]: ... ) -> typing.Callable[[typing.Callable[[], Rule]], NonTerminal]: ...
def rule( def rule(
name: str | None | typing.Callable = None, name: str | None | typing.Callable = None,
transparent: bool | None = None, transparent: bool | None = None,
error_name: str | None = None, error_name: str | None = None,
) -> Rule | typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]: ) -> NonTerminal | typing.Callable[[typing.Callable[[], Rule]], NonTerminal]:
"""The decorator that marks a method in a Grammar object as a nonterminal """The decorator that marks a function as a nonterminal rule.
rule.
As with all the best decorators, it can be called with or without arguments. As with all the best decorators, it can be called with or without arguments.
If called with one argument, that argument is a name that overrides the name If called with one argument, that argument is a name that overrides the name
@ -1801,7 +1825,7 @@ def rule(
if callable(name): if callable(name):
return rule()(name) return rule()(name)
def wrapper(f: typing.Callable[[typing.Any], Rule]): def wrapper(f: typing.Callable[[], Rule]):
nonlocal name nonlocal name
nonlocal transparent nonlocal transparent
nonlocal error_name nonlocal error_name
@ -2746,145 +2770,150 @@ class TriviaMode(enum.Enum):
############################################################################### ###############################################################################
# Finally, the base class for grammars # Finally, the grammar class.
############################################################################### ###############################################################################
PrecedenceList = list[typing.Tuple[Assoc, list[Rule | str]]] PrecedenceList = list[typing.Tuple[Assoc, list[Terminal|NonTerminal]]]
def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str,NonTerminal], dict[str,Terminal]]:
"""Starting from the given NonTerminal, gather all of the symbols
(NonTerminals and Terminals) that make up the grammar.
"""
# NOTE: We use a dummy dictionary here to preserve insertion order.
# That way the first element in named_rules is always the start
# symbol!
rules: dict[NonTerminal, int] = {}
terminals: dict[Terminal, int] = {}
# STEP 1 is to just gather all of the symbols that we can find.
queue: list[NonTerminal] = [start]
while len(queue) > 0:
nt = queue.pop()
if nt in rules:
continue
# TODO: Here we can track modules (via the funcitons that make up
# nonterminals, maybe) and maybe use that to infer terminal
# names.
rules[nt] = len(rules)
for rule in nt.body:
for symbol in rule:
if isinstance(symbol, NonTerminal):
if symbol not in rules:
queue.append(symbol)
elif isinstance(symbol, Terminal):
terminals[symbol] = len(terminals)
else:
typing.assert_never(symbol)
# (Terminals are also reachable!)
for symbol in trivia:
terminals[symbol] = len(terminals)
# Step 2 is to organize all of these things and check them for errors.
named_rules: dict[str, NonTerminal] = {}
for rule in rules:
existing = named_rules.get(rule.name)
if existing is not None:
# TODO TEST
raise ValueError(f"""Found more than one rule named {rule.name}:
- {existing.definition_location}
- {rule.definition_location}""")
named_rules[rule.name] = rule
named_terminals: dict[str, Terminal] = {}
for terminal in terminals:
existing = named_terminals.get(terminal.name)
if existing is not None:
# TODO TEST
raise ValueError(f"""Found more than one terminal named {terminal.name}:
- {existing.definition_location}
- {terminal.definition_location}""")
existing_rule = named_rules.get(terminal.name)
if existing_rule is not None:
# TODO TEST
raise ValueError(f"""Found a terminal and a rule both named {terminal.name}:
- The rule was defined at {existing_rule.definition_location}
- The terminal was defined at {terminal.definition_location}""")
named_terminals[terminal.name] = terminal
return (named_rules, named_terminals)
class Grammar: class Grammar:
"""The base class for defining a grammar. """A container that holds all the terminals and nonterminals for a
given grammar. The terminals and nonterminals are defined elsewhere;
Inherit from this, and and define members for your nonterminals, and then provide the starting rule and this object will build the grammar from
use the `build_table` method to construct the parse tables. everything accessible.
Here's an example of a simple grammar: Here's an example of a simple grammar:
class SimpleGrammar(Grammar): @rule
@rule def expression(self):
def expression(self): return seq(self.expression, self.PLUS, self.term) | self.term
return seq(self.expression, self.PLUS, self.term) | self.term
@rule @rule
def term(self): def term(self):
return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID return seq(self.LPAREN, self.expression, self.RPAREN) | self.ID
PLUS = Terminal('+') PLUS = Terminal('+')
LPAREN = Terminal('(') LPAREN = Terminal('(')
RPAREN = Terminal(')') RPAREN = Terminal(')')
ID = Terminal('id') ID = Terminal('id')
grammar = Grammar(start=expression)
Not very exciting, perhaps, but it's something. Not very exciting, perhaps, but it's something.
""" """
_precedence: dict[str, typing.Tuple[Assoc, int]] start: NonTerminal
_generator: type[ParserGenerator] name: str
pretty_indent: str | None
_terminals: dict[str, Terminal] _terminals: dict[str, Terminal]
_nonterminals: dict[str, NonTerminal] _nonterminals: dict[str, NonTerminal]
_trivia: list[Terminal] _trivia: list[Terminal]
_precedence: dict[str, typing.Tuple[Assoc, int]]
def __init__( def __init__(
self, self,
start: str | NonTerminal | None = None, start: NonTerminal,
precedence: PrecedenceList | None = None, precedence: PrecedenceList | None = None,
generator: type[ParserGenerator] | None = None, trivia: list[Terminal] | None = None,
trivia: list[str | Terminal] | None = None,
name: str | None = None, name: str | None = None,
pretty_indent: str | None = None,
): ):
if start is None: if start.transparent:
start = getattr(self, "start", None) # TODO: TEST
if start is None: raise ValueError("The start rule cannot be transparent")
raise ValueError(
"The default start rule must either be specified in the constructor or as an "
"attribute in the class."
)
if isinstance(start, NonTerminal):
start = start.name
if precedence is None: if precedence is None:
precedence = getattr(self, "precedence", []) precedence = []
assert precedence is not None assert precedence is not None
if generator is None:
generator = getattr(self, "generator", ParserGenerator)
assert generator is not None
if trivia is None: if trivia is None:
trivia = getattr(self, "trivia", []) trivia = []
assert trivia is not None assert trivia is not None
# Fixup terminal names with the name of the member that declared it.
terminals = {}
for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)):
if t.name is None:
t.name = n
if n in terminals:
raise ValueError(f"More than one terminal has the name '{n}'")
terminals[n] = t
# Get the nonterminals.
nonterminals = {}
for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)):
if nt.name in nonterminals:
raise ValueError(f"More than one nonterminal found with the name '{nt.name}'")
if nt.name in terminals:
raise ValueError(
f"'{nt.name}' is the name of both a Terminal and a NonTerminal rule"
)
nonterminals[nt.name] = nt
# Resolve the trivia declarations correctly.
resolved_trivia: list[Terminal] = []
for t in trivia:
if isinstance(t, str):
resolved = terminals.get(t)
if resolved is None:
raise ValueError(f"The trivia '{t}' is not a terminal name")
resolved_trivia.append(resolved)
elif isinstance(t, Terminal):
resolved_trivia.append(t)
else:
raise ValueError(f"{t} must be either a terminal name or literally a terminal")
# Fix up the precedence table. # Fix up the precedence table.
precedence_table = {} precedence_table = {}
for prec, (associativity, symbols) in enumerate(precedence): for prec, (associativity, symbols) in enumerate(precedence):
for symbol in symbols: for symbol in symbols:
key = None precedence_table[symbol.name] = (associativity, prec + 1)
if isinstance(symbol, Terminal):
key = symbol.name
if key is None:
raise ValueError(f"{symbol} is a terminal that has not had a name set yet")
elif isinstance(symbol, NonTerminal):
key = symbol.name
elif isinstance(symbol, str):
if symbol in terminals or symbol in nonterminals:
key = symbol
if key is None:
raise ValueError(
f"{symbol} must be either a Token or a NonTerminal, or the name of one"
)
precedence_table[key] = (associativity, prec + 1)
if name is None: if name is None:
name = getattr(self, "name", None) name = "unknown"
if name is None:
name = self.__class__.__name__.removesuffix("Grammar").lower()
self._precedence = precedence_table
self.start = start self.start = start
self._generator = generator
self._terminals = terminals
self._nonterminals = nonterminals
self._trivia = resolved_trivia
self.name = name self.name = name
self._nonterminals, self._terminals = gather_grammar(start, trivia)
self._trivia = trivia
self._precedence = precedence_table
self.pretty_indent = pretty_indent
def terminals(self) -> list[Terminal]: def terminals(self) -> list[Terminal]:
return list(self._terminals.values()) return list(self._terminals.values())
@ -2898,55 +2927,7 @@ class Grammar:
def get_precedence(self, name: str) -> None | tuple[Assoc, int]: def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
return self._precedence.get(name) return self._precedence.get(name)
# TODO: The flattened form should retain NonTerminal, not just str. def desugar(self) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
def generate_nonterminal_dict(
self, start: str | None = None
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
"""Convert the rules into a dictionary of productions, and a set of
the names of transparent nonterminals.
Our table generators work on a very flat set of productions. This is the
first step in flattening the productions from the members: walk the rules
starting from the given start rule and flatten them, one by one, into a
dictionary that maps nonterminal rule name to its associated list of
productions.
"""
if start is None:
start = self.start
nonterminals = self._nonterminals
transparents = {rule.name for rule in nonterminals.values() if rule.transparent}
grammar = {}
rule = nonterminals.get(start)
if rule is None:
raise ValueError(f"Cannot find a rule named '{start}'")
if rule.transparent:
raise ValueError("The start rule cannot be transparent")
queue = [rule]
while len(queue) > 0:
rule = queue.pop()
if rule.name in grammar:
continue
body = rule.generate_body(self)
for clause in body:
for symbol in clause:
if not isinstance(symbol, Terminal):
assert isinstance(symbol, str)
nonterminal = nonterminals.get(symbol)
if nonterminal is None:
raise ValueError(f"While processing {rule.name}: cannot find {symbol}")
queue.append(nonterminal)
grammar[rule.name] = body
return (grammar, transparents)
def desugar(
self, start: str | None = None
) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
"""Convert the rules into a flat list of productions. """Convert the rules into a flat list of productions.
Our table generators work from a very flat set of productions. The form Our table generators work from a very flat set of productions. The form
@ -2954,37 +2935,27 @@ class Grammar:
generate_nonterminal_dict- less useful to people, probably, but it is generate_nonterminal_dict- less useful to people, probably, but it is
the input form needed by the Generator. the input form needed by the Generator.
""" """
temp_grammar, transparents = self.generate_nonterminal_dict(start) grammar: list[tuple[str,list[str]]] = [
(rule.name, [s.name for s in production])
for rule in self._nonterminals.values()
for production in rule.body
]
assert grammar[0][0] == self.start.name
grammar = [] transparents = {name for name, rule in self._nonterminals.items() if rule.transparent}
for rule_name, clauses in temp_grammar.items():
for clause in clauses:
new_clause = []
for symbol in clause:
if isinstance(symbol, Terminal):
if symbol.name in temp_grammar:
raise ValueError(
f"'{symbol.name}' is the name of both a Terminal and a NonTerminal rule. This will cause problems."
)
new_clause.append(symbol.name)
else:
new_clause.append(symbol)
grammar.append((rule_name, new_clause))
return grammar, transparents return grammar, transparents
def build_table(self, start: str | None = None, generator=None) -> ParseTable: def build_table(self) -> ParseTable:
"""Construct a parse table for this grammar, starting at the named """Construct a parse table for this grammar."""
nonterminal rule. desugared, transparents = self.desugar()
"""
if start is None:
start = self.start
desugared, transparents = self.desugar(start)
if generator is None: gen = ParserGenerator(
generator = self._generator self.start.name,
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents) desugared,
precedence=self._precedence,
transparents=transparents,
)
table = gen.gen_table() table = gen.gen_table()
for t in self._trivia: for t in self._trivia:

View file

@ -263,8 +263,7 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
if rule.transparent: if rule.transparent:
rule_name = "_" + rule_name rule_name = "_" + rule_name
body = rule.fn(grammar) rule_definition = convert_to_tree_sitter(rule.definition, grammar)
rule_definition = convert_to_tree_sitter(body, grammar)
if rule_definition is None: if rule_definition is None:
raise Exception(f"Tree-sitter does not support the empty rule {rule_name}") raise Exception(f"Tree-sitter does not support the empty rule {rule_name}")
rule_definition = apply_precedence(rule_definition, rule.name, grammar) rule_definition = apply_precedence(rule_definition, rule.name, grammar)
@ -283,7 +282,6 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str): def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
nts = {nt.name: nt for nt in grammar.non_terminals()}
scope_suffix = "." + grammar.name scope_suffix = "." + grammar.name
def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]: def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]:
@ -300,13 +298,12 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
raise Exception("Highlight must come with a field name") # TODO raise Exception("Highlight must come with a field name") # TODO
parts.append(f"{field_name}: _ @{highlight.scope}{scope_suffix}") parts.append(f"{field_name}: _ @{highlight.scope}{scope_suffix}")
elif isinstance(item, str): elif isinstance(item, parser.NonTerminal):
nt = nts[item] if item.transparent:
if nt.transparent: if item.name in visited:
if nt.name in visited:
continue continue
visited.add(nt.name) visited.add(item.name)
body = nt.fn(grammar) body = item.definition
for production in body.flatten(with_metadata=True): for production in body.flatten(with_metadata=True):
parts.extend(scoop(production, visited)) parts.extend(scoop(production, visited))
@ -317,7 +314,7 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
if rule.transparent: if rule.transparent:
continue continue
body = rule.fn(grammar) body = rule.definition
patterns = set() patterns = set()
for production in body.flatten(with_metadata=True): for production in body.flatten(with_metadata=True):
# Scoop up the meta... # Scoop up the meta...

View file

@ -79,11 +79,7 @@ class MatcherTable:
newline_replace: dict[str, str] newline_replace: dict[str, str]
def _compile_nonterminal_matcher( def _compile_nonterminal_matcher(rule: parser.NonTerminal) -> MatcherTable:
grammar: parser.Grammar,
nonterminals: dict[str, parser.NonTerminal],
rule: parser.NonTerminal,
) -> MatcherTable:
"""Generate a matcher table for a single nonterminal. """Generate a matcher table for a single nonterminal.
See the docs for [MatcherTable] to understand the result. See the docs for [MatcherTable] to understand the result.
@ -111,7 +107,7 @@ def _compile_nonterminal_matcher(
def compile_nonterminal(name: str, rule: parser.NonTerminal): def compile_nonterminal(name: str, rule: parser.NonTerminal):
if name not in visited: if name not in visited:
visited.add(name) visited.add(name)
for production in rule.fn(grammar).flatten(with_metadata=True): for production in rule.fn().flatten(with_metadata=True):
trans_prod = compile_production(production) trans_prod = compile_production(production)
generated_grammar.append((name, trans_prod)) generated_grammar.append((name, trans_prod))
@ -126,19 +122,18 @@ def _compile_nonterminal_matcher(
result = [] result = []
for item in production: for item in production:
if isinstance(item, str): if isinstance(item, parser.NonTerminal):
nt = nonterminals[item] if item.transparent:
if nt.transparent:
# If it's transparent then we make a new set of # If it's transparent then we make a new set of
# productions that covers the contents of the # productions that covers the contents of the
# transparent nonterminal. # transparent nonterminal.
name = "xxx_" + nt.name name = "xxx_" + item.name
compile_nonterminal(name, nt) compile_nonterminal(name, item)
result.append(name) result.append(name)
else: else:
# Otherwise it's a "token" in our input, named # Otherwise it's a "token" in our input, named
# "tree_{whatever}". # "tree_{whatever}".
result.append(f"tree_{item}") result.append(f"tree_{item.name}")
elif isinstance(item, parser.Terminal): elif isinstance(item, parser.Terminal):
# If it's a terminal it will appear in our input as # If it's a terminal it will appear in our input as
@ -257,7 +252,7 @@ def _compile_nonterminal_matcher(
start_name = f"yyy_{rule.name}" start_name = f"yyy_{rule.name}"
compile_nonterminal(start_name, rule) compile_nonterminal(start_name, rule)
gen = grammar._generator(start_name, generated_grammar) gen = parser.ParserGenerator(start_name, generated_grammar)
parse_table = gen.gen_table() parse_table = gen.gen_table()
for (_, replacement), rule_name in newlines.items(): for (_, replacement), rule_name in newlines.items():
@ -296,7 +291,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) ->
matchers = {} matchers = {}
if indent is None: if indent is None:
indent = getattr(grammar, "pretty_indent", None) indent = grammar.pretty_indent
if indent is None: if indent is None:
indent = " " indent = " "
@ -307,7 +302,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) ->
trivia_mode[t.name] = mode trivia_mode[t.name] = mode
for name, rule in nonterminals.items(): for name, rule in nonterminals.items():
matchers[name] = _compile_nonterminal_matcher(grammar, nonterminals, rule) matchers[name] = _compile_nonterminal_matcher(rule)
return PrettyTable( return PrettyTable(
indent, indent,

223
sql.py
View file

@ -2,6 +2,7 @@ from parser import *
NAME = Terminal( NAME = Terminal(
"NAME",
Re.seq( Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"), Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
@ -9,6 +10,7 @@ NAME = Terminal(
) )
STRING = Terminal( STRING = Terminal(
"STRING",
Re.seq( Re.seq(
Re.literal("'"), Re.literal("'"),
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(), (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
@ -18,6 +20,7 @@ STRING = Terminal(
) )
NUMBER = Terminal( NUMBER = Terminal(
"NUMBER",
Re.seq( Re.seq(
Re.set(("0", "9")).plus(), Re.set(("0", "9")).plus(),
Re.seq( Re.seq(
@ -33,118 +36,118 @@ NUMBER = Terminal(
highlight=highlight.constant.numeric, highlight=highlight.constant.numeric,
) )
OR = Terminal("or") OR = Terminal("OR", "or")
AND = Terminal("and") AND = Terminal("AND", "and")
NOT = Terminal("not") NOT = Terminal("NOT", "not")
COMPARISON = Terminal( COMPARISON = Terminal(
"COMPARISON",
Re.literal("=") Re.literal("=")
| Re.literal("<>") | Re.literal("<>")
| Re.literal("<") | Re.literal("<")
| Re.literal(">") | Re.literal(">")
| Re.literal("<=") | Re.literal("<=")
| Re.literal(">=") | Re.literal(">="),
) )
PLUS = Terminal("+") PLUS = Terminal("PLUS", "+")
MINUS = Terminal("-") MINUS = Terminal("MINUS", "-")
STAR = Terminal("*") STAR = Terminal("STAR", "*")
SLASH = Terminal("/") SLASH = Terminal("SLASH", "/")
precedence = [ ALL = Terminal("ALL", "all")
(Assoc.LEFT, ["OR"]), AMMSC = Terminal("AMMSC", "ammsc")
(Assoc.LEFT, ["AND"]), ANY = Terminal("ANY", "any")
(Assoc.LEFT, ["NOT"]), AS = Terminal("AS", "as")
(Assoc.LEFT, ["COMPARISON"]), ASC = Terminal("ASC", "asc")
(Assoc.LEFT, ["PLUS", "MINUS"]), AUTHORIZATION = Terminal("AUTHORIZATION", "authorization")
(Assoc.LEFT, ["STAR", "SLASH"]), BETWEEN = Terminal("BETWEEN", "between")
# TODO: Unary minus BY = Terminal("BY", "by")
] CHARACTER = Terminal("CHARACTER", "character")
CHECK = Terminal("CHECK", "check")
CLOSE = Terminal("CLOSE", "close")
COMMIT = Terminal("COMMIT", "commit")
CONTINUE = Terminal("CONTINUE", "continue")
CREATE = Terminal("CREATE", "create")
CURRENT = Terminal("CURRENT", "current")
CURSOR = Terminal("CURSOR", "cursor")
DECIMAL = Terminal("DECIMAL", "decimal")
DECLARE = Terminal("DECLARE", "declare")
DEFAULT = Terminal("DEFAULT", "default")
DELETE = Terminal("DELETE", "delete")
DESC = Terminal("DESC", "desc")
DISTINCT = Terminal("DISTINCT", "distinct")
DOUBLE = Terminal("DOUBLE", "double")
ESCAPE = Terminal("ESCAPE", "escape")
EXISTS = Terminal("EXISTS", "exists")
FETCH = Terminal("FETCH", "fetch")
FLOAT = Terminal("FLOAT", "float")
FOR = Terminal("FOR", "for")
FOREIGN = Terminal("FOREIGN", "foreign")
FOUND = Terminal("FOUND", "found")
FROM = Terminal("FROM", "from")
GOTO = Terminal("GOTO", "goto")
GRANT = Terminal("GRANT", "grant")
GROUP = Terminal("GROUP", "group")
HAVING = Terminal("HAVING", "having")
IN = Terminal("IN", "in")
INDICATOR = Terminal("INDICATOR", "indicator")
INSERT = Terminal("INSERT", "insert")
INTEGER = Terminal("INTEGER", "integer")
INTO = Terminal("INTO", "into")
IS = Terminal("IS", "is")
KEY = Terminal("KEY", "key")
LANGUAGE = Terminal("LANGUAGE", "language")
LIKE = Terminal("LIKE", "like")
NULL = Terminal("NULL", "null")
NUMERIC = Terminal("NUMERIC", "numeric")
OF = Terminal("OF", "of")
ON = Terminal("ON", "on")
OPEN = Terminal("OPEN", "open")
OPTION = Terminal("OPTION", "option")
ORDER = Terminal("ORDER", "order")
PARAMETER = Terminal("PARAMETER", "parameter")
PRECISION = Terminal("PRECISION", "precision")
PRIMARY = Terminal("PRIMARY", "primary")
PRIVILEGES = Terminal("PRIVILEGES", "privileges")
PROCEDURE = Terminal("PROCEDURE", "procedure")
PUBLIC = Terminal("PUBLIC", "public")
REAL = Terminal("REAL", "real")
REFERENCES = Terminal("REFERENCES", "references")
ROLLBACK = Terminal("ROLLBACK", "rollback")
SCHEMA = Terminal("SCHEMA", "schema")
SELECT = Terminal("SELECT", "select")
SET = Terminal("SET", "set")
SMALLINT = Terminal("SMALLINT", "smallint")
SOME = Terminal("SOME", "some")
SQLCODE = Terminal("SQLCODE", "sqlcode")
SQLERROR = Terminal("SQLERROR", "sqlerror")
TABLE = Terminal("TABLE", "table")
TO = Terminal("TO", "to")
UNION = Terminal("UNION", "union")
UNIQUE = Terminal("UNIQUE", "unique")
UPDATE = Terminal("UPDATE", "update")
USER = Terminal("USER", "user")
VALUES = Terminal("VALUES", "values")
VIEW = Terminal("VIEW", "view")
WHENEVER = Terminal("WHENEVER", "whenever")
WHERE = Terminal("WHERE", "where")
WITH = Terminal("WITH", "with")
WORK = Terminal("WORK", "work")
ALL = Terminal("all") SEMICOLON = Terminal("SEMICOLON", ";")
AMMSC = Terminal("ammsc") LPAREN = Terminal("LPAREN", "(")
ANY = Terminal("any") RPAREN = Terminal("RPAREN", ")")
ASC = Terminal("asc") COMMA = Terminal("COMMA", ",")
AUTHORIZATION = Terminal("authorization") EQUAL = Terminal("EQUAL", "=")
BETWEEN = Terminal("between") DOT = Terminal("DOT", ".")
BY = Terminal("by")
CHARACTER = Terminal("character")
CHECK = Terminal("check")
CLOSE = Terminal("close")
COMMIT = Terminal("commit")
CONTINUE = Terminal("continue")
CREATE = Terminal("create")
CURRENT = Terminal("current")
CURSOR = Terminal("cursor")
DECIMAL = Terminal("decimal")
DECLARE = Terminal("declare")
DEFAULT = Terminal("default")
DELETE = Terminal("delete")
DESC = Terminal("desc")
DISTINCT = Terminal("distinct")
DOUBLE = Terminal("double")
ESCAPE = Terminal("escape")
EXISTS = Terminal("exists")
FETCH = Terminal("fetch")
FLOAT = Terminal("float")
FOR = Terminal("for")
FOREIGN = Terminal("foreign")
FOUND = Terminal("found")
FROM = Terminal("from")
GOTO = Terminal("goto")
GRANT = Terminal("grant")
GROUP = Terminal("group")
HAVING = Terminal("having")
IN = Terminal("in")
INDICATOR = Terminal("indicator")
INSERT = Terminal("insert")
INTEGER = Terminal("integer")
INTO = Terminal("into")
IS = Terminal("is")
KEY = Terminal("key")
LANGUAGE = Terminal("language")
LIKE = Terminal("like")
NULL = Terminal("null")
NUMERIC = Terminal("numeric")
OF = Terminal("of")
ON = Terminal("on")
OPEN = Terminal("open")
OPTION = Terminal("option")
ORDER = Terminal("order")
PARAMETER = Terminal("parameter")
PRECISION = Terminal("precision")
PRIMARY = Terminal("primary")
PRIVILEGES = Terminal("privileges")
PROCEDURE = Terminal("procedure")
PUBLIC = Terminal("public")
REAL = Terminal("real")
REFERENCES = Terminal("references")
ROLLBACK = Terminal("rollback")
SCHEMA = Terminal("schema")
SELECT = Terminal("select")
SET = Terminal("set")
SMALLINT = Terminal("smallint")
SOME = Terminal("some")
SQLCODE = Terminal("sqlcode")
SQLERROR = Terminal("sqlerror")
TABLE = Terminal("table")
TO = Terminal("to")
UNION = Terminal("union")
UNIQUE = Terminal("unique")
UPDATE = Terminal("update")
USER = Terminal("user")
VALUES = Terminal("values")
VIEW = Terminal("view")
WHENEVER = Terminal("whenever")
WHERE = Terminal("where")
WITH = Terminal("with")
WORK = Terminal("work")
SEMICOLON = Terminal(";") BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
LPAREN = Terminal("(") LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
RPAREN = Terminal(")") COMMENT = Terminal(
COMMA = Terminal(",") "COMMENT",
EQUAL = Terminal("=") Re.seq(Re.literal("--"), Re.set("\n").invert().star()),
DOT = Terminal(".") highlight=highlight.comment.line,
AS = Terminal("as") trivia_mode=TriviaMode.LineComment,
)
@rule @rule
@ -740,3 +743,19 @@ def user():
@rule @rule
def when_action(): def when_action():
return (GOTO + NAME) | CONTINUE return (GOTO + NAME) | CONTINUE
SQL = Grammar(
start=sql_list,
precedence=[
(Assoc.LEFT, [OR]),
(Assoc.LEFT, [AND]),
(Assoc.LEFT, [NOT]),
(Assoc.LEFT, [COMPARISON]),
(Assoc.LEFT, [PLUS, MINUS]),
(Assoc.LEFT, [STAR, SLASH]),
# TODO: Unary minus
],
trivia=[BLANKS, COMMENT, LINE_BREAK],
name="SQL",
)

View file

@ -11,138 +11,141 @@ import parser.runtime as runtime
# Tests based on # Tests based on
# https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html # https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
class LGrammar(Grammar):
start = "File"
trivia = ["BLANKS"]
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
TRUE = Terminal("TRUE", "true")
FALSE = Terminal("FALSE", "false")
INT = Terminal("INT", Re.set(("0", "9")).plus())
FN = Terminal("FN", "fn")
ARROW = Terminal("ARROW", "->")
COMMA = Terminal("COMMA", ",")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
LCURLY = Terminal("LCURLY", "{")
RCURLY = Terminal("RCURLY", "}")
COLON = Terminal("COLON", ":")
SEMICOLON = Terminal("SEMICOLON", ";")
LET = Terminal("LET", "let")
EQUAL = Terminal("EQUAL", "=")
RETURN = Terminal("RETURN", "return")
PLUS = Terminal("PLUS", "+")
MINUS = Terminal("MINUS", "-")
STAR = Terminal("STAR", "*")
SLASH = Terminal("SLASH", "/")
NAME = Terminal(
"NAME",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
@rule
def File():
# TODO: Make lists easier
return _functions
@rule
def _functions():
return Function | (_functions + Function)
@rule
def Function():
return FN + NAME + ParamList + opt(ARROW + TypeExpr) + Block
@rule
def ParamList():
return LPAREN + opt(_parameters) + RPAREN
@rule
def _parameters():
# NOTE: The ungrammar in the reference does not talk about commas
# required between parameters so this massages it to make them
# required. Commas are in the list not the param, which is more
# awkward for processing but not terminally so.
return (Param + opt(COMMA)) | (Param + COMMA + _parameters)
@rule
def Param():
return NAME + COLON + TypeExpr
@rule
def TypeExpr():
return NAME
@rule
def Block():
return LCURLY + opt(_statements) + RCURLY
@rule
def _statements():
return Stmt | _statements + Stmt
@rule
def Stmt():
return StmtExpr | StmtLet | StmtReturn
@rule
def StmtExpr():
return Expr + SEMICOLON
@rule
def StmtLet():
return LET + NAME + EQUAL + Expr + SEMICOLON
@rule
def StmtReturn():
return RETURN + Expr + SEMICOLON
@rule
def Expr():
return ExprLiteral | ExprName | ExprParen | ExprBinary | ExprCall
@rule
def ExprLiteral():
return INT | TRUE | FALSE
@rule
def ExprName():
return NAME
@rule
def ExprParen():
return LPAREN + Expr + RPAREN
@rule
def ExprBinary():
return Expr + (PLUS | MINUS | STAR | SLASH) + Expr
@rule
def ExprCall():
    """A call expression: callee followed by an argument list."""
    call = Expr + ArgList
    return call
@rule
def ArgList():
    """A parenthesized, possibly empty argument list."""
    args = opt(_arg_star)
    return LPAREN + args + RPAREN
@rule
def _arg_star():
    """One or more comma-separated arguments, with an optional trailing comma."""
    # Again, a deviation from the original. See _parameters.
    final_arg = Expr + opt(COMMA)
    more_args = Expr + COMMA + _arg_star
    return final_arg | more_args
# Defect fixed: lines here were a garbled two-column diff rendering — the old
# (string-name) and new (symbol) precedence lists were merged onto the same
# physical lines, duplicating every token. Reconstructed as the new-style
# Grammar(...) call, consistent with the surrounding module-level code, with
# precedence entries referencing the Terminal objects directly.
LGrammar = Grammar(
    start=File,
    trivia=[BLANKS],
    # Need a little bit of disambiguation for the symbol involved.
    precedence=[
        (Assoc.LEFT, [PLUS, MINUS]),
        (Assoc.LEFT, [STAR, SLASH]),
        (Assoc.LEFT, [LPAREN]),
    ],
)
@rule L_PARSE_TABLE = LGrammar.build_table()
def File(self): L_LEXER_TABLE = LGrammar.compile_lexer()
# TODO: Make lists easier
return self._functions
@rule
def _functions(self):
return self.Function | (self._functions + self.Function)
@rule
def Function(self):
return self.FN + self.NAME + self.ParamList + opt(self.ARROW + self.TypeExpr) + self.Block
@rule
def ParamList(self):
return self.LPAREN + opt(self._parameters) + self.RPAREN
@rule
def _parameters(self):
# NOTE: The ungrammar in the reference does not talk about commas required between parameters
# so this massages it to make them required. Commas are in the list not the param, which
# is more awkward for processing but not terminally so.
return (self.Param + opt(self.COMMA)) | (self.Param + self.COMMA + self._parameters)
@rule
def Param(self):
return self.NAME + self.COLON + self.TypeExpr
@rule
def TypeExpr(self):
return self.NAME
@rule
def Block(self):
return self.LCURLY + opt(self._statements) + self.RCURLY
@rule
def _statements(self):
return self.Stmt | self._statements + self.Stmt
@rule
def Stmt(self):
return self.StmtExpr | self.StmtLet | self.StmtReturn
@rule
def StmtExpr(self):
return self.Expr + self.SEMICOLON
@rule
def StmtLet(self):
return self.LET + self.NAME + self.EQUAL + self.Expr + self.SEMICOLON
@rule
def StmtReturn(self):
return self.RETURN + self.Expr + self.SEMICOLON
@rule
def Expr(self):
return self.ExprLiteral | self.ExprName | self.ExprParen | self.ExprBinary | self.ExprCall
@rule
def ExprLiteral(self):
return self.INT | self.TRUE | self.FALSE
@rule
def ExprName(self):
return self.NAME
@rule
def ExprParen(self):
return self.LPAREN + self.Expr + self.RPAREN
@rule
def ExprBinary(self):
return self.Expr + (self.PLUS | self.MINUS | self.STAR | self.SLASH) + self.Expr
@rule
def ExprCall(self):
return self.Expr + self.ArgList
@rule
def ArgList(self):
return self.LPAREN + opt(self._arg_star) + self.RPAREN
@rule
def _arg_star(self):
# Again, a deviation from the original. See _parameters.
return (self.Expr + opt(self.COMMA)) | (self.Expr + self.COMMA + self._arg_star)
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
TRUE = Terminal("true")
FALSE = Terminal("false")
INT = Terminal(Re.set(("0", "9")).plus())
FN = Terminal("fn")
ARROW = Terminal("->")
COMMA = Terminal(",")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
LCURLY = Terminal("{")
RCURLY = Terminal("}")
COLON = Terminal(":")
SEMICOLON = Terminal(";")
LET = Terminal("let")
EQUAL = Terminal("=")
RETURN = Terminal("return")
PLUS = Terminal("+")
MINUS = Terminal("-")
STAR = Terminal("*")
SLASH = Terminal("/")
NAME = Terminal(
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
L_PARSE_TABLE = LGrammar().build_table()
L_LEXER_TABLE = LGrammar().compile_lexer()
def test_matklad_one(): def test_matklad_one():

View file

@ -1,6 +1,5 @@
import pytest import pytest
import parser
import parser.runtime as runtime import parser.runtime as runtime
from parser import Grammar, seq, rule, Terminal from parser import Grammar, seq, rule, Terminal
@ -40,117 +39,68 @@ def _tree(treeform, count=0) -> runtime.Tree | runtime.TokenValue:
def test_lr0_lr0(): def test_lr0_lr0():
"""An LR0 grammar should work with an LR0 generator.""" """An LR0 grammar should work with an LR0 generator."""
class G(Grammar): PLUS = Terminal("+", "+")
start = "E" LPAREN = Terminal("(", "(")
# generator = parser.GenerateLR0 RPAREN = Terminal(")", ")")
IDENTIFIER = Terminal("id", "id")
@rule @rule
def E(self): def E():
return seq(self.E, self.PLUS, self.T) | self.T return seq(E, PLUS, T) | T
@rule @rule
def T(self): def T():
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER return seq(LPAREN, E, RPAREN) | IDENTIFIER
PLUS = Terminal("+", name="+") G = Grammar(start=E)
LPAREN = Terminal("(", name="(")
RPAREN = Terminal(")", name=")")
IDENTIFIER = Terminal("id", name="id")
table = G().build_table() table = G.build_table()
tree, errors = runtime.Parser(table).parse( tree, errors = runtime.Parser(table).parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN))
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
)
assert errors == [] assert errors == []
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")"))) assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
def test_all_generators():
"""This grammar should work with everything honestly."""
class G(Grammar):
start = "E"
@rule
def E(self):
return seq(self.E, self.PLUS, self.T) | self.T
@rule
def T(self):
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
PLUS = Terminal("+", name="+")
LPAREN = Terminal("(", name="(")
RPAREN = Terminal(")", name=")")
IDENTIFIER = Terminal("id", name="id")
GENERATORS = [
# parser.GenerateLR0,
# parser.GeneratePager,
parser.ParserGenerator,
]
for generator in GENERATORS:
table = G().build_table(generator=generator)
tree, errors = runtime.Parser(table).parse(
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
)
print("\n")
print(generator)
print(f"{table.format()}")
assert errors == []
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
def test_grammar_aho_ullman_2(): def test_grammar_aho_ullman_2():
class TestGrammar(Grammar): @rule
start = "S" def S():
return seq(X, X)
@rule @rule
def S(self): def X():
return seq(self.X, self.X) return seq(A, X) | B
@rule A = Terminal("A", "a")
def X(self): B = Terminal("B", "b")
return seq(self.A, self.X) | self.B
A = Terminal("a") Grammar(start=S).build_table()
B = Terminal("b")
TestGrammar().build_table(generator=parser.ParserGenerator)
# TestGrammar().build_table(generator=parser.GeneratePager)
def test_fun_lalr(): def test_fun_lalr():
@rule
def S():
return seq(V, E)
class TestGrammar(Grammar): @rule
start = "S" def E():
return F | seq(E, PLUS, F)
@rule @rule
def S(self): def F():
return seq(self.V, self.E) return V | INT | seq(LPAREN, E, RPAREN)
@rule @rule
def E(self): def V():
return self.F | seq(self.E, self.PLUS, self.F) return ID
@rule PLUS = Terminal("PLUS", "+")
def F(self): INT = Terminal("INT", "int")
return self.V | self.INT | seq(self.LPAREN, self.E, self.RPAREN) ID = Terminal("ID", "id")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
@rule Grammar(start=S).build_table()
def V(self):
return self.ID
PLUS = Terminal("+")
INT = Terminal("int")
ID = Terminal("id")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
TestGrammar().build_table()
def test_conflicting_names(): def test_conflicting_names():
@ -167,43 +117,28 @@ def test_conflicting_names():
to understand. to understand.
""" """
class TestGrammar(Grammar): @rule("IDENTIFIER")
start = "IDENTIFIER" def identifier():
return IDENTIFIER
@rule("IDENTIFIER") IDENTIFIER = Terminal("IDENTIFIER", "Identifier")
def identifier(self):
return self.IDENTIFIER
IDENTIFIER = Terminal("Identifier")
with pytest.raises(ValueError): with pytest.raises(ValueError):
TestGrammar().build_table() Grammar(start=identifier).build_table()
def test_grammar_ignore_trivia(): def test_grammar_ignore_trivia():
class G(Grammar): @rule
start = "sentence" def sentence():
return WORD | seq(sentence, WORD)
trivia = ["BLANK"] WORD = Terminal("WORD", "blah")
BLANK = Terminal("BLANK", " ")
@rule table = Grammar(start=sentence, trivia=[BLANK]).build_table()
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
table = G().build_table()
assert "BLANK" in table.trivia assert "BLANK" in table.trivia
tree, errors = runtime.Parser(table).parse( tree, errors = runtime.Parser(table).parse(Tokens(WORD, BLANK, WORD, BLANK))
Tokens(
G.WORD,
G.BLANK,
G.WORD,
G.BLANK,
)
)
assert errors == [] assert errors == []
assert tree == runtime.Tree( assert tree == runtime.Tree(
@ -234,135 +169,3 @@ def test_grammar_ignore_trivia():
), ),
), ),
) )
def test_grammar_unknown_trivia():
class G(Grammar):
start = "sentence"
trivia = ["BLANK"]
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
with pytest.raises(ValueError):
G().build_table()
def test_grammar_trivia_symbol():
class G(Grammar):
start = "sentence"
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
trivia = [BLANK]
table = G().build_table()
assert "BLANK" in table.trivia
def test_grammar_trivia_constructor():
class G(Grammar):
start = "sentence"
def __init__(self):
super().__init__(trivia=[self.BLANK])
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
table = G().build_table()
assert "BLANK" in table.trivia
def test_grammar_trivia_constructor_string():
class G(Grammar):
start = "sentence"
def __init__(self):
super().__init__(trivia=["BLANK"])
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
table = G().build_table()
assert "BLANK" in table.trivia
def test_grammar_trivia_constructor_string_unknown():
class G(Grammar):
start = "sentence"
def __init__(self):
super().__init__(trivia=["BLANK"])
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
with pytest.raises(ValueError):
G().build_table()
def test_grammar_name_implicit():
class FooGrammar(Grammar):
start = "x"
@rule
def x(self):
return self.WORD
WORD = Terminal("blah")
assert FooGrammar().name == "foo"
def test_grammar_name_explicit_member():
class FooGrammar(Grammar):
start = "x"
name = "bar"
@rule
def x(self):
return self.WORD
WORD = Terminal("blah")
assert FooGrammar().name == "bar"
def test_grammar_name_explicit_constructor():
class FooGrammar(Grammar):
start = "x"
name = "bar"
def __init__(self):
super().__init__(name="baz")
@rule
def x(self):
return self.WORD
WORD = Terminal("blah")
assert FooGrammar().name == "baz"

View file

@ -354,32 +354,33 @@ def test_edge_list_always_sorted(points: list[tuple[int, int]]):
def test_lexer_compile(): def test_lexer_compile():
class LexTest(Grammar): @rule
@rule def foo():
def foo(self): # NOTE: This is a hack to ensure the terminals are reachable. :P
return self.IS return IS | AS | IDENTIFIER
start = "foo" IS = Terminal("IS", "is")
AS = Terminal("AS", "as")
IS = Terminal("is") IDENTIFIER = Terminal(
AS = Terminal("as") "IDENTIFIER",
IDENTIFIER = Terminal( Re.seq(
Re.seq( Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), "_"), Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
)
) )
BLANKS = Terminal(Re.set("\r", "\n", "\t", " ").plus()) )
BLANKS = Terminal("BLANKS", Re.set("\r", "\n", "\t", " ").plus())
lexer = LexTest().compile_lexer()
LexTest = Grammar(start=foo, trivia=[BLANKS])
lexer = LexTest.compile_lexer()
dump_lexer_table(lexer) dump_lexer_table(lexer)
tokens = list(generic_tokenize("xy is ass", lexer)) tokens = list(generic_tokenize("xy is ass", lexer))
assert tokens == [ assert tokens == [
(LexTest.IDENTIFIER, 0, 2), (IDENTIFIER, 0, 2),
(LexTest.BLANKS, 2, 1), (BLANKS, 2, 1),
(LexTest.IS, 3, 2), (IS, 3, 2),
(LexTest.BLANKS, 5, 1), (BLANKS, 5, 1),
(LexTest.IDENTIFIER, 6, 3), (IDENTIFIER, 6, 3),
] ]
@ -387,34 +388,35 @@ def test_lexer_compile():
def test_lexer_numbers(n: float): def test_lexer_numbers(n: float):
assume(math.isfinite(n)) assume(math.isfinite(n))
class LexTest(Grammar): @rule
@rule def number():
def number(self): return NUMBER
return self.NUMBER
start = "number" NUMBER = Terminal(
"NUMBER",
NUMBER = Terminal( Re.seq(
Re.set(("0", "9")).plus(),
Re.seq( Re.seq(
Re.literal("."),
Re.set(("0", "9")).plus(), Re.set(("0", "9")).plus(),
Re.seq( ).question(),
Re.literal("."), Re.seq(
Re.set(("0", "9")).plus(), Re.set("e", "E"),
).question(), Re.set("+", "-").question(),
Re.seq( Re.set(("0", "9")).plus(),
Re.set("e", "E"), ).question(),
Re.set("+", "-").question(),
Re.set(("0", "9")).plus(),
).question(),
)
) )
)
lexer = LexTest().compile_lexer()
LexTest = Grammar(start=number)
lexer = LexTest.compile_lexer()
dump_lexer_table(lexer) dump_lexer_table(lexer)
number_string = str(n) number_string = str(n)
tokens = list(generic_tokenize(number_string, lexer)) tokens = list(generic_tokenize(number_string, lexer))
assert tokens == [ assert tokens == [
(LexTest.NUMBER, 0, len(number_string)), (NUMBER, 0, len(number_string)),
] ]

View file

@ -23,69 +23,66 @@ import parser.wadler.builder as builder
import parser.wadler.runtime as runtime import parser.wadler.runtime as runtime
class JsonGrammar(Grammar): def make_json_grammar():
start = "root"
trivia = ["BLANKS"]
@rule @rule
def root(self): def root():
return self.value return value
@rule(transparent=True) @rule(transparent=True)
def value(self): def value():
return ( return (
self.object object
| self.array | array
| self.NUMBER | NUMBER
| self.TRUE | TRUE
| self.FALSE | FALSE
| self.NULL | NULL
| self.STRING | STRING
) )
@rule @rule
def object(self): def object():
return group( return group(
self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY LCURLY + opt(indent(newline() + _object_pairs)) + newline() + RCURLY
) )
@rule @rule
def _object_pairs(self): def _object_pairs():
return alt( return alt(
self.object_pair, object_pair,
self.object_pair + self.COMMA + newline(" ") + self._object_pairs, object_pair + COMMA + newline(" ") + _object_pairs,
) )
@rule @rule
def object_pair(self): def object_pair():
return group(self.STRING + self.COLON + indent(newline(" ") + self.value)) return group(STRING + COLON + indent(newline(" ") + value))
@rule @rule
def array(self): def array():
return group( return group(
self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE LSQUARE + opt(indent(newline() + _array_items)) + newline() + RSQUARE
) )
@rule @rule
def _array_items(self): def _array_items():
return alt( return alt(
self.value, value,
self.value + self.COMMA + newline(" ") + self._array_items, value + COMMA + newline(" ") + _array_items,
) )
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
LCURLY = Terminal("{") LCURLY = Terminal("LCURLY", "{")
RCURLY = Terminal("}") RCURLY = Terminal("RCURLY", "}")
COMMA = Terminal(",") COMMA = Terminal("COMMA", ",")
COLON = Terminal(":") COLON = Terminal("COLON", ":")
LSQUARE = Terminal("[") LSQUARE = Terminal("LSQUARE", "[")
RSQUARE = Terminal("]") RSQUARE = Terminal("RSQUARE", "]")
TRUE = Terminal("true") TRUE = Terminal("TRUE", "true")
FALSE = Terminal("false") FALSE = Terminal("FALSE", "false")
NULL = Terminal("null") NULL = Terminal("NULL", "null")
NUMBER = Terminal( NUMBER = Terminal(
"NUMBER",
Re.seq( Re.seq(
Re.set(("0", "9")).plus(), Re.set(("0", "9")).plus(),
Re.seq( Re.seq(
@ -100,6 +97,7 @@ class JsonGrammar(Grammar):
), ),
) )
STRING = Terminal( STRING = Terminal(
"STRING",
Re.seq( Re.seq(
Re.literal('"'), Re.literal('"'),
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(), (~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
@ -107,8 +105,9 @@ class JsonGrammar(Grammar):
) )
) )
return Grammar(start=root, trivia=[BLANKS])
JSON = JsonGrammar() JSON = make_json_grammar()
JSON_PARSER = JSON.build_table() JSON_PARSER = JSON.build_table()
JSON_LEXER = JSON.compile_lexer() JSON_LEXER = JSON.compile_lexer()
@ -228,47 +227,49 @@ def test_layout_basic():
) )
class TG(Grammar): def make_test_grammar():
start = "root" @rule
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"] def root():
return _expression
@rule @rule
def root(self): def _expression():
return self._expression return word | list
@rule @rule
def _expression(self): def list():
return self.word | self.list return group(LPAREN, indent(nl, _expressions), nl, RPAREN)
@rule @rule
def list(self): def _expressions():
return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN) return _expression | seq(_expressions, sp, _expression)
@rule @rule
def _expressions(self): def word():
return self._expression | seq(self._expressions, sp, self._expression) return OK | seq(BREAK, br, BREAK)
@rule LPAREN = Terminal("LPAREN", "(")
def word(self): RPAREN = Terminal("RPAREN", ")")
return self.OK | seq(self.BREAK, br, self.BREAK) OK = Terminal("OK", "ok")
BREAK = Terminal("BREAK", "break")
LPAREN = Terminal("(") BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
RPAREN = Terminal(")") LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
OK = Terminal("ok")
BREAK = Terminal("break")
BLANKS = Terminal(Re.set(" ", "\t").plus())
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal( COMMENT = Terminal(
"COMMENT",
Re.seq(Re.literal(";"), Re.set("\n").invert().star()), Re.seq(Re.literal(";"), Re.set("\n").invert().star()),
trivia_mode=TriviaMode.LineComment, trivia_mode=TriviaMode.LineComment,
) )
return Grammar(start=root, trivia=[BLANKS, LINE_BREAK, COMMENT], pretty_indent=" ")
TG = make_test_grammar()
def test_forced_break(): def test_forced_break():
g = TG() g_lexer = TG.compile_lexer()
g_lexer = g.compile_lexer() g_parser = TG.build_table()
g_parser = g.build_table()
text = "((ok ok) (ok break break ok) (ok ok ok ok))" text = "((ok ok) (ok break break ok) (ok ok ok ok))"
@ -276,29 +277,28 @@ def test_forced_break():
assert errors == [] assert errors == []
assert tree is not None assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g)) printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text) result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output( assert result == _output(
""" """
( (
(ok ok) (ok ok)
( (
ok ok
break break
break break
ok ok
) )
(ok ok ok ok) (ok ok ok ok)
) )
""" """
) )
def test_maintaining_line_breaks(): def test_maintaining_line_breaks():
g = TG() g_lexer = TG.compile_lexer()
g_lexer = g.compile_lexer() g_parser = TG.build_table()
g_parser = g.build_table()
text = """((ok ok) text = """((ok ok)
; Don't break here. ; Don't break here.
@ -316,30 +316,29 @@ def test_maintaining_line_breaks():
assert errors == [] assert errors == []
assert tree is not None assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g)) printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text) result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output( assert result == _output(
""" """
( (
(ok ok) (ok ok)
; Don't break here. ; Don't break here.
(ok) (ok)
*SPACE* *SPACE**SPACE*
; ^ Do keep this break though. ; ^ Do keep this break though.
(ok) (ok)
*SPACE* *SPACE**SPACE*
; ^ This should only be one break. ; ^ This should only be one break.
(ok) (ok)
) )
""" """
) )
def test_trailing_trivia(): def test_trailing_trivia():
g = TG() g_lexer = TG.compile_lexer()
g_lexer = g.compile_lexer() g_parser = TG.build_table()
g_parser = g.build_table()
text = """((ok ok)); Don't lose this! text = """((ok ok)); Don't lose this!
@ -350,7 +349,7 @@ def test_trailing_trivia():
assert errors == [] assert errors == []
assert tree is not None assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g)) printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text) result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output( assert result == _output(
@ -363,9 +362,8 @@ def test_trailing_trivia():
def test_trailing_trivia_two(): def test_trailing_trivia_two():
g = TG() g_lexer = TG.compile_lexer()
g_lexer = g.compile_lexer() g_parser = TG.build_table()
g_parser = g.build_table()
text = """((ok ok)) text = """((ok ok))
@ -376,7 +374,7 @@ def test_trailing_trivia_two():
assert errors == [] assert errors == []
assert tree is not None assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g)) printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text) result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output( assert result == _output(
@ -389,9 +387,8 @@ def test_trailing_trivia_two():
def test_trailing_trivia_split(): def test_trailing_trivia_split():
g = TG() g_lexer = TG.compile_lexer()
g_lexer = g.compile_lexer() g_parser = TG.build_table()
g_parser = g.build_table()
text = """((ok ok)); Don't lose this! text = """((ok ok)); Don't lose this!
@ -432,7 +429,7 @@ def test_trailing_trivia_split():
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}") print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
trivia_doc = runtime.Matcher( trivia_doc = runtime.Matcher(
builder.MatcherTable(ParseTable([], [], set()), {}, {}), builder.MatcherTable(ParseTable([], [], set(), {}), {}, {}),
TRIVIA_MODES, TRIVIA_MODES,
).apply_post_trivia( ).apply_post_trivia(
token.post_trivia, token.post_trivia,