[all] A whole new style for grammars

Say goodbye to the sea of `self.`!
This commit is contained in:
John Doty 2024-11-09 11:21:30 -08:00
parent d6f1e7aba1
commit 5064a768e7
10 changed files with 1097 additions and 1318 deletions

View file

@ -20,444 +20,415 @@ from parser import (
sp,
)
class FineGrammar(Grammar):
# generator = parser.GenerateLR1
# generator = parser.GeneratePager
start = "File"
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
pretty_indent = " "
def __init__(self):
super().__init__(
precedence=[
(Assoc.RIGHT, [self.EQUAL]),
(Assoc.LEFT, [self.OR]),
(Assoc.LEFT, [self.IS]),
(Assoc.LEFT, [self.AND]),
(Assoc.LEFT, [self.EQUALEQUAL, self.BANGEQUAL]),
(Assoc.LEFT, [self.LESS, self.GREATER, self.GREATEREQUAL, self.LESSEQUAL]),
(Assoc.LEFT, [self.PLUS, self.MINUS]),
(Assoc.LEFT, [self.STAR, self.SLASH]),
(Assoc.LEFT, [self.primary_expression]),
(Assoc.LEFT, [self.LPAREN]),
(Assoc.LEFT, [self.DOT]),
#
# If there's a confusion about whether to make an IF
# statement or an expression, prefer the statement.
#
(Assoc.NONE, [self.if_statement]),
],
)
@rule("File")
def file(self) -> Rule:
return self._file_statement_list
def file() -> Rule:
return _file_statement_list
@rule
def _file_statement_list(self) -> Rule:
def _file_statement_list() -> Rule:
return alt(
self._file_statement,
self._file_statement_list + nl + self._file_statement,
_file_statement,
_file_statement_list + nl + _file_statement,
)
@rule
def _file_statement(self) -> Rule:
def _file_statement() -> Rule:
return (
self.import_statement | self.class_declaration | self.export_statement | self._statement
import_statement | class_declaration | export_statement | _statement
)
@rule
def import_statement(self) -> Rule:
def import_statement() -> Rule:
return group(
self.IMPORT, sp, self.STRING, sp, self.AS, sp, self.IDENTIFIER, sp, self.SEMICOLON
IMPORT, sp, STRING, sp, AS, sp, IDENTIFIER, sp, SEMICOLON
)
@rule("ClassDeclaration")
def class_declaration(self) -> Rule:
def class_declaration() -> Rule:
return seq(
group(
self.CLASS,
CLASS,
sp,
mark(self.IDENTIFIER, field="name", highlight=highlight.entity.name.type),
mark(IDENTIFIER, field="name", highlight=highlight.entity.name.type),
sp,
self.LCURLY,
LCURLY,
),
indent(nl, mark(opt(self.class_body), field="body")),
indent(nl, mark(opt(class_body), field="body")),
nl,
self.RCURLY,
RCURLY,
nl, # Extra newline at the end of the class
)
@rule("ClassBody")
def class_body(self) -> Rule:
return self._class_members
def class_body() -> Rule:
return _class_members
@rule
def _class_members(self) -> Rule:
return self._class_member | seq(self._class_members, nl, self._class_member)
def _class_members() -> Rule:
return _class_member | seq(_class_members, nl, _class_member)
@rule
def _class_member(self) -> Rule:
return self.field_declaration | self.function_declaration
def _class_member() -> Rule:
return field_declaration | function_declaration
@rule("FieldDecl")
def field_declaration(self) -> Rule:
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression, self.SEMICOLON)
def field_declaration() -> Rule:
return group(IDENTIFIER, COLON, sp, type_expression, SEMICOLON)
# Types
@rule("TypeExpression")
def type_expression(self) -> Rule:
return self.alternate_type | self.type_identifier
def type_expression() -> Rule:
return alternate_type | type_identifier
@rule("AlternateType")
def alternate_type(self) -> Rule:
return group(self.type_expression, sp, self.OR, sp, self.type_identifier)
def alternate_type() -> Rule:
return group(type_expression, sp, OR, sp, type_identifier)
@rule("TypeIdentifier")
def type_identifier(self) -> Rule:
return mark(self.IDENTIFIER, field="id", highlight=highlight.entity.name.type)
def type_identifier() -> Rule:
return mark(IDENTIFIER, field="id", highlight=highlight.entity.name.type)
@rule
def export_statement(self) -> Rule:
def export_statement() -> Rule:
return alt(
group(self.EXPORT, sp, self.class_declaration),
group(self.EXPORT, sp, self.function_declaration),
group(self.EXPORT, sp, self.let_statement),
group(self.EXPORT, sp, self.export_list, self.SEMICOLON),
group(EXPORT, sp, class_declaration),
group(EXPORT, sp, function_declaration),
group(EXPORT, sp, let_statement),
group(EXPORT, sp, export_list, SEMICOLON),
)
@rule
def export_list(self) -> Rule:
return self.IDENTIFIER | seq(self.IDENTIFIER, self.COMMA, sp, self.export_list)
def export_list() -> Rule:
return IDENTIFIER | seq(IDENTIFIER, COMMA, sp, export_list)
# Functions
@rule("FunctionDecl")
def function_declaration(self) -> Rule:
def function_declaration() -> Rule:
return seq(
group(
group(
group(
self.FUN,
FUN,
sp,
mark(
self.IDENTIFIER,
IDENTIFIER,
field="name",
highlight=highlight.entity.name.function,
),
),
nl,
mark(self.function_parameters, field="parameters"),
mark(function_parameters, field="parameters"),
),
mark(
opt(indent(sp, group(self.ARROW, sp, self.type_expression))),
opt(indent(sp, group(ARROW, sp, type_expression))),
field="return_type",
),
),
sp,
mark(self.block, field="body"),
mark(block, field="body"),
nl,
)
@rule("ParamList")
def function_parameters(self) -> Rule:
def function_parameters() -> Rule:
return group(
self.LPAREN,
LPAREN,
indent(
nl,
opt(
self._first_parameter
| seq(self._first_parameter, self.COMMA)
| group(self._first_parameter, self.COMMA, sp, self._parameter_list)
_first_parameter
| seq(_first_parameter, COMMA)
| group(_first_parameter, COMMA, sp, _parameter_list)
),
),
nl,
self.RPAREN,
RPAREN,
)
@rule
def _first_parameter(self) -> Rule:
return self.SELF | self.parameter
def _first_parameter() -> Rule:
return SELF | parameter
@rule
def _parameter_list(self) -> Rule:
return self.parameter | seq(self.parameter, self.COMMA, sp, self._parameter_list)
def _parameter_list() -> Rule:
return parameter | seq(parameter, COMMA, sp, _parameter_list)
@rule("Parameter")
def parameter(self) -> Rule:
return group(self.IDENTIFIER, self.COLON, sp, self.type_expression)
def parameter() -> Rule:
return group(IDENTIFIER, COLON, sp, type_expression)
# Block
@rule("Block")
def block(self) -> Rule:
def block() -> Rule:
return alt(
group(self.LCURLY, nl, self.RCURLY),
group(self.LCURLY, indent(br, self.block_body), sp, self.RCURLY),
group(LCURLY, nl, RCURLY),
group(LCURLY, indent(br, block_body), sp, RCURLY),
)
@rule("BlockBody")
def block_body(self) -> Rule:
def block_body() -> Rule:
return alt(
self.expression,
self._statement_list,
seq(self._statement_list, br, self.expression),
expression,
_statement_list,
seq(_statement_list, br, expression),
)
@rule
def _statement_list(self) -> Rule:
return self._statement | seq(self._statement_list, br, self._statement)
def _statement_list() -> Rule:
return _statement | seq(_statement_list, br, _statement)
@rule
def _statement(self) -> Rule:
def _statement() -> Rule:
return (
self.function_declaration
| self.let_statement
| self.return_statement
| self.for_statement
| self.if_statement
| self.while_statement
| self.expression_statement
function_declaration
| let_statement
| return_statement
| for_statement
| if_statement
| while_statement
| expression_statement
)
@rule("LetStatement")
def let_statement(self) -> Rule:
def let_statement() -> Rule:
return group(
group(
self.LET,
LET,
sp,
self.IDENTIFIER,
IDENTIFIER,
sp,
self.EQUAL,
EQUAL,
),
indent(sp, self.expression, self.SEMICOLON),
indent(sp, expression, SEMICOLON),
)
@rule("ReturnStatement")
def return_statement(self) -> Rule:
def return_statement() -> Rule:
return alt(
group(self.RETURN, indent(sp, group(self.expression, self.SEMICOLON))),
group(self.RETURN, self.SEMICOLON),
group(RETURN, indent(sp, group(expression, SEMICOLON))),
group(RETURN, SEMICOLON),
)
@rule("ForStatement")
def for_statement(self) -> Rule:
def for_statement() -> Rule:
return group(
group(self.FOR, sp, self.iterator_variable, sp, self.IN, sp, group(self.expression)),
self.block,
group(FOR, sp, iterator_variable, sp, IN, sp, group(expression)),
block,
)
@rule("IteratorVariable")
def iterator_variable(self) -> Rule:
return self.IDENTIFIER
def iterator_variable() -> Rule:
return IDENTIFIER
@rule("IfStatement")
def if_statement(self) -> Rule:
return self.conditional_expression
def if_statement() -> Rule:
return conditional_expression
@rule
def while_statement(self) -> Rule:
return group(group(self.WHILE, sp, self.expression), sp, self.block)
def while_statement() -> Rule:
return group(group(WHILE, sp, expression), sp, block)
@rule
def expression_statement(self) -> Rule:
return seq(self.expression, self.SEMICOLON)
def expression_statement() -> Rule:
return seq(expression, SEMICOLON)
# Expressions
@rule(transparent=True)
def expression(self) -> Rule:
return self.binary_expression | self.is_expression | self.primary_expression
def expression() -> Rule:
return binary_expression | is_expression | primary_expression
@rule("BinaryExpression")
def binary_expression(self) -> Rule:
def binary_expression() -> Rule:
return alt(
# Assignment gets special indentation.
group(group(self.expression, sp, self.EQUAL), indent(sp, self.expression)),
group(group(expression, sp, EQUAL), indent(sp, expression)),
# Other ones do not.
group(group(self.expression, sp, self.OR), sp, self.expression),
group(group(self.expression, sp, self.AND), sp, self.expression),
group(group(self.expression, sp, self.EQUALEQUAL), sp, self.expression),
group(group(self.expression, sp, self.BANGEQUAL), sp, self.expression),
group(group(self.expression, sp, self.LESS), sp, self.expression),
group(group(self.expression, sp, self.LESSEQUAL), sp, self.expression),
group(group(self.expression, sp, self.GREATER), sp, self.expression),
group(group(self.expression, sp, self.GREATEREQUAL), sp, self.expression),
group(group(self.expression, sp, self.PLUS), sp, self.expression),
group(group(self.expression, sp, self.MINUS), sp, self.expression),
group(group(self.expression, sp, self.STAR), sp, self.expression),
group(group(self.expression, sp, self.SLASH), sp, self.expression),
group(group(expression, sp, OR), sp, expression),
group(group(expression, sp, AND), sp, expression),
group(group(expression, sp, EQUALEQUAL), sp, expression),
group(group(expression, sp, BANGEQUAL), sp, expression),
group(group(expression, sp, LESS), sp, expression),
group(group(expression, sp, LESSEQUAL), sp, expression),
group(group(expression, sp, GREATER), sp, expression),
group(group(expression, sp, GREATEREQUAL), sp, expression),
group(group(expression, sp, PLUS), sp, expression),
group(group(expression, sp, MINUS), sp, expression),
group(group(expression, sp, STAR), sp, expression),
group(group(expression, sp, SLASH), sp, expression),
)
@rule("IsExpression")
def is_expression(self) -> Rule:
return group(self.expression, sp, self.IS, indent(sp, self.pattern))
def is_expression() -> Rule:
return group(expression, sp, IS, indent(sp, pattern))
@rule
def primary_expression(self) -> Rule:
def primary_expression() -> Rule:
return (
self.identifier_expression
| self.literal_expression
| self.SELF
| seq(self.BANG, self.primary_expression)
| seq(self.MINUS, self.primary_expression)
| self.block
| self.conditional_expression
| self.list_constructor_expression
| self.object_constructor_expression
| self.match_expression
| seq(self.primary_expression, self.LPAREN, self.RPAREN)
identifier_expression
| literal_expression
| SELF
| seq(BANG, primary_expression)
| seq(MINUS, primary_expression)
| block
| conditional_expression
| list_constructor_expression
| object_constructor_expression
| match_expression
| seq(primary_expression, LPAREN, RPAREN)
| group(
self.primary_expression,
self.LPAREN,
indent(nl, self._expression_list),
primary_expression,
LPAREN,
indent(nl, _expression_list),
nl,
self.RPAREN,
RPAREN,
)
| group(self.primary_expression, indent(nl, self.DOT, self.IDENTIFIER))
| group(self.LPAREN, indent(nl, self.expression), nl, self.RPAREN)
| group(primary_expression, indent(nl, DOT, IDENTIFIER))
| group(LPAREN, indent(nl, expression), nl, RPAREN)
)
@rule("IdentifierExpression")
def identifier_expression(self):
return self.IDENTIFIER
def identifier_expression():
return IDENTIFIER
@rule("Literal")
def literal_expression(self):
return self.NUMBER | self.STRING | self.TRUE | self.FALSE
def literal_expression():
return NUMBER | STRING | TRUE | FALSE
@rule("ConditionalExpression")
def conditional_expression(self) -> Rule:
def conditional_expression() -> Rule:
return (
seq(group(self.IF, sp, self.expression), sp, self.block)
seq(group(IF, sp, expression), sp, block)
| seq(
group(self.IF, sp, self.expression),
group(IF, sp, expression),
sp,
self.block,
block,
sp,
self.ELSE,
ELSE,
sp,
self.conditional_expression,
conditional_expression,
)
| seq(
group(self.IF, sp, self.expression), sp, self.block, sp, self.ELSE, sp, self.block
group(IF, sp, expression), sp, block, sp, ELSE, sp, block
)
)
@rule
def list_constructor_expression(self) -> Rule:
def list_constructor_expression() -> Rule:
return alt(
group(self.LSQUARE, nl, self.RSQUARE),
group(self.LSQUARE, indent(nl, self._expression_list), nl, self.RSQUARE),
group(LSQUARE, nl, RSQUARE),
group(LSQUARE, indent(nl, _expression_list), nl, RSQUARE),
)
@rule
def _expression_list(self) -> Rule:
def _expression_list() -> Rule:
return (
self.expression
| seq(self.expression, self.COMMA)
| seq(self.expression, self.COMMA, sp, self._expression_list)
expression
| seq(expression, COMMA)
| seq(expression, COMMA, sp, _expression_list)
)
@rule
def match_expression(self) -> Rule:
def match_expression() -> Rule:
return group(
group(self.MATCH, sp, self.expression, sp, self.LCURLY),
indent(sp, self.match_arms),
group(MATCH, sp, expression, sp, LCURLY),
indent(sp, match_arms),
sp,
self.RCURLY,
RCURLY,
)
@rule("MatchArms")
def match_arms(self) -> Rule:
return self._match_arms
def match_arms() -> Rule:
return _match_arms
@rule
def _match_arms(self) -> Rule:
def _match_arms() -> Rule:
return (
self.match_arm
| seq(self.match_arm, self.COMMA)
| seq(self.match_arm, self.COMMA, br, self._match_arms)
match_arm
| seq(match_arm, COMMA)
| seq(match_arm, COMMA, br, _match_arms)
)
@rule("MatchArm")
def match_arm(self) -> Rule:
return group(self.pattern, sp, self.ARROW, sp, self.expression)
def match_arm() -> Rule:
return group(pattern, sp, ARROW, sp, expression)
@rule("Pattern")
def pattern(self) -> Rule:
def pattern() -> Rule:
return (
group(self.variable_binding, self._pattern_core, sp, self.AND, sp, self.expression)
| group(self.variable_binding, self._pattern_core)
| self._pattern_core
group(variable_binding, _pattern_core, sp, AND, sp, expression)
| group(variable_binding, _pattern_core)
| _pattern_core
)
@rule
def _pattern_core(self) -> Rule:
return self.type_expression | self.wildcard_pattern
def _pattern_core() -> Rule:
return type_expression | wildcard_pattern
@rule("WildcardPattern")
def wildcard_pattern(self) -> Rule:
return self.UNDERSCORE
def wildcard_pattern() -> Rule:
return UNDERSCORE
@rule("VariableBinding")
def variable_binding(self) -> Rule:
return seq(self.IDENTIFIER, self.COLON)
def variable_binding() -> Rule:
return seq(IDENTIFIER, COLON)
@rule
def object_constructor_expression(self) -> Rule:
return group(self.NEW, sp, self.type_identifier, sp, self.field_list)
def object_constructor_expression() -> Rule:
return group(NEW, sp, type_identifier, sp, field_list)
@rule
def field_list(self) -> Rule:
def field_list() -> Rule:
return alt(
seq(self.LCURLY, self.RCURLY),
group(self.LCURLY, indent(nl, self.field_values), nl, self.RCURLY),
seq(LCURLY, RCURLY),
group(LCURLY, indent(nl, field_values), nl, RCURLY),
)
@rule
def field_values(self) -> Rule:
def field_values() -> Rule:
return (
self.field_value
| seq(self.field_value, self.COMMA)
| seq(self.field_value, self.COMMA, sp, self.field_values)
field_value
| seq(field_value, COMMA)
| seq(field_value, COMMA, sp, field_values)
)
@rule
def field_value(self) -> Rule:
return self.IDENTIFIER | group(self.IDENTIFIER, self.COLON, indent(sp, self.expression))
def field_value() -> Rule:
return IDENTIFIER | group(IDENTIFIER, COLON, indent(sp, expression))
BLANKS = Terminal(Re.set(" ", "\t").plus())
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal(
"COMMENT",
Re.seq(Re.literal("//"), Re.set("\n").invert().star()),
highlight=highlight.comment.line,
trivia_mode=TriviaMode.LineComment,
)
ARROW = Terminal("->", highlight=highlight.keyword.operator)
AS = Terminal("as", highlight=highlight.keyword.operator.expression)
BAR = Terminal("|", highlight=highlight.keyword.operator.expression)
CLASS = Terminal("class", highlight=highlight.storage.type.klass)
COLON = Terminal(":", highlight=highlight.punctuation.separator)
ELSE = Terminal("else", highlight=highlight.keyword.control.conditional)
FOR = Terminal("for", highlight=highlight.keyword.control)
FUN = Terminal("fun", highlight=highlight.storage.type.function)
ARROW = Terminal("ARROW", "->", highlight=highlight.keyword.operator)
AS = Terminal("AS", "as", highlight=highlight.keyword.operator.expression)
BAR = Terminal("BAR", "|", highlight=highlight.keyword.operator.expression)
CLASS = Terminal("CLASS", "class", highlight=highlight.storage.type.klass)
COLON = Terminal("COLON", ":", highlight=highlight.punctuation.separator)
ELSE = Terminal("ELSE", "else", highlight=highlight.keyword.control.conditional)
FOR = Terminal("FOR", "for", highlight=highlight.keyword.control)
FUN = Terminal("FUN", "fun", highlight=highlight.storage.type.function)
IDENTIFIER = Terminal(
"IDENTIFIER",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
IF = Terminal("if", highlight=highlight.keyword.control.conditional)
IMPORT = Terminal("import", highlight=highlight.keyword.other)
IN = Terminal("in", highlight=highlight.keyword.operator)
LCURLY = Terminal("{", highlight=highlight.punctuation.curly_brace.open)
RCURLY = Terminal("}", highlight=highlight.punctuation.curly_brace.close)
LET = Terminal("let", highlight=highlight.keyword.other)
RETURN = Terminal("return", highlight=highlight.keyword.control)
SEMICOLON = Terminal(";", highlight=highlight.punctuation.separator)
IF = Terminal("IF", "if", highlight=highlight.keyword.control.conditional)
IMPORT = Terminal("IMPORT", "import", highlight=highlight.keyword.other)
IN = Terminal("IN", "in", highlight=highlight.keyword.operator)
LCURLY = Terminal("LCURLY", "{", highlight=highlight.punctuation.curly_brace.open)
RCURLY = Terminal("RCURLY", "}", highlight=highlight.punctuation.curly_brace.close)
LET = Terminal("LET", "let", highlight=highlight.keyword.other)
RETURN = Terminal("RETURN", "return", highlight=highlight.keyword.control)
SEMICOLON = Terminal("SEMICOLON", ";", highlight=highlight.punctuation.separator)
STRING = Terminal(
"STRING",
# Double-quoted string.
Re.seq(
Re.literal('"'),
@ -472,26 +443,27 @@ class FineGrammar(Grammar):
),
highlight=highlight.string.quoted,
)
WHILE = Terminal("while", highlight=highlight.keyword.control)
EQUAL = Terminal("=", highlight=highlight.keyword.operator.expression)
LPAREN = Terminal("(", highlight=highlight.punctuation.parenthesis.open)
RPAREN = Terminal(")", highlight=highlight.punctuation.parenthesis.close)
COMMA = Terminal(",", highlight=highlight.punctuation.separator)
SELF = Terminal("self", name="SELFF", highlight=highlight.variable.language)
OR = Terminal("or", highlight=highlight.keyword.operator.expression)
IS = Terminal("is", highlight=highlight.keyword.operator.expression)
AND = Terminal("and", highlight=highlight.keyword.operator.expression)
EQUALEQUAL = Terminal("==", highlight=highlight.keyword.operator.expression)
BANGEQUAL = Terminal("!=", highlight=highlight.keyword.operator.expression)
LESS = Terminal("<", highlight=highlight.keyword.operator.expression)
GREATER = Terminal(">", highlight=highlight.keyword.operator.expression)
LESSEQUAL = Terminal("<=", highlight=highlight.keyword.operator.expression)
GREATEREQUAL = Terminal(">=", highlight=highlight.keyword.operator.expression)
PLUS = Terminal("+", highlight=highlight.keyword.operator.expression)
MINUS = Terminal("-", highlight=highlight.keyword.operator.expression)
STAR = Terminal("*", highlight=highlight.keyword.operator.expression)
SLASH = Terminal("/", highlight=highlight.keyword.operator.expression)
WHILE = Terminal("WHILE", "while", highlight=highlight.keyword.control)
EQUAL = Terminal("EQUAL", "=", highlight=highlight.keyword.operator.expression)
LPAREN = Terminal("LPAREN", "(", highlight=highlight.punctuation.parenthesis.open)
RPAREN = Terminal("RPAREN", ")", highlight=highlight.punctuation.parenthesis.close)
COMMA = Terminal("COMMA", ",", highlight=highlight.punctuation.separator)
SELF = Terminal("SELFF", "self", highlight=highlight.variable.language)
OR = Terminal("OR", "or", highlight=highlight.keyword.operator.expression)
IS = Terminal("IS", "is", highlight=highlight.keyword.operator.expression)
AND = Terminal("AND", "and", highlight=highlight.keyword.operator.expression)
EQUALEQUAL = Terminal("EQUALEQUAL", "==", highlight=highlight.keyword.operator.expression)
BANGEQUAL = Terminal("BANGEQUAL", "!=", highlight=highlight.keyword.operator.expression)
LESS = Terminal("LESS", "<", highlight=highlight.keyword.operator.expression)
GREATER = Terminal("GREATER", ">", highlight=highlight.keyword.operator.expression)
LESSEQUAL = Terminal("LESSEQUAL", "<=", highlight=highlight.keyword.operator.expression)
GREATEREQUAL = Terminal("GREATEREQUAL", ">=", highlight=highlight.keyword.operator.expression)
PLUS = Terminal("PLUS", "+", highlight=highlight.keyword.operator.expression)
MINUS = Terminal("MINUS", "-", highlight=highlight.keyword.operator.expression)
STAR = Terminal("STAR", "*", highlight=highlight.keyword.operator.expression)
SLASH = Terminal("SLASH", "/", highlight=highlight.keyword.operator.expression)
NUMBER = Terminal(
"NUMBER",
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq(
@ -506,17 +478,40 @@ class FineGrammar(Grammar):
),
highlight=highlight.constant.numeric,
)
TRUE = Terminal("true", highlight=highlight.constant.language)
FALSE = Terminal("false", highlight=highlight.constant.language)
BANG = Terminal("!", highlight=highlight.keyword.operator.expression)
DOT = Terminal(".", highlight=highlight.punctuation.separator)
MATCH = Terminal("match", highlight=highlight.keyword.other)
EXPORT = Terminal("export", highlight=highlight.keyword.other)
UNDERSCORE = Terminal("_", highlight=highlight.variable.language)
NEW = Terminal("new", highlight=highlight.keyword.operator)
LSQUARE = Terminal("[", highlight=highlight.punctuation.square_bracket.open)
RSQUARE = Terminal("]", highlight=highlight.punctuation.square_bracket.close)
TRUE = Terminal("TRUE", "true", highlight=highlight.constant.language)
FALSE = Terminal("FALSE", "false", highlight=highlight.constant.language)
BANG = Terminal("BANG", "!", highlight=highlight.keyword.operator.expression)
DOT = Terminal("DOT", ".", highlight=highlight.punctuation.separator)
MATCH = Terminal("MATCH", "match", highlight=highlight.keyword.other)
EXPORT = Terminal("EXPORT", "export", highlight=highlight.keyword.other)
UNDERSCORE = Terminal("UNDERSCORE", "_", highlight=highlight.variable.language)
NEW = Terminal("NEW", "new", highlight=highlight.keyword.operator)
LSQUARE = Terminal("LSQUARE", "[", highlight=highlight.punctuation.square_bracket.open)
RSQUARE = Terminal("RSQUARE", "]", highlight=highlight.punctuation.square_bracket.close)
FineGrammar=Grammar(
start=file,
trivia=[BLANKS, LINE_BREAK, COMMENT],
pretty_indent=" ",
precedence=[
(Assoc.RIGHT, [EQUAL]),
(Assoc.LEFT, [OR]),
(Assoc.LEFT, [IS]),
(Assoc.LEFT, [AND]),
(Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]),
(Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
(Assoc.LEFT, [PLUS, MINUS]),
(Assoc.LEFT, [STAR, SLASH]),
(Assoc.LEFT, [primary_expression]),
(Assoc.LEFT, [LPAREN]),
(Assoc.LEFT, [DOT]),
#
# If there's a confusion about whether to make an IF
# statement or an expression, prefer the statement.
#
(Assoc.NONE, [if_statement]),
],
)
if __name__ == "__main__":
from pathlib import Path
@ -525,7 +520,7 @@ if __name__ == "__main__":
from parser.tree_sitter import emit_tree_sitter_grammar, emit_tree_sitter_queries
# TODO: Actually generate a lexer/parser for some runtime.
grammar = FineGrammar()
grammar = FineGrammar
table = grammar.build_table()
# print(table.format())

View file

@ -25,8 +25,6 @@ class FaceQuery:
def gather_faces(grammar: parser.Grammar):
nts = {nt.name: nt for nt in grammar.non_terminals()}
def scoop(node: str, input: parser.FlattenedWithMetadata, visited: set[str]) -> list[FaceQuery]:
parts = []
for item in input:
@ -52,13 +50,12 @@ def gather_faces(grammar: parser.Grammar):
)
)
elif isinstance(item, str):
nt = nts[item]
if nt.transparent:
if nt.name in visited:
elif isinstance(item, parser.NonTerminal):
if item.transparent:
if item.name in visited:
continue
visited.add(nt.name)
body = nt.fn(grammar)
visited.add(item.name)
body = item.definition
for production in body.flatten(with_metadata=True):
parts.extend(scoop(node, production, visited))
@ -69,7 +66,7 @@ def gather_faces(grammar: parser.Grammar):
if rule.transparent:
continue
body = rule.fn(grammar)
body = rule.definition
for production in body.flatten(with_metadata=True):
queries.extend(scoop(rule.name, production, set()))

View file

@ -17,12 +17,10 @@ the thing that processes the tables.
## Making Grammars
To get started, create a grammar that derives from the `Grammar` class. Create
one method per nonterminal, decorated with the `rule` decorator. Here's an
example:
Define a series of terminals (with `Terminal`) and rules (as functions decorated
with `@rule`), and then pass the starting rule to the constructor of a `Grammar`
object:
class SimpleGrammar(Grammar):
@rule
def expression(self):
return seq(self.expression, self.PLUS, self.term) | self.term
@ -36,6 +34,7 @@ example:
RPAREN = Terminal(')')
ID = Terminal('id')
grammar = Grammar(start=expression)
## Using grammars
@ -1533,7 +1532,9 @@ class ParserGenerator:
return builder.flush(config_sets)
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
FlattenedWithMetadata = list[
"NonTerminal|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"
]
###############################################################################
@ -1578,26 +1579,32 @@ class Rule:
class Terminal(Rule):
"""A token, or terminal symbol in the grammar."""
name: str | None
name: str
pattern: "str | Re"
meta: dict[str, typing.Any]
regex: bool
error_name: str | None
definition_location: str
def __init__(
self,
name: str,
pattern: "str|Re",
*,
name: str | None = None,
error_name: str | None = None,
**kwargs,
):
# TODO: Consider identifying the name from some kind of globals
# dictionary or something if necessary.
self.name = name
self.pattern = pattern
self.meta = kwargs
self.regex = isinstance(pattern, Re)
self.error_name = error_name
caller = inspect.stack()[1]
self.definition_location = f"{caller.filename}:{caller.lineno}"
def flatten(
self, with_metadata: bool = False
) -> typing.Generator[FlattenedWithMetadata, None, None]:
@ -1617,14 +1624,17 @@ class NonTerminal(Rule):
grammar class.
"""
fn: typing.Callable[["Grammar"], Rule]
fn: typing.Callable[[], Rule]
name: str
transparent: bool
error_name: str | None
definition_location: str
_definition: Rule | None
_body: "list[list[NonTerminal | Terminal]] | None"
def __init__(
self,
fn: typing.Callable[["Grammar"], Rule],
fn: typing.Callable[[], Rule],
name: str | None = None,
transparent: bool = False,
error_name: str | None = None,
@ -1645,22 +1655,37 @@ class NonTerminal(Rule):
self.name = name or fn.__name__
self.transparent = transparent
self.error_name = error_name
self._definition = None
self._body = None
def generate_body(self, grammar) -> list[list[str | Terminal]]:
"""Generate the body of the non-terminal.
caller = inspect.stack()[1]
self.definition_location = f"{caller.filename}:{caller.lineno}"
We do this by first calling the associated function in order to get a
Rule, and then flattening the Rule into the associated set of
productions. We strip the metadata from the flattened result to make
life a little easier for the caller.
@property
def definition(self) -> Rule:
"""The rule that is the definition of this nonterminal.
(As opposed to this rule itself, which is... itself.)
"""
if self._definition is None:
self._definition = self.fn()
return self._definition
@property
def body(self) -> "list[list[NonTerminal | Terminal]]":
"""The flattened body of the nonterminal: a list of productions where
each production is a sequence of Terminals and NonTerminals.
"""
def without_metadata(result: FlattenedWithMetadata) -> list[str | Terminal]:
def without_metadata(result: FlattenedWithMetadata) -> list[NonTerminal | Terminal]:
for item in result:
assert not isinstance(item, tuple)
return typing.cast(list[str | Terminal], result)
return typing.cast(list[NonTerminal | Terminal], result)
return [without_metadata(rule) for rule in self.fn(grammar).flatten(with_metadata=False)]
if self._body is None:
self._body = [without_metadata(rule) for rule in self.fn().flatten(with_metadata=False)]
return self._body
def flatten(
self, with_metadata: bool = False
@ -1669,7 +1694,7 @@ class NonTerminal(Rule):
# the context of some other production. Yield ourselves, and trust that
# in time we will be asked to generate our body.
del with_metadata
yield [self.name]
yield [self]
class AlternativeRule(Rule):
@ -1775,7 +1800,7 @@ def mark(rule: Rule, **kwargs) -> Rule:
@typing.overload
def rule(f: typing.Callable, /) -> Rule: ...
def rule(f: typing.Callable, /) -> NonTerminal: ...
@typing.overload
@ -1783,16 +1808,15 @@ def rule(
name: str | None = None,
transparent: bool | None = None,
error_name: str | None = None,
) -> typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]: ...
) -> typing.Callable[[typing.Callable[[], Rule]], NonTerminal]: ...
def rule(
name: str | None | typing.Callable = None,
transparent: bool | None = None,
error_name: str | None = None,
) -> Rule | typing.Callable[[typing.Callable[[typing.Any], Rule]], Rule]:
"""The decorator that marks a method in a Grammar object as a nonterminal
rule.
) -> NonTerminal | typing.Callable[[typing.Callable[[], Rule]], NonTerminal]:
"""The decorator that marks a function as a nonterminal rule.
As with all the best decorators, it can be called with or without arguments.
If called with one argument, that argument is a name that overrides the name
@ -1801,7 +1825,7 @@ def rule(
if callable(name):
return rule()(name)
def wrapper(f: typing.Callable[[typing.Any], Rule]):
def wrapper(f: typing.Callable[[], Rule]):
nonlocal name
nonlocal transparent
nonlocal error_name
@ -2746,22 +2770,89 @@ class TriviaMode(enum.Enum):
###############################################################################
# Finally, the base class for grammars
# Finally, the grammar class.
###############################################################################
PrecedenceList = list[typing.Tuple[Assoc, list[Rule | str]]]
PrecedenceList = list[typing.Tuple[Assoc, list[Terminal|NonTerminal]]]
def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str,NonTerminal], dict[str,Terminal]]:
"""Starting from the given NonTerminal, gather all of the symbols
(NonTerminals and Terminals) that make up the grammar.
"""
# NOTE: We use a dummy dictionary here to preserve insertion order.
# That way the first element in named_rules is always the start
# symbol!
rules: dict[NonTerminal, int] = {}
terminals: dict[Terminal, int] = {}
# STEP 1 is to just gather all of the symbols that we can find.
queue: list[NonTerminal] = [start]
while len(queue) > 0:
nt = queue.pop()
if nt in rules:
continue
# TODO: Here we can track modules (via the funcitons that make up
# nonterminals, maybe) and maybe use that to infer terminal
# names.
rules[nt] = len(rules)
for rule in nt.body:
for symbol in rule:
if isinstance(symbol, NonTerminal):
if symbol not in rules:
queue.append(symbol)
elif isinstance(symbol, Terminal):
terminals[symbol] = len(terminals)
else:
typing.assert_never(symbol)
# (Terminals are also reachable!)
for symbol in trivia:
terminals[symbol] = len(terminals)
# Step 2 is to organize all of these things and check them for errors.
named_rules: dict[str, NonTerminal] = {}
for rule in rules:
existing = named_rules.get(rule.name)
if existing is not None:
# TODO TEST
raise ValueError(f"""Found more than one rule named {rule.name}:
- {existing.definition_location}
- {rule.definition_location}""")
named_rules[rule.name] = rule
named_terminals: dict[str, Terminal] = {}
for terminal in terminals:
existing = named_terminals.get(terminal.name)
if existing is not None:
# TODO TEST
raise ValueError(f"""Found more than one terminal named {terminal.name}:
- {existing.definition_location}
- {terminal.definition_location}""")
existing_rule = named_rules.get(terminal.name)
if existing_rule is not None:
# TODO TEST
raise ValueError(f"""Found a terminal and a rule both named {terminal.name}:
- The rule was defined at {existing_rule.definition_location}
- The terminal was defined at {terminal.definition_location}""")
named_terminals[terminal.name] = terminal
return (named_rules, named_terminals)
class Grammar:
"""The base class for defining a grammar.
Inherit from this, and and define members for your nonterminals, and then
use the `build_table` method to construct the parse tables.
"""A container that holds all the terminals and nonterminals for a
given grammar. The terminals and nonterminals are defined elsewhere;
provide the starting rule and this object will build the grammar from
everything accessible.
Here's an example of a simple grammar:
class SimpleGrammar(Grammar):
@rule
def expression(self):
return seq(self.expression, self.PLUS, self.term) | self.term
@ -2775,116 +2866,54 @@ class Grammar:
RPAREN = Terminal(')')
ID = Terminal('id')
grammar = Grammar(start=expression)
Not very exciting, perhaps, but it's something.
"""
_precedence: dict[str, typing.Tuple[Assoc, int]]
_generator: type[ParserGenerator]
start: NonTerminal
name: str
pretty_indent: str | None
_terminals: dict[str, Terminal]
_nonterminals: dict[str, NonTerminal]
_trivia: list[Terminal]
_precedence: dict[str, typing.Tuple[Assoc, int]]
def __init__(
self,
start: str | NonTerminal | None = None,
start: NonTerminal,
precedence: PrecedenceList | None = None,
generator: type[ParserGenerator] | None = None,
trivia: list[str | Terminal] | None = None,
trivia: list[Terminal] | None = None,
name: str | None = None,
pretty_indent: str | None = None,
):
if start is None:
start = getattr(self, "start", None)
if start is None:
raise ValueError(
"The default start rule must either be specified in the constructor or as an "
"attribute in the class."
)
if isinstance(start, NonTerminal):
start = start.name
if start.transparent:
# TODO: TEST
raise ValueError("The start rule cannot be transparent")
if precedence is None:
precedence = getattr(self, "precedence", [])
precedence = []
assert precedence is not None
if generator is None:
generator = getattr(self, "generator", ParserGenerator)
assert generator is not None
if trivia is None:
trivia = getattr(self, "trivia", [])
trivia = []
assert trivia is not None
# Fixup terminal names with the name of the member that declared it.
terminals = {}
for n, t in inspect.getmembers(self, lambda x: isinstance(x, Terminal)):
if t.name is None:
t.name = n
if n in terminals:
raise ValueError(f"More than one terminal has the name '{n}'")
terminals[n] = t
# Get the nonterminals.
nonterminals = {}
for _, nt in inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)):
if nt.name in nonterminals:
raise ValueError(f"More than one nonterminal found with the name '{nt.name}'")
if nt.name in terminals:
raise ValueError(
f"'{nt.name}' is the name of both a Terminal and a NonTerminal rule"
)
nonterminals[nt.name] = nt
# Resolve the trivia declarations correctly.
resolved_trivia: list[Terminal] = []
for t in trivia:
if isinstance(t, str):
resolved = terminals.get(t)
if resolved is None:
raise ValueError(f"The trivia '{t}' is not a terminal name")
resolved_trivia.append(resolved)
elif isinstance(t, Terminal):
resolved_trivia.append(t)
else:
raise ValueError(f"{t} must be either a terminal name or literally a terminal")
# Fix up the precedence table.
precedence_table = {}
for prec, (associativity, symbols) in enumerate(precedence):
for symbol in symbols:
key = None
if isinstance(symbol, Terminal):
key = symbol.name
if key is None:
raise ValueError(f"{symbol} is a terminal that has not had a name set yet")
elif isinstance(symbol, NonTerminal):
key = symbol.name
elif isinstance(symbol, str):
if symbol in terminals or symbol in nonterminals:
key = symbol
if key is None:
raise ValueError(
f"{symbol} must be either a Token or a NonTerminal, or the name of one"
)
precedence_table[key] = (associativity, prec + 1)
precedence_table[symbol.name] = (associativity, prec + 1)
if name is None:
name = getattr(self, "name", None)
if name is None:
name = self.__class__.__name__.removesuffix("Grammar").lower()
name = "unknown"
self._precedence = precedence_table
self.start = start
self._generator = generator
self._terminals = terminals
self._nonterminals = nonterminals
self._trivia = resolved_trivia
self.name = name
self._nonterminals, self._terminals = gather_grammar(start, trivia)
self._trivia = trivia
self._precedence = precedence_table
self.pretty_indent = pretty_indent
def terminals(self) -> list[Terminal]:
return list(self._terminals.values())
@ -2898,55 +2927,7 @@ class Grammar:
def get_precedence(self, name: str) -> None | tuple[Assoc, int]:
return self._precedence.get(name)
# TODO: The flattened form should retain NonTerminal, not just str.
def generate_nonterminal_dict(
self, start: str | None = None
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
"""Convert the rules into a dictionary of productions, and a set of
the names of transparent nonterminals.
Our table generators work on a very flat set of productions. This is the
first step in flattening the productions from the members: walk the rules
starting from the given start rule and flatten them, one by one, into a
dictionary that maps nonterminal rule name to its associated list of
productions.
"""
if start is None:
start = self.start
nonterminals = self._nonterminals
transparents = {rule.name for rule in nonterminals.values() if rule.transparent}
grammar = {}
rule = nonterminals.get(start)
if rule is None:
raise ValueError(f"Cannot find a rule named '{start}'")
if rule.transparent:
raise ValueError("The start rule cannot be transparent")
queue = [rule]
while len(queue) > 0:
rule = queue.pop()
if rule.name in grammar:
continue
body = rule.generate_body(self)
for clause in body:
for symbol in clause:
if not isinstance(symbol, Terminal):
assert isinstance(symbol, str)
nonterminal = nonterminals.get(symbol)
if nonterminal is None:
raise ValueError(f"While processing {rule.name}: cannot find {symbol}")
queue.append(nonterminal)
grammar[rule.name] = body
return (grammar, transparents)
def desugar(
self, start: str | None = None
) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
def desugar(self) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
"""Convert the rules into a flat list of productions.
Our table generators work from a very flat set of productions. The form
@ -2954,37 +2935,27 @@ class Grammar:
generate_nonterminal_dict- less useful to people, probably, but it is
the input form needed by the Generator.
"""
temp_grammar, transparents = self.generate_nonterminal_dict(start)
grammar: list[tuple[str,list[str]]] = [
(rule.name, [s.name for s in production])
for rule in self._nonterminals.values()
for production in rule.body
]
assert grammar[0][0] == self.start.name
grammar = []
for rule_name, clauses in temp_grammar.items():
for clause in clauses:
new_clause = []
for symbol in clause:
if isinstance(symbol, Terminal):
if symbol.name in temp_grammar:
raise ValueError(
f"'{symbol.name}' is the name of both a Terminal and a NonTerminal rule. This will cause problems."
)
new_clause.append(symbol.name)
else:
new_clause.append(symbol)
grammar.append((rule_name, new_clause))
transparents = {name for name, rule in self._nonterminals.items() if rule.transparent}
return grammar, transparents
def build_table(self, start: str | None = None, generator=None) -> ParseTable:
"""Construct a parse table for this grammar, starting at the named
nonterminal rule.
"""
if start is None:
start = self.start
desugared, transparents = self.desugar(start)
def build_table(self) -> ParseTable:
"""Construct a parse table for this grammar."""
desugared, transparents = self.desugar()
if generator is None:
generator = self._generator
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)
gen = ParserGenerator(
self.start.name,
desugared,
precedence=self._precedence,
transparents=transparents,
)
table = gen.gen_table()
for t in self._trivia:

View file

@ -263,8 +263,7 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
if rule.transparent:
rule_name = "_" + rule_name
body = rule.fn(grammar)
rule_definition = convert_to_tree_sitter(body, grammar)
rule_definition = convert_to_tree_sitter(rule.definition, grammar)
if rule_definition is None:
raise Exception(f"Tree-sitter does not support the empty rule {rule_name}")
rule_definition = apply_precedence(rule_definition, rule.name, grammar)
@ -283,7 +282,6 @@ def emit_tree_sitter_grammar(grammar: parser.Grammar, path: pathlib.Path | str):
def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
nts = {nt.name: nt for nt in grammar.non_terminals()}
scope_suffix = "." + grammar.name
def scoop(input: parser.FlattenedWithMetadata, visited: set[str]) -> list[str]:
@ -300,13 +298,12 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
raise Exception("Highlight must come with a field name") # TODO
parts.append(f"{field_name}: _ @{highlight.scope}{scope_suffix}")
elif isinstance(item, str):
nt = nts[item]
if nt.transparent:
if nt.name in visited:
elif isinstance(item, parser.NonTerminal):
if item.transparent:
if item.name in visited:
continue
visited.add(nt.name)
body = nt.fn(grammar)
visited.add(item.name)
body = item.definition
for production in body.flatten(with_metadata=True):
parts.extend(scoop(production, visited))
@ -317,7 +314,7 @@ def emit_tree_sitter_queries(grammar: parser.Grammar, path: pathlib.Path | str):
if rule.transparent:
continue
body = rule.fn(grammar)
body = rule.definition
patterns = set()
for production in body.flatten(with_metadata=True):
# Scoop up the meta...

View file

@ -79,11 +79,7 @@ class MatcherTable:
newline_replace: dict[str, str]
def _compile_nonterminal_matcher(
grammar: parser.Grammar,
nonterminals: dict[str, parser.NonTerminal],
rule: parser.NonTerminal,
) -> MatcherTable:
def _compile_nonterminal_matcher(rule: parser.NonTerminal) -> MatcherTable:
"""Generate a matcher table for a single nonterminal.
See the docs for [MatcherTable] to understand the result.
@ -111,7 +107,7 @@ def _compile_nonterminal_matcher(
def compile_nonterminal(name: str, rule: parser.NonTerminal):
if name not in visited:
visited.add(name)
for production in rule.fn(grammar).flatten(with_metadata=True):
for production in rule.fn().flatten(with_metadata=True):
trans_prod = compile_production(production)
generated_grammar.append((name, trans_prod))
@ -126,19 +122,18 @@ def _compile_nonterminal_matcher(
result = []
for item in production:
if isinstance(item, str):
nt = nonterminals[item]
if nt.transparent:
if isinstance(item, parser.NonTerminal):
if item.transparent:
# If it's transparent then we make a new set of
# productions that covers the contents of the
# transparent nonterminal.
name = "xxx_" + nt.name
compile_nonterminal(name, nt)
name = "xxx_" + item.name
compile_nonterminal(name, item)
result.append(name)
else:
# Otherwise it's a "token" in our input, named
# "tree_{whatever}".
result.append(f"tree_{item}")
result.append(f"tree_{item.name}")
elif isinstance(item, parser.Terminal):
# If it's a terminal it will appear in our input as
@ -257,7 +252,7 @@ def _compile_nonterminal_matcher(
start_name = f"yyy_{rule.name}"
compile_nonterminal(start_name, rule)
gen = grammar._generator(start_name, generated_grammar)
gen = parser.ParserGenerator(start_name, generated_grammar)
parse_table = gen.gen_table()
for (_, replacement), rule_name in newlines.items():
@ -296,7 +291,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) ->
matchers = {}
if indent is None:
indent = getattr(grammar, "pretty_indent", None)
indent = grammar.pretty_indent
if indent is None:
indent = " "
@ -307,7 +302,7 @@ def compile_pretty_table(grammar: parser.Grammar, indent: str | None = None) ->
trivia_mode[t.name] = mode
for name, rule in nonterminals.items():
matchers[name] = _compile_nonterminal_matcher(grammar, nonterminals, rule)
matchers[name] = _compile_nonterminal_matcher(rule)
return PrettyTable(
indent,

223
sql.py
View file

@ -2,6 +2,7 @@ from parser import *
NAME = Terminal(
"NAME",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
@ -9,6 +10,7 @@ NAME = Terminal(
)
STRING = Terminal(
"STRING",
Re.seq(
Re.literal("'"),
(~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
@ -18,6 +20,7 @@ STRING = Terminal(
)
NUMBER = Terminal(
"NUMBER",
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq(
@ -33,118 +36,118 @@ NUMBER = Terminal(
highlight=highlight.constant.numeric,
)
OR = Terminal("or")
AND = Terminal("and")
NOT = Terminal("not")
OR = Terminal("OR", "or")
AND = Terminal("AND", "and")
NOT = Terminal("NOT", "not")
COMPARISON = Terminal(
"COMPARISON",
Re.literal("=")
| Re.literal("<>")
| Re.literal("<")
| Re.literal(">")
| Re.literal("<=")
| Re.literal(">=")
| Re.literal(">="),
)
PLUS = Terminal("+")
MINUS = Terminal("-")
STAR = Terminal("*")
SLASH = Terminal("/")
PLUS = Terminal("PLUS", "+")
MINUS = Terminal("MINUS", "-")
STAR = Terminal("STAR", "*")
SLASH = Terminal("SLASH", "/")
precedence = [
(Assoc.LEFT, ["OR"]),
(Assoc.LEFT, ["AND"]),
(Assoc.LEFT, ["NOT"]),
(Assoc.LEFT, ["COMPARISON"]),
(Assoc.LEFT, ["PLUS", "MINUS"]),
(Assoc.LEFT, ["STAR", "SLASH"]),
# TODO: Unary minus
]
ALL = Terminal("ALL", "all")
AMMSC = Terminal("AMMSC", "ammsc")
ANY = Terminal("ANY", "any")
AS = Terminal("AS", "as")
ASC = Terminal("ASC", "asc")
AUTHORIZATION = Terminal("AUTHORIZATION", "authorization")
BETWEEN = Terminal("BETWEEN", "between")
BY = Terminal("BY", "by")
CHARACTER = Terminal("CHARACTER", "character")
CHECK = Terminal("CHECK", "check")
CLOSE = Terminal("CLOSE", "close")
COMMIT = Terminal("COMMIT", "commit")
CONTINUE = Terminal("CONTINUE", "continue")
CREATE = Terminal("CREATE", "create")
CURRENT = Terminal("CURRENT", "current")
CURSOR = Terminal("CURSOR", "cursor")
DECIMAL = Terminal("DECIMAL", "decimal")
DECLARE = Terminal("DECLARE", "declare")
DEFAULT = Terminal("DEFAULT", "default")
DELETE = Terminal("DELETE", "delete")
DESC = Terminal("DESC", "desc")
DISTINCT = Terminal("DISTINCT", "distinct")
DOUBLE = Terminal("DOUBLE", "double")
ESCAPE = Terminal("ESCAPE", "escape")
EXISTS = Terminal("EXISTS", "exists")
FETCH = Terminal("FETCH", "fetch")
FLOAT = Terminal("FLOAT", "float")
FOR = Terminal("FOR", "for")
FOREIGN = Terminal("FOREIGN", "foreign")
FOUND = Terminal("FOUND", "found")
FROM = Terminal("FROM", "from")
GOTO = Terminal("GOTO", "goto")
GRANT = Terminal("GRANT", "grant")
GROUP = Terminal("GROUP", "group")
HAVING = Terminal("HAVING", "having")
IN = Terminal("IN", "in")
INDICATOR = Terminal("INDICATOR", "indicator")
INSERT = Terminal("INSERT", "insert")
INTEGER = Terminal("INTEGER", "integer")
INTO = Terminal("INTO", "into")
IS = Terminal("IS", "is")
KEY = Terminal("KEY", "key")
LANGUAGE = Terminal("LANGUAGE", "language")
LIKE = Terminal("LIKE", "like")
NULL = Terminal("NULL", "null")
NUMERIC = Terminal("NUMERIC", "numeric")
OF = Terminal("OF", "of")
ON = Terminal("ON", "on")
OPEN = Terminal("OPEN", "open")
OPTION = Terminal("OPTION", "option")
ORDER = Terminal("ORDER", "order")
PARAMETER = Terminal("PARAMETER", "parameter")
PRECISION = Terminal("PRECISION", "precision")
PRIMARY = Terminal("PRIMARY", "primary")
PRIVILEGES = Terminal("PRIVILEGES", "privileges")
PROCEDURE = Terminal("PROCEDURE", "procedure")
PUBLIC = Terminal("PUBLIC", "public")
REAL = Terminal("REAL", "real")
REFERENCES = Terminal("REFERENCES", "references")
ROLLBACK = Terminal("ROLLBACK", "rollback")
SCHEMA = Terminal("SCHEMA", "schema")
SELECT = Terminal("SELECT", "select")
SET = Terminal("SET", "set")
SMALLINT = Terminal("SMALLINT", "smallint")
SOME = Terminal("SOME", "some")
SQLCODE = Terminal("SQLCODE", "sqlcode")
SQLERROR = Terminal("SQLERROR", "sqlerror")
TABLE = Terminal("TABLE", "table")
TO = Terminal("TO", "to")
UNION = Terminal("UNION", "union")
UNIQUE = Terminal("UNIQUE", "unique")
UPDATE = Terminal("UPDATE", "update")
USER = Terminal("USER", "user")
VALUES = Terminal("VALUES", "values")
VIEW = Terminal("VIEW", "view")
WHENEVER = Terminal("WHENEVER", "whenever")
WHERE = Terminal("WHERE", "where")
WITH = Terminal("WITH", "with")
WORK = Terminal("WORK", "work")
ALL = Terminal("all")
AMMSC = Terminal("ammsc")
ANY = Terminal("any")
ASC = Terminal("asc")
AUTHORIZATION = Terminal("authorization")
BETWEEN = Terminal("between")
BY = Terminal("by")
CHARACTER = Terminal("character")
CHECK = Terminal("check")
CLOSE = Terminal("close")
COMMIT = Terminal("commit")
CONTINUE = Terminal("continue")
CREATE = Terminal("create")
CURRENT = Terminal("current")
CURSOR = Terminal("cursor")
DECIMAL = Terminal("decimal")
DECLARE = Terminal("declare")
DEFAULT = Terminal("default")
DELETE = Terminal("delete")
DESC = Terminal("desc")
DISTINCT = Terminal("distinct")
DOUBLE = Terminal("double")
ESCAPE = Terminal("escape")
EXISTS = Terminal("exists")
FETCH = Terminal("fetch")
FLOAT = Terminal("float")
FOR = Terminal("for")
FOREIGN = Terminal("foreign")
FOUND = Terminal("found")
FROM = Terminal("from")
GOTO = Terminal("goto")
GRANT = Terminal("grant")
GROUP = Terminal("group")
HAVING = Terminal("having")
IN = Terminal("in")
INDICATOR = Terminal("indicator")
INSERT = Terminal("insert")
INTEGER = Terminal("integer")
INTO = Terminal("into")
IS = Terminal("is")
KEY = Terminal("key")
LANGUAGE = Terminal("language")
LIKE = Terminal("like")
NULL = Terminal("null")
NUMERIC = Terminal("numeric")
OF = Terminal("of")
ON = Terminal("on")
OPEN = Terminal("open")
OPTION = Terminal("option")
ORDER = Terminal("order")
PARAMETER = Terminal("parameter")
PRECISION = Terminal("precision")
PRIMARY = Terminal("primary")
PRIVILEGES = Terminal("privileges")
PROCEDURE = Terminal("procedure")
PUBLIC = Terminal("public")
REAL = Terminal("real")
REFERENCES = Terminal("references")
ROLLBACK = Terminal("rollback")
SCHEMA = Terminal("schema")
SELECT = Terminal("select")
SET = Terminal("set")
SMALLINT = Terminal("smallint")
SOME = Terminal("some")
SQLCODE = Terminal("sqlcode")
SQLERROR = Terminal("sqlerror")
TABLE = Terminal("table")
TO = Terminal("to")
UNION = Terminal("union")
UNIQUE = Terminal("unique")
UPDATE = Terminal("update")
USER = Terminal("user")
VALUES = Terminal("values")
VIEW = Terminal("view")
WHENEVER = Terminal("whenever")
WHERE = Terminal("where")
WITH = Terminal("with")
WORK = Terminal("work")
SEMICOLON = Terminal("SEMICOLON", ";")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
COMMA = Terminal("COMMA", ",")
EQUAL = Terminal("EQUAL", "=")
DOT = Terminal("DOT", ".")
SEMICOLON = Terminal(";")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
COMMA = Terminal(",")
EQUAL = Terminal("=")
DOT = Terminal(".")
AS = Terminal("as")
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal(
"COMMENT",
Re.seq(Re.literal("--"), Re.set("\n").invert().star()),
highlight=highlight.comment.line,
trivia_mode=TriviaMode.LineComment,
)
@rule
@ -740,3 +743,19 @@ def user():
@rule
def when_action():
return (GOTO + NAME) | CONTINUE
SQL = Grammar(
start=sql_list,
precedence=[
(Assoc.LEFT, [OR]),
(Assoc.LEFT, [AND]),
(Assoc.LEFT, [NOT]),
(Assoc.LEFT, [COMPARISON]),
(Assoc.LEFT, [PLUS, MINUS]),
(Assoc.LEFT, [STAR, SLASH]),
# TODO: Unary minus
],
trivia=[BLANKS, COMMENT, LINE_BREAK],
name="SQL",
)

View file

@ -11,129 +11,31 @@ import parser.runtime as runtime
# Tests based on
# https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
class LGrammar(Grammar):
start = "File"
trivia = ["BLANKS"]
# Need a little bit of disambiguation for the symbol involved.
precedence = [
(Assoc.LEFT, ["PLUS", "MINUS"]),
(Assoc.LEFT, ["STAR", "SLASH"]),
(Assoc.LEFT, ["LPAREN"]),
]
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
@rule
def File(self):
# TODO: Make lists easier
return self._functions
@rule
def _functions(self):
return self.Function | (self._functions + self.Function)
@rule
def Function(self):
return self.FN + self.NAME + self.ParamList + opt(self.ARROW + self.TypeExpr) + self.Block
@rule
def ParamList(self):
return self.LPAREN + opt(self._parameters) + self.RPAREN
@rule
def _parameters(self):
# NOTE: The ungrammar in the reference does not talk about commas required between parameters
# so this massages it to make them required. Commas are in the list not the param, which
# is more awkward for processing but not terminally so.
return (self.Param + opt(self.COMMA)) | (self.Param + self.COMMA + self._parameters)
@rule
def Param(self):
return self.NAME + self.COLON + self.TypeExpr
@rule
def TypeExpr(self):
return self.NAME
@rule
def Block(self):
return self.LCURLY + opt(self._statements) + self.RCURLY
@rule
def _statements(self):
return self.Stmt | self._statements + self.Stmt
@rule
def Stmt(self):
return self.StmtExpr | self.StmtLet | self.StmtReturn
@rule
def StmtExpr(self):
return self.Expr + self.SEMICOLON
@rule
def StmtLet(self):
return self.LET + self.NAME + self.EQUAL + self.Expr + self.SEMICOLON
@rule
def StmtReturn(self):
return self.RETURN + self.Expr + self.SEMICOLON
@rule
def Expr(self):
return self.ExprLiteral | self.ExprName | self.ExprParen | self.ExprBinary | self.ExprCall
@rule
def ExprLiteral(self):
return self.INT | self.TRUE | self.FALSE
@rule
def ExprName(self):
return self.NAME
@rule
def ExprParen(self):
return self.LPAREN + self.Expr + self.RPAREN
@rule
def ExprBinary(self):
return self.Expr + (self.PLUS | self.MINUS | self.STAR | self.SLASH) + self.Expr
@rule
def ExprCall(self):
return self.Expr + self.ArgList
@rule
def ArgList(self):
return self.LPAREN + opt(self._arg_star) + self.RPAREN
@rule
def _arg_star(self):
# Again, a deviation from the original. See _parameters.
return (self.Expr + opt(self.COMMA)) | (self.Expr + self.COMMA + self._arg_star)
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
TRUE = Terminal("true")
FALSE = Terminal("false")
INT = Terminal(Re.set(("0", "9")).plus())
FN = Terminal("fn")
ARROW = Terminal("->")
COMMA = Terminal(",")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
LCURLY = Terminal("{")
RCURLY = Terminal("}")
COLON = Terminal(":")
SEMICOLON = Terminal(";")
LET = Terminal("let")
EQUAL = Terminal("=")
RETURN = Terminal("return")
PLUS = Terminal("+")
MINUS = Terminal("-")
STAR = Terminal("*")
SLASH = Terminal("/")
TRUE = Terminal("TRUE", "true")
FALSE = Terminal("FALSE", "false")
INT = Terminal("INT", Re.set(("0", "9")).plus())
FN = Terminal("FN", "fn")
ARROW = Terminal("ARROW", "->")
COMMA = Terminal("COMMA", ",")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
LCURLY = Terminal("LCURLY", "{")
RCURLY = Terminal("RCURLY", "}")
COLON = Terminal("COLON", ":")
SEMICOLON = Terminal("SEMICOLON", ";")
LET = Terminal("LET", "let")
EQUAL = Terminal("EQUAL", "=")
RETURN = Terminal("RETURN", "return")
PLUS = Terminal("PLUS", "+")
MINUS = Terminal("MINUS", "-")
STAR = Terminal("STAR", "*")
SLASH = Terminal("SLASH", "/")
NAME = Terminal(
"NAME",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
@ -141,8 +43,109 @@ class LGrammar(Grammar):
)
L_PARSE_TABLE = LGrammar().build_table()
L_LEXER_TABLE = LGrammar().compile_lexer()
@rule
def File():
# TODO: Make lists easier
return _functions
@rule
def _functions():
return Function | (_functions + Function)
@rule
def Function():
return FN + NAME + ParamList + opt(ARROW + TypeExpr) + Block
@rule
def ParamList():
return LPAREN + opt(_parameters) + RPAREN
@rule
def _parameters():
# NOTE: The ungrammar in the reference does not talk about commas
# required between parameters so this massages it to make them
# required. Commas are in the list not the param, which is more
# awkward for processing but not terminally so.
return (Param + opt(COMMA)) | (Param + COMMA + _parameters)
@rule
def Param():
return NAME + COLON + TypeExpr
@rule
def TypeExpr():
return NAME
@rule
def Block():
return LCURLY + opt(_statements) + RCURLY
@rule
def _statements():
return Stmt | _statements + Stmt
@rule
def Stmt():
return StmtExpr | StmtLet | StmtReturn
@rule
def StmtExpr():
return Expr + SEMICOLON
@rule
def StmtLet():
return LET + NAME + EQUAL + Expr + SEMICOLON
@rule
def StmtReturn():
return RETURN + Expr + SEMICOLON
@rule
def Expr():
return ExprLiteral | ExprName | ExprParen | ExprBinary | ExprCall
@rule
def ExprLiteral():
return INT | TRUE | FALSE
@rule
def ExprName():
return NAME
@rule
def ExprParen():
return LPAREN + Expr + RPAREN
@rule
def ExprBinary():
return Expr + (PLUS | MINUS | STAR | SLASH) + Expr
@rule
def ExprCall():
return Expr + ArgList
@rule
def ArgList():
return LPAREN + opt(_arg_star) + RPAREN
@rule
def _arg_star():
# Again, a deviation from the original. See _parameters.
return (Expr + opt(COMMA)) | (Expr + COMMA + _arg_star)
LGrammar = Grammar(
start=File,
trivia=[BLANKS],
# Need a little bit of disambiguation for the symbol involved.
precedence = [
(Assoc.LEFT, [PLUS, MINUS]),
(Assoc.LEFT, [STAR, SLASH]),
(Assoc.LEFT, [LPAREN]),
],
)
L_PARSE_TABLE = LGrammar.build_table()
L_LEXER_TABLE = LGrammar.compile_lexer()
def test_matklad_one():

View file

@ -1,6 +1,5 @@
import pytest
import parser
import parser.runtime as runtime
from parser import Grammar, seq, rule, Terminal
@ -40,117 +39,68 @@ def _tree(treeform, count=0) -> runtime.Tree | runtime.TokenValue:
def test_lr0_lr0():
"""An LR0 grammar should work with an LR0 generator."""
class G(Grammar):
start = "E"
# generator = parser.GenerateLR0
PLUS = Terminal("+", "+")
LPAREN = Terminal("(", "(")
RPAREN = Terminal(")", ")")
IDENTIFIER = Terminal("id", "id")
@rule
def E(self):
return seq(self.E, self.PLUS, self.T) | self.T
def E():
return seq(E, PLUS, T) | T
@rule
def T(self):
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
def T():
return seq(LPAREN, E, RPAREN) | IDENTIFIER
PLUS = Terminal("+", name="+")
LPAREN = Terminal("(", name="(")
RPAREN = Terminal(")", name=")")
IDENTIFIER = Terminal("id", name="id")
G = Grammar(start=E)
table = G().build_table()
tree, errors = runtime.Parser(table).parse(
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
)
table = G.build_table()
tree, errors = runtime.Parser(table).parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN))
assert errors == []
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
def test_all_generators():
"""This grammar should work with everything honestly."""
class G(Grammar):
start = "E"
@rule
def E(self):
return seq(self.E, self.PLUS, self.T) | self.T
@rule
def T(self):
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
PLUS = Terminal("+", name="+")
LPAREN = Terminal("(", name="(")
RPAREN = Terminal(")", name=")")
IDENTIFIER = Terminal("id", name="id")
GENERATORS = [
# parser.GenerateLR0,
# parser.GeneratePager,
parser.ParserGenerator,
]
for generator in GENERATORS:
table = G().build_table(generator=generator)
tree, errors = runtime.Parser(table).parse(
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
)
print("\n")
print(generator)
print(f"{table.format()}")
assert errors == []
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
def test_grammar_aho_ullman_2():
class TestGrammar(Grammar):
start = "S"
@rule
def S():
return seq(X, X)
@rule
def S(self):
return seq(self.X, self.X)
def X():
return seq(A, X) | B
@rule
def X(self):
return seq(self.A, self.X) | self.B
A = Terminal("A", "a")
B = Terminal("B", "b")
A = Terminal("a")
B = Terminal("b")
TestGrammar().build_table(generator=parser.ParserGenerator)
# TestGrammar().build_table(generator=parser.GeneratePager)
Grammar(start=S).build_table()
def test_fun_lalr():
class TestGrammar(Grammar):
start = "S"
@rule
def S():
return seq(V, E)
@rule
def S(self):
return seq(self.V, self.E)
def E():
return F | seq(E, PLUS, F)
@rule
def E(self):
return self.F | seq(self.E, self.PLUS, self.F)
def F():
return V | INT | seq(LPAREN, E, RPAREN)
@rule
def F(self):
return self.V | self.INT | seq(self.LPAREN, self.E, self.RPAREN)
def V():
return ID
@rule
def V(self):
return self.ID
PLUS = Terminal("PLUS", "+")
INT = Terminal("INT", "int")
ID = Terminal("ID", "id")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
PLUS = Terminal("+")
INT = Terminal("int")
ID = Terminal("id")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
TestGrammar().build_table()
Grammar(start=S).build_table()
def test_conflicting_names():
@ -167,43 +117,28 @@ def test_conflicting_names():
to understand.
"""
class TestGrammar(Grammar):
start = "IDENTIFIER"
@rule("IDENTIFIER")
def identifier(self):
return self.IDENTIFIER
def identifier():
return IDENTIFIER
IDENTIFIER = Terminal("Identifier")
IDENTIFIER = Terminal("IDENTIFIER", "Identifier")
with pytest.raises(ValueError):
TestGrammar().build_table()
Grammar(start=identifier).build_table()
def test_grammar_ignore_trivia():
class G(Grammar):
start = "sentence"
trivia = ["BLANK"]
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
def sentence():
return WORD | seq(sentence, WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
WORD = Terminal("WORD", "blah")
BLANK = Terminal("BLANK", " ")
table = G().build_table()
table = Grammar(start=sentence, trivia=[BLANK]).build_table()
assert "BLANK" in table.trivia
tree, errors = runtime.Parser(table).parse(
Tokens(
G.WORD,
G.BLANK,
G.WORD,
G.BLANK,
)
)
tree, errors = runtime.Parser(table).parse(Tokens(WORD, BLANK, WORD, BLANK))
assert errors == []
assert tree == runtime.Tree(
@ -234,135 +169,3 @@ def test_grammar_ignore_trivia():
),
),
)
def test_grammar_unknown_trivia():
class G(Grammar):
start = "sentence"
trivia = ["BLANK"]
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
with pytest.raises(ValueError):
G().build_table()
def test_grammar_trivia_symbol():
class G(Grammar):
start = "sentence"
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
trivia = [BLANK]
table = G().build_table()
assert "BLANK" in table.trivia
def test_grammar_trivia_constructor():
class G(Grammar):
start = "sentence"
def __init__(self):
super().__init__(trivia=[self.BLANK])
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
table = G().build_table()
assert "BLANK" in table.trivia
def test_grammar_trivia_constructor_string():
class G(Grammar):
start = "sentence"
def __init__(self):
super().__init__(trivia=["BLANK"])
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
BLANK = Terminal(" ")
table = G().build_table()
assert "BLANK" in table.trivia
def test_grammar_trivia_constructor_string_unknown():
class G(Grammar):
start = "sentence"
def __init__(self):
super().__init__(trivia=["BLANK"])
@rule
def sentence(self):
return self.WORD | seq(self.sentence, self.WORD)
WORD = Terminal("blah")
with pytest.raises(ValueError):
G().build_table()
def test_grammar_name_implicit():
class FooGrammar(Grammar):
start = "x"
@rule
def x(self):
return self.WORD
WORD = Terminal("blah")
assert FooGrammar().name == "foo"
def test_grammar_name_explicit_member():
class FooGrammar(Grammar):
start = "x"
name = "bar"
@rule
def x(self):
return self.WORD
WORD = Terminal("blah")
assert FooGrammar().name == "bar"
def test_grammar_name_explicit_constructor():
class FooGrammar(Grammar):
start = "x"
name = "bar"
def __init__(self):
super().__init__(name="baz")
@rule
def x(self):
return self.WORD
WORD = Terminal("blah")
assert FooGrammar().name == "baz"

View file

@ -354,32 +354,33 @@ def test_edge_list_always_sorted(points: list[tuple[int, int]]):
def test_lexer_compile():
class LexTest(Grammar):
@rule
def foo(self):
return self.IS
def foo():
# NOTE: This is a hack to ensure the terminals are reachable. :P
return IS | AS | IDENTIFIER
start = "foo"
IS = Terminal("is")
AS = Terminal("as")
IS = Terminal("IS", "is")
AS = Terminal("AS", "as")
IDENTIFIER = Terminal(
"IDENTIFIER",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
)
)
BLANKS = Terminal(Re.set("\r", "\n", "\t", " ").plus())
BLANKS = Terminal("BLANKS", Re.set("\r", "\n", "\t", " ").plus())
lexer = LexTest().compile_lexer()
LexTest = Grammar(start=foo, trivia=[BLANKS])
lexer = LexTest.compile_lexer()
dump_lexer_table(lexer)
tokens = list(generic_tokenize("xy is ass", lexer))
assert tokens == [
(LexTest.IDENTIFIER, 0, 2),
(LexTest.BLANKS, 2, 1),
(LexTest.IS, 3, 2),
(LexTest.BLANKS, 5, 1),
(LexTest.IDENTIFIER, 6, 3),
(IDENTIFIER, 0, 2),
(BLANKS, 2, 1),
(IS, 3, 2),
(BLANKS, 5, 1),
(IDENTIFIER, 6, 3),
]
@ -387,14 +388,12 @@ def test_lexer_compile():
def test_lexer_numbers(n: float):
assume(math.isfinite(n))
class LexTest(Grammar):
@rule
def number(self):
return self.NUMBER
start = "number"
def number():
return NUMBER
NUMBER = Terminal(
"NUMBER",
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq(
@ -409,12 +408,15 @@ def test_lexer_numbers(n: float):
)
)
lexer = LexTest().compile_lexer()
LexTest = Grammar(start=number)
lexer = LexTest.compile_lexer()
dump_lexer_table(lexer)
number_string = str(n)
tokens = list(generic_tokenize(number_string, lexer))
assert tokens == [
(LexTest.NUMBER, 0, len(number_string)),
(NUMBER, 0, len(number_string)),
]

View file

@ -23,69 +23,66 @@ import parser.wadler.builder as builder
import parser.wadler.runtime as runtime
class JsonGrammar(Grammar):
start = "root"
trivia = ["BLANKS"]
def make_json_grammar():
@rule
def root(self):
return self.value
def root():
return value
@rule(transparent=True)
def value(self):
def value():
return (
self.object
| self.array
| self.NUMBER
| self.TRUE
| self.FALSE
| self.NULL
| self.STRING
object
| array
| NUMBER
| TRUE
| FALSE
| NULL
| STRING
)
@rule
def object(self):
def object():
return group(
self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
LCURLY + opt(indent(newline() + _object_pairs)) + newline() + RCURLY
)
@rule
def _object_pairs(self):
def _object_pairs():
return alt(
self.object_pair,
self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
object_pair,
object_pair + COMMA + newline(" ") + _object_pairs,
)
@rule
def object_pair(self):
return group(self.STRING + self.COLON + indent(newline(" ") + self.value))
def object_pair():
return group(STRING + COLON + indent(newline(" ") + value))
@rule
def array(self):
def array():
return group(
self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
LSQUARE + opt(indent(newline() + _array_items)) + newline() + RSQUARE
)
@rule
def _array_items(self):
def _array_items():
return alt(
self.value,
self.value + self.COMMA + newline(" ") + self._array_items,
value,
value + COMMA + newline(" ") + _array_items,
)
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
LCURLY = Terminal("{")
RCURLY = Terminal("}")
COMMA = Terminal(",")
COLON = Terminal(":")
LSQUARE = Terminal("[")
RSQUARE = Terminal("]")
TRUE = Terminal("true")
FALSE = Terminal("false")
NULL = Terminal("null")
LCURLY = Terminal("LCURLY", "{")
RCURLY = Terminal("RCURLY", "}")
COMMA = Terminal("COMMA", ",")
COLON = Terminal("COLON", ":")
LSQUARE = Terminal("LSQUARE", "[")
RSQUARE = Terminal("RSQUARE", "]")
TRUE = Terminal("TRUE", "true")
FALSE = Terminal("FALSE", "false")
NULL = Terminal("NULL", "null")
NUMBER = Terminal(
"NUMBER",
Re.seq(
Re.set(("0", "9")).plus(),
Re.seq(
@ -100,6 +97,7 @@ class JsonGrammar(Grammar):
),
)
STRING = Terminal(
"STRING",
Re.seq(
Re.literal('"'),
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
@ -107,8 +105,9 @@ class JsonGrammar(Grammar):
)
)
return Grammar(start=root, trivia=[BLANKS])
JSON = JsonGrammar()
JSON = make_json_grammar()
JSON_PARSER = JSON.build_table()
JSON_LEXER = JSON.compile_lexer()
@ -228,47 +227,49 @@ def test_layout_basic():
)
class TG(Grammar):
start = "root"
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
def make_test_grammar():
@rule
def root():
return _expression
@rule
def root(self):
return self._expression
def _expression():
return word | list
@rule
def _expression(self):
return self.word | self.list
def list():
return group(LPAREN, indent(nl, _expressions), nl, RPAREN)
@rule
def list(self):
return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN)
def _expressions():
return _expression | seq(_expressions, sp, _expression)
@rule
def _expressions(self):
return self._expression | seq(self._expressions, sp, self._expression)
def word():
return OK | seq(BREAK, br, BREAK)
@rule
def word(self):
return self.OK | seq(self.BREAK, br, self.BREAK)
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
OK = Terminal("OK", "ok")
BREAK = Terminal("BREAK", "break")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
OK = Terminal("ok")
BREAK = Terminal("break")
BLANKS = Terminal(Re.set(" ", "\t").plus())
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
COMMENT = Terminal(
"COMMENT",
Re.seq(Re.literal(";"), Re.set("\n").invert().star()),
trivia_mode=TriviaMode.LineComment,
)
return Grammar(start=root, trivia=[BLANKS, LINE_BREAK, COMMENT], pretty_indent=" ")
TG = make_test_grammar()
def test_forced_break():
g = TG()
g_lexer = g.compile_lexer()
g_parser = g.build_table()
g_lexer = TG.compile_lexer()
g_parser = TG.build_table()
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
@ -276,7 +277,7 @@ def test_forced_break():
assert errors == []
assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g))
printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -296,9 +297,8 @@ def test_forced_break():
def test_maintaining_line_breaks():
g = TG()
g_lexer = g.compile_lexer()
g_parser = g.build_table()
g_lexer = TG.compile_lexer()
g_parser = TG.build_table()
text = """((ok ok)
; Don't break here.
@ -316,7 +316,7 @@ def test_maintaining_line_breaks():
assert errors == []
assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g))
printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -325,10 +325,10 @@ def test_maintaining_line_breaks():
(ok ok)
; Don't break here.
(ok)
*SPACE*
*SPACE**SPACE*
; ^ Do keep this break though.
(ok)
*SPACE*
*SPACE**SPACE*
; ^ This should only be one break.
(ok)
)
@ -337,9 +337,8 @@ def test_maintaining_line_breaks():
def test_trailing_trivia():
g = TG()
g_lexer = g.compile_lexer()
g_parser = g.build_table()
g_lexer = TG.compile_lexer()
g_parser = TG.build_table()
text = """((ok ok)); Don't lose this!
@ -350,7 +349,7 @@ def test_trailing_trivia():
assert errors == []
assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g))
printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -363,9 +362,8 @@ def test_trailing_trivia():
def test_trailing_trivia_two():
g = TG()
g_lexer = g.compile_lexer()
g_parser = g.build_table()
g_lexer = TG.compile_lexer()
g_parser = TG.build_table()
text = """((ok ok))
@ -376,7 +374,7 @@ def test_trailing_trivia_two():
assert errors == []
assert tree is not None
printer = runtime.Printer(builder.compile_pretty_table(g))
printer = runtime.Printer(builder.compile_pretty_table(TG))
result = printer.format_tree(tree, text, 200).apply_to_source(text)
assert result == _output(
@ -389,9 +387,8 @@ def test_trailing_trivia_two():
def test_trailing_trivia_split():
g = TG()
g_lexer = g.compile_lexer()
g_parser = g.build_table()
g_lexer = TG.compile_lexer()
g_parser = TG.build_table()
text = """((ok ok)); Don't lose this!
@ -432,7 +429,7 @@ def test_trailing_trivia_split():
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
trivia_doc = runtime.Matcher(
builder.MatcherTable(ParseTable([], [], set()), {}, {}),
builder.MatcherTable(ParseTable([], [], set(), {}), {}, {}),
TRIVIA_MODES,
).apply_post_trivia(
token.post_trivia,