[all] A whole new style for grammars
Say goodbye to the sea of `self.`!
This commit is contained in:
parent
d6f1e7aba1
commit
5064a768e7
10 changed files with 1097 additions and 1318 deletions
|
|
@ -11,138 +11,141 @@ import parser.runtime as runtime
|
|||
|
||||
# Tests based on
|
||||
# https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
|
||||
class LGrammar(Grammar):
|
||||
start = "File"
|
||||
trivia = ["BLANKS"]
|
||||
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
TRUE = Terminal("TRUE", "true")
|
||||
FALSE = Terminal("FALSE", "false")
|
||||
INT = Terminal("INT", Re.set(("0", "9")).plus())
|
||||
FN = Terminal("FN", "fn")
|
||||
ARROW = Terminal("ARROW", "->")
|
||||
COMMA = Terminal("COMMA", ",")
|
||||
LPAREN = Terminal("LPAREN", "(")
|
||||
RPAREN = Terminal("RPAREN", ")")
|
||||
LCURLY = Terminal("LCURLY", "{")
|
||||
RCURLY = Terminal("RCURLY", "}")
|
||||
COLON = Terminal("COLON", ":")
|
||||
SEMICOLON = Terminal("SEMICOLON", ";")
|
||||
LET = Terminal("LET", "let")
|
||||
EQUAL = Terminal("EQUAL", "=")
|
||||
RETURN = Terminal("RETURN", "return")
|
||||
PLUS = Terminal("PLUS", "+")
|
||||
MINUS = Terminal("MINUS", "-")
|
||||
STAR = Terminal("STAR", "*")
|
||||
SLASH = Terminal("SLASH", "/")
|
||||
|
||||
NAME = Terminal(
|
||||
"NAME",
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@rule
|
||||
def File():
|
||||
# TODO: Make lists easier
|
||||
return _functions
|
||||
|
||||
@rule
|
||||
def _functions():
|
||||
return Function | (_functions + Function)
|
||||
|
||||
@rule
|
||||
def Function():
|
||||
return FN + NAME + ParamList + opt(ARROW + TypeExpr) + Block
|
||||
|
||||
@rule
|
||||
def ParamList():
|
||||
return LPAREN + opt(_parameters) + RPAREN
|
||||
|
||||
@rule
|
||||
def _parameters():
|
||||
# NOTE: The ungrammar in the reference does not talk about commas
|
||||
# required between parameters so this massages it to make them
|
||||
# required. Commas are in the list not the param, which is more
|
||||
# awkward for processing but not terminally so.
|
||||
return (Param + opt(COMMA)) | (Param + COMMA + _parameters)
|
||||
|
||||
@rule
|
||||
def Param():
|
||||
return NAME + COLON + TypeExpr
|
||||
|
||||
@rule
|
||||
def TypeExpr():
|
||||
return NAME
|
||||
|
||||
@rule
|
||||
def Block():
|
||||
return LCURLY + opt(_statements) + RCURLY
|
||||
|
||||
@rule
|
||||
def _statements():
|
||||
return Stmt | _statements + Stmt
|
||||
|
||||
@rule
|
||||
def Stmt():
|
||||
return StmtExpr | StmtLet | StmtReturn
|
||||
|
||||
@rule
|
||||
def StmtExpr():
|
||||
return Expr + SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtLet():
|
||||
return LET + NAME + EQUAL + Expr + SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtReturn():
|
||||
return RETURN + Expr + SEMICOLON
|
||||
|
||||
@rule
|
||||
def Expr():
|
||||
return ExprLiteral | ExprName | ExprParen | ExprBinary | ExprCall
|
||||
|
||||
@rule
|
||||
def ExprLiteral():
|
||||
return INT | TRUE | FALSE
|
||||
|
||||
@rule
|
||||
def ExprName():
|
||||
return NAME
|
||||
|
||||
@rule
|
||||
def ExprParen():
|
||||
return LPAREN + Expr + RPAREN
|
||||
|
||||
@rule
|
||||
def ExprBinary():
|
||||
return Expr + (PLUS | MINUS | STAR | SLASH) + Expr
|
||||
|
||||
@rule
|
||||
def ExprCall():
|
||||
return Expr + ArgList
|
||||
|
||||
@rule
|
||||
def ArgList():
|
||||
return LPAREN + opt(_arg_star) + RPAREN
|
||||
|
||||
@rule
|
||||
def _arg_star():
|
||||
# Again, a deviation from the original. See _parameters.
|
||||
return (Expr + opt(COMMA)) | (Expr + COMMA + _arg_star)
|
||||
|
||||
LGrammar = Grammar(
|
||||
start=File,
|
||||
trivia=[BLANKS],
|
||||
# Need a little bit of disambiguation for the symbol involved.
|
||||
precedence = [
|
||||
(Assoc.LEFT, ["PLUS", "MINUS"]),
|
||||
(Assoc.LEFT, ["STAR", "SLASH"]),
|
||||
(Assoc.LEFT, ["LPAREN"]),
|
||||
]
|
||||
(Assoc.LEFT, [PLUS, MINUS]),
|
||||
(Assoc.LEFT, [STAR, SLASH]),
|
||||
(Assoc.LEFT, [LPAREN]),
|
||||
],
|
||||
)
|
||||
|
||||
@rule
|
||||
def File(self):
|
||||
# TODO: Make lists easier
|
||||
return self._functions
|
||||
|
||||
@rule
|
||||
def _functions(self):
|
||||
return self.Function | (self._functions + self.Function)
|
||||
|
||||
@rule
|
||||
def Function(self):
|
||||
return self.FN + self.NAME + self.ParamList + opt(self.ARROW + self.TypeExpr) + self.Block
|
||||
|
||||
@rule
|
||||
def ParamList(self):
|
||||
return self.LPAREN + opt(self._parameters) + self.RPAREN
|
||||
|
||||
@rule
|
||||
def _parameters(self):
|
||||
# NOTE: The ungrammar in the reference does not talk about commas required between parameters
|
||||
# so this massages it to make them required. Commas are in the list not the param, which
|
||||
# is more awkward for processing but not terminally so.
|
||||
return (self.Param + opt(self.COMMA)) | (self.Param + self.COMMA + self._parameters)
|
||||
|
||||
@rule
|
||||
def Param(self):
|
||||
return self.NAME + self.COLON + self.TypeExpr
|
||||
|
||||
@rule
|
||||
def TypeExpr(self):
|
||||
return self.NAME
|
||||
|
||||
@rule
|
||||
def Block(self):
|
||||
return self.LCURLY + opt(self._statements) + self.RCURLY
|
||||
|
||||
@rule
|
||||
def _statements(self):
|
||||
return self.Stmt | self._statements + self.Stmt
|
||||
|
||||
@rule
|
||||
def Stmt(self):
|
||||
return self.StmtExpr | self.StmtLet | self.StmtReturn
|
||||
|
||||
@rule
|
||||
def StmtExpr(self):
|
||||
return self.Expr + self.SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtLet(self):
|
||||
return self.LET + self.NAME + self.EQUAL + self.Expr + self.SEMICOLON
|
||||
|
||||
@rule
|
||||
def StmtReturn(self):
|
||||
return self.RETURN + self.Expr + self.SEMICOLON
|
||||
|
||||
@rule
|
||||
def Expr(self):
|
||||
return self.ExprLiteral | self.ExprName | self.ExprParen | self.ExprBinary | self.ExprCall
|
||||
|
||||
@rule
|
||||
def ExprLiteral(self):
|
||||
return self.INT | self.TRUE | self.FALSE
|
||||
|
||||
@rule
|
||||
def ExprName(self):
|
||||
return self.NAME
|
||||
|
||||
@rule
|
||||
def ExprParen(self):
|
||||
return self.LPAREN + self.Expr + self.RPAREN
|
||||
|
||||
@rule
|
||||
def ExprBinary(self):
|
||||
return self.Expr + (self.PLUS | self.MINUS | self.STAR | self.SLASH) + self.Expr
|
||||
|
||||
@rule
|
||||
def ExprCall(self):
|
||||
return self.Expr + self.ArgList
|
||||
|
||||
@rule
|
||||
def ArgList(self):
|
||||
return self.LPAREN + opt(self._arg_star) + self.RPAREN
|
||||
|
||||
@rule
|
||||
def _arg_star(self):
|
||||
# Again, a deviation from the original. See _parameters.
|
||||
return (self.Expr + opt(self.COMMA)) | (self.Expr + self.COMMA + self._arg_star)
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
TRUE = Terminal("true")
|
||||
FALSE = Terminal("false")
|
||||
INT = Terminal(Re.set(("0", "9")).plus())
|
||||
FN = Terminal("fn")
|
||||
ARROW = Terminal("->")
|
||||
COMMA = Terminal(",")
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
LCURLY = Terminal("{")
|
||||
RCURLY = Terminal("}")
|
||||
COLON = Terminal(":")
|
||||
SEMICOLON = Terminal(";")
|
||||
LET = Terminal("let")
|
||||
EQUAL = Terminal("=")
|
||||
RETURN = Terminal("return")
|
||||
PLUS = Terminal("+")
|
||||
MINUS = Terminal("-")
|
||||
STAR = Terminal("*")
|
||||
SLASH = Terminal("/")
|
||||
|
||||
NAME = Terminal(
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
L_PARSE_TABLE = LGrammar().build_table()
|
||||
L_LEXER_TABLE = LGrammar().compile_lexer()
|
||||
L_PARSE_TABLE = LGrammar.build_table()
|
||||
L_LEXER_TABLE = LGrammar.compile_lexer()
|
||||
|
||||
|
||||
def test_matklad_one():
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import pytest
|
||||
|
||||
import parser
|
||||
import parser.runtime as runtime
|
||||
|
||||
from parser import Grammar, seq, rule, Terminal
|
||||
|
|
@ -40,117 +39,68 @@ def _tree(treeform, count=0) -> runtime.Tree | runtime.TokenValue:
|
|||
def test_lr0_lr0():
|
||||
"""An LR0 grammar should work with an LR0 generator."""
|
||||
|
||||
class G(Grammar):
|
||||
start = "E"
|
||||
# generator = parser.GenerateLR0
|
||||
PLUS = Terminal("+", "+")
|
||||
LPAREN = Terminal("(", "(")
|
||||
RPAREN = Terminal(")", ")")
|
||||
IDENTIFIER = Terminal("id", "id")
|
||||
|
||||
@rule
|
||||
def E(self):
|
||||
return seq(self.E, self.PLUS, self.T) | self.T
|
||||
@rule
|
||||
def E():
|
||||
return seq(E, PLUS, T) | T
|
||||
|
||||
@rule
|
||||
def T(self):
|
||||
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
|
||||
@rule
|
||||
def T():
|
||||
return seq(LPAREN, E, RPAREN) | IDENTIFIER
|
||||
|
||||
PLUS = Terminal("+", name="+")
|
||||
LPAREN = Terminal("(", name="(")
|
||||
RPAREN = Terminal(")", name=")")
|
||||
IDENTIFIER = Terminal("id", name="id")
|
||||
G = Grammar(start=E)
|
||||
|
||||
table = G().build_table()
|
||||
tree, errors = runtime.Parser(table).parse(
|
||||
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
|
||||
)
|
||||
table = G.build_table()
|
||||
tree, errors = runtime.Parser(table).parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN))
|
||||
|
||||
assert errors == []
|
||||
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
|
||||
|
||||
|
||||
def test_all_generators():
|
||||
"""This grammar should work with everything honestly."""
|
||||
|
||||
class G(Grammar):
|
||||
start = "E"
|
||||
|
||||
@rule
|
||||
def E(self):
|
||||
return seq(self.E, self.PLUS, self.T) | self.T
|
||||
|
||||
@rule
|
||||
def T(self):
|
||||
return seq(self.LPAREN, self.E, self.RPAREN) | self.IDENTIFIER
|
||||
|
||||
PLUS = Terminal("+", name="+")
|
||||
LPAREN = Terminal("(", name="(")
|
||||
RPAREN = Terminal(")", name=")")
|
||||
IDENTIFIER = Terminal("id", name="id")
|
||||
|
||||
GENERATORS = [
|
||||
# parser.GenerateLR0,
|
||||
# parser.GeneratePager,
|
||||
parser.ParserGenerator,
|
||||
]
|
||||
for generator in GENERATORS:
|
||||
table = G().build_table(generator=generator)
|
||||
tree, errors = runtime.Parser(table).parse(
|
||||
Tokens(G.IDENTIFIER, G.PLUS, G.LPAREN, G.IDENTIFIER, G.RPAREN)
|
||||
)
|
||||
|
||||
print("\n")
|
||||
print(generator)
|
||||
print(f"{table.format()}")
|
||||
|
||||
assert errors == []
|
||||
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
|
||||
|
||||
|
||||
def test_grammar_aho_ullman_2():
|
||||
class TestGrammar(Grammar):
|
||||
start = "S"
|
||||
@rule
|
||||
def S():
|
||||
return seq(X, X)
|
||||
|
||||
@rule
|
||||
def S(self):
|
||||
return seq(self.X, self.X)
|
||||
@rule
|
||||
def X():
|
||||
return seq(A, X) | B
|
||||
|
||||
@rule
|
||||
def X(self):
|
||||
return seq(self.A, self.X) | self.B
|
||||
A = Terminal("A", "a")
|
||||
B = Terminal("B", "b")
|
||||
|
||||
A = Terminal("a")
|
||||
B = Terminal("b")
|
||||
|
||||
TestGrammar().build_table(generator=parser.ParserGenerator)
|
||||
# TestGrammar().build_table(generator=parser.GeneratePager)
|
||||
Grammar(start=S).build_table()
|
||||
|
||||
|
||||
def test_fun_lalr():
|
||||
@rule
|
||||
def S():
|
||||
return seq(V, E)
|
||||
|
||||
class TestGrammar(Grammar):
|
||||
start = "S"
|
||||
@rule
|
||||
def E():
|
||||
return F | seq(E, PLUS, F)
|
||||
|
||||
@rule
|
||||
def S(self):
|
||||
return seq(self.V, self.E)
|
||||
@rule
|
||||
def F():
|
||||
return V | INT | seq(LPAREN, E, RPAREN)
|
||||
|
||||
@rule
|
||||
def E(self):
|
||||
return self.F | seq(self.E, self.PLUS, self.F)
|
||||
@rule
|
||||
def V():
|
||||
return ID
|
||||
|
||||
@rule
|
||||
def F(self):
|
||||
return self.V | self.INT | seq(self.LPAREN, self.E, self.RPAREN)
|
||||
PLUS = Terminal("PLUS", "+")
|
||||
INT = Terminal("INT", "int")
|
||||
ID = Terminal("ID", "id")
|
||||
LPAREN = Terminal("LPAREN", "(")
|
||||
RPAREN = Terminal("RPAREN", ")")
|
||||
|
||||
@rule
|
||||
def V(self):
|
||||
return self.ID
|
||||
|
||||
PLUS = Terminal("+")
|
||||
INT = Terminal("int")
|
||||
ID = Terminal("id")
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
|
||||
TestGrammar().build_table()
|
||||
Grammar(start=S).build_table()
|
||||
|
||||
|
||||
def test_conflicting_names():
|
||||
|
|
@ -167,43 +117,28 @@ def test_conflicting_names():
|
|||
to understand.
|
||||
"""
|
||||
|
||||
class TestGrammar(Grammar):
|
||||
start = "IDENTIFIER"
|
||||
@rule("IDENTIFIER")
|
||||
def identifier():
|
||||
return IDENTIFIER
|
||||
|
||||
@rule("IDENTIFIER")
|
||||
def identifier(self):
|
||||
return self.IDENTIFIER
|
||||
|
||||
IDENTIFIER = Terminal("Identifier")
|
||||
IDENTIFIER = Terminal("IDENTIFIER", "Identifier")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
TestGrammar().build_table()
|
||||
Grammar(start=identifier).build_table()
|
||||
|
||||
|
||||
def test_grammar_ignore_trivia():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
@rule
|
||||
def sentence():
|
||||
return WORD | seq(sentence, WORD)
|
||||
|
||||
trivia = ["BLANK"]
|
||||
WORD = Terminal("WORD", "blah")
|
||||
BLANK = Terminal("BLANK", " ")
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
|
||||
table = G().build_table()
|
||||
table = Grammar(start=sentence, trivia=[BLANK]).build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
tree, errors = runtime.Parser(table).parse(
|
||||
Tokens(
|
||||
G.WORD,
|
||||
G.BLANK,
|
||||
G.WORD,
|
||||
G.BLANK,
|
||||
)
|
||||
)
|
||||
tree, errors = runtime.Parser(table).parse(Tokens(WORD, BLANK, WORD, BLANK))
|
||||
|
||||
assert errors == []
|
||||
assert tree == runtime.Tree(
|
||||
|
|
@ -234,135 +169,3 @@ def test_grammar_ignore_trivia():
|
|||
),
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def test_grammar_unknown_trivia():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
trivia = ["BLANK"]
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
G().build_table()
|
||||
|
||||
|
||||
def test_grammar_trivia_symbol():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
|
||||
trivia = [BLANK]
|
||||
|
||||
table = G().build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
|
||||
def test_grammar_trivia_constructor():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(trivia=[self.BLANK])
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
|
||||
table = G().build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
|
||||
def test_grammar_trivia_constructor_string():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(trivia=["BLANK"])
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
BLANK = Terminal(" ")
|
||||
|
||||
table = G().build_table()
|
||||
assert "BLANK" in table.trivia
|
||||
|
||||
|
||||
def test_grammar_trivia_constructor_string_unknown():
|
||||
class G(Grammar):
|
||||
start = "sentence"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(trivia=["BLANK"])
|
||||
|
||||
@rule
|
||||
def sentence(self):
|
||||
return self.WORD | seq(self.sentence, self.WORD)
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
G().build_table()
|
||||
|
||||
|
||||
def test_grammar_name_implicit():
|
||||
class FooGrammar(Grammar):
|
||||
start = "x"
|
||||
|
||||
@rule
|
||||
def x(self):
|
||||
return self.WORD
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
assert FooGrammar().name == "foo"
|
||||
|
||||
|
||||
def test_grammar_name_explicit_member():
|
||||
class FooGrammar(Grammar):
|
||||
start = "x"
|
||||
|
||||
name = "bar"
|
||||
|
||||
@rule
|
||||
def x(self):
|
||||
return self.WORD
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
assert FooGrammar().name == "bar"
|
||||
|
||||
|
||||
def test_grammar_name_explicit_constructor():
|
||||
class FooGrammar(Grammar):
|
||||
start = "x"
|
||||
|
||||
name = "bar"
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(name="baz")
|
||||
|
||||
@rule
|
||||
def x(self):
|
||||
return self.WORD
|
||||
|
||||
WORD = Terminal("blah")
|
||||
|
||||
assert FooGrammar().name == "baz"
|
||||
|
|
|
|||
|
|
@ -354,32 +354,33 @@ def test_edge_list_always_sorted(points: list[tuple[int, int]]):
|
|||
|
||||
|
||||
def test_lexer_compile():
|
||||
class LexTest(Grammar):
|
||||
@rule
|
||||
def foo(self):
|
||||
return self.IS
|
||||
@rule
|
||||
def foo():
|
||||
# NOTE: This is a hack to ensure the terminals are reachable. :P
|
||||
return IS | AS | IDENTIFIER
|
||||
|
||||
start = "foo"
|
||||
|
||||
IS = Terminal("is")
|
||||
AS = Terminal("as")
|
||||
IDENTIFIER = Terminal(
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
)
|
||||
IS = Terminal("IS", "is")
|
||||
AS = Terminal("AS", "as")
|
||||
IDENTIFIER = Terminal(
|
||||
"IDENTIFIER",
|
||||
Re.seq(
|
||||
Re.set(("a", "z"), ("A", "Z"), "_"),
|
||||
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
|
||||
)
|
||||
BLANKS = Terminal(Re.set("\r", "\n", "\t", " ").plus())
|
||||
)
|
||||
BLANKS = Terminal("BLANKS", Re.set("\r", "\n", "\t", " ").plus())
|
||||
|
||||
lexer = LexTest().compile_lexer()
|
||||
|
||||
LexTest = Grammar(start=foo, trivia=[BLANKS])
|
||||
lexer = LexTest.compile_lexer()
|
||||
dump_lexer_table(lexer)
|
||||
tokens = list(generic_tokenize("xy is ass", lexer))
|
||||
assert tokens == [
|
||||
(LexTest.IDENTIFIER, 0, 2),
|
||||
(LexTest.BLANKS, 2, 1),
|
||||
(LexTest.IS, 3, 2),
|
||||
(LexTest.BLANKS, 5, 1),
|
||||
(LexTest.IDENTIFIER, 6, 3),
|
||||
(IDENTIFIER, 0, 2),
|
||||
(BLANKS, 2, 1),
|
||||
(IS, 3, 2),
|
||||
(BLANKS, 5, 1),
|
||||
(IDENTIFIER, 6, 3),
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -387,34 +388,35 @@ def test_lexer_compile():
|
|||
def test_lexer_numbers(n: float):
|
||||
assume(math.isfinite(n))
|
||||
|
||||
class LexTest(Grammar):
|
||||
@rule
|
||||
def number(self):
|
||||
return self.NUMBER
|
||||
@rule
|
||||
def number():
|
||||
return NUMBER
|
||||
|
||||
start = "number"
|
||||
|
||||
NUMBER = Terminal(
|
||||
NUMBER = Terminal(
|
||||
"NUMBER",
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
Re.literal("."),
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
Re.literal("."),
|
||||
Re.set(("0", "9")).plus(),
|
||||
).question(),
|
||||
Re.seq(
|
||||
Re.set("e", "E"),
|
||||
Re.set("+", "-").question(),
|
||||
Re.set(("0", "9")).plus(),
|
||||
).question(),
|
||||
)
|
||||
).question(),
|
||||
Re.seq(
|
||||
Re.set("e", "E"),
|
||||
Re.set("+", "-").question(),
|
||||
Re.set(("0", "9")).plus(),
|
||||
).question(),
|
||||
)
|
||||
)
|
||||
|
||||
lexer = LexTest().compile_lexer()
|
||||
|
||||
LexTest = Grammar(start=number)
|
||||
|
||||
lexer = LexTest.compile_lexer()
|
||||
dump_lexer_table(lexer)
|
||||
|
||||
number_string = str(n)
|
||||
|
||||
tokens = list(generic_tokenize(number_string, lexer))
|
||||
assert tokens == [
|
||||
(LexTest.NUMBER, 0, len(number_string)),
|
||||
(NUMBER, 0, len(number_string)),
|
||||
]
|
||||
|
|
|
|||
|
|
@ -23,69 +23,66 @@ import parser.wadler.builder as builder
|
|||
import parser.wadler.runtime as runtime
|
||||
|
||||
|
||||
class JsonGrammar(Grammar):
|
||||
start = "root"
|
||||
|
||||
trivia = ["BLANKS"]
|
||||
|
||||
def make_json_grammar():
|
||||
@rule
|
||||
def root(self):
|
||||
return self.value
|
||||
def root():
|
||||
return value
|
||||
|
||||
@rule(transparent=True)
|
||||
def value(self):
|
||||
def value():
|
||||
return (
|
||||
self.object
|
||||
| self.array
|
||||
| self.NUMBER
|
||||
| self.TRUE
|
||||
| self.FALSE
|
||||
| self.NULL
|
||||
| self.STRING
|
||||
object
|
||||
| array
|
||||
| NUMBER
|
||||
| TRUE
|
||||
| FALSE
|
||||
| NULL
|
||||
| STRING
|
||||
)
|
||||
|
||||
@rule
|
||||
def object(self):
|
||||
def object():
|
||||
return group(
|
||||
self.LCURLY + opt(indent(newline() + self._object_pairs)) + newline() + self.RCURLY
|
||||
LCURLY + opt(indent(newline() + _object_pairs)) + newline() + RCURLY
|
||||
)
|
||||
|
||||
@rule
|
||||
def _object_pairs(self):
|
||||
def _object_pairs():
|
||||
return alt(
|
||||
self.object_pair,
|
||||
self.object_pair + self.COMMA + newline(" ") + self._object_pairs,
|
||||
object_pair,
|
||||
object_pair + COMMA + newline(" ") + _object_pairs,
|
||||
)
|
||||
|
||||
@rule
|
||||
def object_pair(self):
|
||||
return group(self.STRING + self.COLON + indent(newline(" ") + self.value))
|
||||
def object_pair():
|
||||
return group(STRING + COLON + indent(newline(" ") + value))
|
||||
|
||||
@rule
|
||||
def array(self):
|
||||
def array():
|
||||
return group(
|
||||
self.LSQUARE + opt(indent(newline() + self._array_items)) + newline() + self.RSQUARE
|
||||
LSQUARE + opt(indent(newline() + _array_items)) + newline() + RSQUARE
|
||||
)
|
||||
|
||||
@rule
|
||||
def _array_items(self):
|
||||
def _array_items():
|
||||
return alt(
|
||||
self.value,
|
||||
self.value + self.COMMA + newline(" ") + self._array_items,
|
||||
value,
|
||||
value + COMMA + newline(" ") + _array_items,
|
||||
)
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
|
||||
|
||||
LCURLY = Terminal("{")
|
||||
RCURLY = Terminal("}")
|
||||
COMMA = Terminal(",")
|
||||
COLON = Terminal(":")
|
||||
LSQUARE = Terminal("[")
|
||||
RSQUARE = Terminal("]")
|
||||
TRUE = Terminal("true")
|
||||
FALSE = Terminal("false")
|
||||
NULL = Terminal("null")
|
||||
LCURLY = Terminal("LCURLY", "{")
|
||||
RCURLY = Terminal("RCURLY", "}")
|
||||
COMMA = Terminal("COMMA", ",")
|
||||
COLON = Terminal("COLON", ":")
|
||||
LSQUARE = Terminal("LSQUARE", "[")
|
||||
RSQUARE = Terminal("RSQUARE", "]")
|
||||
TRUE = Terminal("TRUE", "true")
|
||||
FALSE = Terminal("FALSE", "false")
|
||||
NULL = Terminal("NULL", "null")
|
||||
NUMBER = Terminal(
|
||||
"NUMBER",
|
||||
Re.seq(
|
||||
Re.set(("0", "9")).plus(),
|
||||
Re.seq(
|
||||
|
|
@ -100,6 +97,7 @@ class JsonGrammar(Grammar):
|
|||
),
|
||||
)
|
||||
STRING = Terminal(
|
||||
"STRING",
|
||||
Re.seq(
|
||||
Re.literal('"'),
|
||||
(~Re.set('"', "\\") | (Re.set("\\") + Re.any())).star(),
|
||||
|
|
@ -107,8 +105,9 @@ class JsonGrammar(Grammar):
|
|||
)
|
||||
)
|
||||
|
||||
return Grammar(start=root, trivia=[BLANKS])
|
||||
|
||||
JSON = JsonGrammar()
|
||||
JSON = make_json_grammar()
|
||||
JSON_PARSER = JSON.build_table()
|
||||
JSON_LEXER = JSON.compile_lexer()
|
||||
|
||||
|
|
@ -228,47 +227,49 @@ def test_layout_basic():
|
|||
)
|
||||
|
||||
|
||||
class TG(Grammar):
|
||||
start = "root"
|
||||
trivia = ["BLANKS", "LINE_BREAK", "COMMENT"]
|
||||
def make_test_grammar():
|
||||
@rule
|
||||
def root():
|
||||
return _expression
|
||||
|
||||
@rule
|
||||
def root(self):
|
||||
return self._expression
|
||||
def _expression():
|
||||
return word | list
|
||||
|
||||
@rule
|
||||
def _expression(self):
|
||||
return self.word | self.list
|
||||
def list():
|
||||
return group(LPAREN, indent(nl, _expressions), nl, RPAREN)
|
||||
|
||||
@rule
|
||||
def list(self):
|
||||
return group(self.LPAREN, indent(nl, self._expressions), nl, self.RPAREN)
|
||||
def _expressions():
|
||||
return _expression | seq(_expressions, sp, _expression)
|
||||
|
||||
@rule
|
||||
def _expressions(self):
|
||||
return self._expression | seq(self._expressions, sp, self._expression)
|
||||
def word():
|
||||
return OK | seq(BREAK, br, BREAK)
|
||||
|
||||
@rule
|
||||
def word(self):
|
||||
return self.OK | seq(self.BREAK, br, self.BREAK)
|
||||
LPAREN = Terminal("LPAREN", "(")
|
||||
RPAREN = Terminal("RPAREN", ")")
|
||||
OK = Terminal("OK", "ok")
|
||||
BREAK = Terminal("BREAK", "break")
|
||||
|
||||
LPAREN = Terminal("(")
|
||||
RPAREN = Terminal(")")
|
||||
OK = Terminal("ok")
|
||||
BREAK = Terminal("break")
|
||||
|
||||
BLANKS = Terminal(Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal(Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
|
||||
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
|
||||
COMMENT = Terminal(
|
||||
"COMMENT",
|
||||
Re.seq(Re.literal(";"), Re.set("\n").invert().star()),
|
||||
trivia_mode=TriviaMode.LineComment,
|
||||
)
|
||||
|
||||
return Grammar(start=root, trivia=[BLANKS, LINE_BREAK, COMMENT], pretty_indent=" ")
|
||||
|
||||
TG = make_test_grammar()
|
||||
|
||||
|
||||
|
||||
def test_forced_break():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = "((ok ok) (ok break break ok) (ok ok ok ok))"
|
||||
|
||||
|
|
@ -276,29 +277,28 @@ def test_forced_break():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
"""
|
||||
(
|
||||
(ok ok)
|
||||
(
|
||||
ok
|
||||
break
|
||||
break
|
||||
ok
|
||||
)
|
||||
(ok ok ok ok)
|
||||
(ok ok)
|
||||
(
|
||||
ok
|
||||
break
|
||||
break
|
||||
ok
|
||||
)
|
||||
(ok ok ok ok)
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def test_maintaining_line_breaks():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok)
|
||||
; Don't break here.
|
||||
|
|
@ -316,30 +316,29 @@ def test_maintaining_line_breaks():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
"""
|
||||
(
|
||||
(ok ok)
|
||||
; Don't break here.
|
||||
(ok)
|
||||
*SPACE*
|
||||
; ^ Do keep this break though.
|
||||
(ok)
|
||||
*SPACE*
|
||||
; ^ This should only be one break.
|
||||
(ok)
|
||||
(ok ok)
|
||||
; Don't break here.
|
||||
(ok)
|
||||
*SPACE**SPACE*
|
||||
; ^ Do keep this break though.
|
||||
(ok)
|
||||
*SPACE**SPACE*
|
||||
; ^ This should only be one break.
|
||||
(ok)
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def test_trailing_trivia():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok)); Don't lose this!
|
||||
|
||||
|
|
@ -350,7 +349,7 @@ def test_trailing_trivia():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
|
|
@ -363,9 +362,8 @@ def test_trailing_trivia():
|
|||
|
||||
|
||||
def test_trailing_trivia_two():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok))
|
||||
|
||||
|
|
@ -376,7 +374,7 @@ def test_trailing_trivia_two():
|
|||
assert errors == []
|
||||
assert tree is not None
|
||||
|
||||
printer = runtime.Printer(builder.compile_pretty_table(g))
|
||||
printer = runtime.Printer(builder.compile_pretty_table(TG))
|
||||
result = printer.format_tree(tree, text, 200).apply_to_source(text)
|
||||
|
||||
assert result == _output(
|
||||
|
|
@ -389,9 +387,8 @@ def test_trailing_trivia_two():
|
|||
|
||||
|
||||
def test_trailing_trivia_split():
|
||||
g = TG()
|
||||
g_lexer = g.compile_lexer()
|
||||
g_parser = g.build_table()
|
||||
g_lexer = TG.compile_lexer()
|
||||
g_parser = TG.build_table()
|
||||
|
||||
text = """((ok ok)); Don't lose this!
|
||||
|
||||
|
|
@ -432,7 +429,7 @@ def test_trailing_trivia_split():
|
|||
print(f"{mode:25} {t.kind:10} {repr(text[t.start:t.end])}")
|
||||
|
||||
trivia_doc = runtime.Matcher(
|
||||
builder.MatcherTable(ParseTable([], [], set()), {}, {}),
|
||||
builder.MatcherTable(ParseTable([], [], set(), {}), {}, {}),
|
||||
TRIVIA_MODES,
|
||||
).apply_post_trivia(
|
||||
token.post_trivia,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue