Move the examples into tests
This commit is contained in:
parent
e04aa1966e
commit
f6bc2ccea8
2 changed files with 181 additions and 141 deletions
4
makefile
4
makefile
|
|
@ -1,3 +1,7 @@
|
||||||
|
# The first test just makes sure we don't have any particular dependencies at
|
||||||
|
# all, so it can be dropped into a random unrelated project for build
|
||||||
|
# purposes.
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test:
|
||||||
|
python3 ./parser/parser.py
|
||||||
pdm run pytest
|
pdm run pytest
|
||||||
|
|
|
||||||
|
|
@ -7,11 +7,6 @@ import parser.runtime as runtime
|
||||||
|
|
||||||
from parser import Grammar, seq, rule, Terminal
|
from parser import Grammar, seq, rule, Terminal
|
||||||
|
|
||||||
PLUS = Terminal("+")
|
|
||||||
LPAREN = Terminal("(")
|
|
||||||
RPAREN = Terminal(")")
|
|
||||||
IDENTIFIER = Terminal("id")
|
|
||||||
|
|
||||||
|
|
||||||
class Tokens:
|
class Tokens:
|
||||||
def __init__(self, *toks: Terminal):
|
def __init__(self, *toks: Terminal):
|
||||||
|
|
@ -40,7 +35,15 @@ def _tree(treeform) -> runtime.Tree | runtime.TokenValue:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LR0Grammar(Grammar):
|
def test_lr0_lr0():
|
||||||
|
"""An LR0 grammar should work with an LR0 generator."""
|
||||||
|
|
||||||
|
PLUS = Terminal("+")
|
||||||
|
LPAREN = Terminal("(")
|
||||||
|
RPAREN = Terminal(")")
|
||||||
|
IDENTIFIER = Terminal("id")
|
||||||
|
|
||||||
|
class LR0Grammar(Grammar):
|
||||||
start = "E"
|
start = "E"
|
||||||
generator = parser.GenerateLR0
|
generator = parser.GenerateLR0
|
||||||
|
|
||||||
|
|
@ -52,17 +55,171 @@ class LR0Grammar(Grammar):
|
||||||
def T(self):
|
def T(self):
|
||||||
return seq(LPAREN, self.E, RPAREN) | IDENTIFIER
|
return seq(LPAREN, self.E, RPAREN) | IDENTIFIER
|
||||||
|
|
||||||
|
|
||||||
def test_lr0_lr0():
|
|
||||||
"""An LR0 grammar should work with an LR0 generator."""
|
|
||||||
table = LR0Grammar().build_table()
|
table = LR0Grammar().build_table()
|
||||||
parser = runtime.Parser(table)
|
tree, errors = runtime.Parser(table).parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN))
|
||||||
tree, errors = parser.parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN))
|
|
||||||
|
|
||||||
assert errors == []
|
assert errors == []
|
||||||
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
|
assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")")))
|
||||||
|
|
||||||
|
|
||||||
|
def test_lr0_shift_reduce():
|
||||||
|
"""This one should not work in LR0- it has a shift/reduce conflict, but works in SLR1."""
|
||||||
|
|
||||||
|
PLUS = Terminal("+")
|
||||||
|
LPAREN = Terminal("(")
|
||||||
|
RPAREN = Terminal(")")
|
||||||
|
LSQUARE = Terminal("[")
|
||||||
|
RSQUARE = Terminal("]")
|
||||||
|
IDENTIFIER = Terminal("id")
|
||||||
|
|
||||||
|
class TestGrammar(Grammar):
|
||||||
|
start = "E"
|
||||||
|
generator = parser.GenerateLR0
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def E(self):
|
||||||
|
return seq(self.E, PLUS, self.T) | self.T
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def T(self):
|
||||||
|
return (
|
||||||
|
seq(LPAREN, self.E, RPAREN) | IDENTIFIER | seq(IDENTIFIER, LSQUARE, self.E, RSQUARE)
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(parser.AmbiguityError):
|
||||||
|
TestGrammar().build_table()
|
||||||
|
|
||||||
|
TestGrammar().build_table(generator=parser.GenerateSLR1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_lr0_reduce_reduce():
|
||||||
|
"""This one should not work, it has a reduce-reduce conflict."""
|
||||||
|
|
||||||
|
PLUS = Terminal("+")
|
||||||
|
EQUAL = Terminal("=")
|
||||||
|
LPAREN = Terminal("(")
|
||||||
|
RPAREN = Terminal(")")
|
||||||
|
IDENTIFIER = Terminal("id")
|
||||||
|
|
||||||
|
class TestGrammar(Grammar):
|
||||||
|
start = "E"
|
||||||
|
generator = parser.GenerateLR0
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def E(self):
|
||||||
|
return seq(self.E, PLUS, self.T) | self.T | seq(self.V, EQUAL, self.E)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def T(self):
|
||||||
|
return seq(LPAREN, self.E, RPAREN) | IDENTIFIER
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def V(self):
|
||||||
|
return IDENTIFIER
|
||||||
|
|
||||||
|
with pytest.raises(parser.AmbiguityError):
|
||||||
|
TestGrammar().build_table()
|
||||||
|
|
||||||
|
|
||||||
|
def test_lr0_empty():
|
||||||
|
"""LR0 can't handle empty productions because it doesn't know when to reduce."""
|
||||||
|
BOOP = Terminal("boop")
|
||||||
|
BEEP = Terminal("beep")
|
||||||
|
|
||||||
|
class TestGrammar(Grammar):
|
||||||
|
start = "E"
|
||||||
|
generator = parser.GenerateLR0
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def E(self):
|
||||||
|
return seq(self.F, BOOP)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def F(self):
|
||||||
|
return BEEP | parser.Nothing
|
||||||
|
|
||||||
|
with pytest.raises(parser.AmbiguityError):
|
||||||
|
TestGrammar().build_table()
|
||||||
|
|
||||||
|
|
||||||
|
def test_grammar_aho_ullman_1():
|
||||||
|
EQUAL = Terminal("=")
|
||||||
|
STAR = Terminal("*")
|
||||||
|
ID = Terminal("id")
|
||||||
|
|
||||||
|
class TestGrammar(Grammar):
|
||||||
|
start = "S"
|
||||||
|
generator = parser.GenerateSLR1
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def S(self):
|
||||||
|
return seq(self.L, EQUAL, self.R) | self.R
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def L(self):
|
||||||
|
return seq(STAR, self.R) | ID
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def R(self):
|
||||||
|
return self.L
|
||||||
|
|
||||||
|
with pytest.raises(parser.AmbiguityError):
|
||||||
|
TestGrammar().build_table()
|
||||||
|
|
||||||
|
TestGrammar().build_table(generator=parser.GenerateLR1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_grammar_aho_ullman_2():
|
||||||
|
A = Terminal("a")
|
||||||
|
B = Terminal("b")
|
||||||
|
|
||||||
|
class TestGrammar(Grammar):
|
||||||
|
start = "S"
|
||||||
|
generator = parser.GenerateSLR1
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def S(self):
|
||||||
|
return seq(self.X, self.X)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def X(self):
|
||||||
|
return seq(A, self.X) | B
|
||||||
|
|
||||||
|
TestGrammar().build_table()
|
||||||
|
TestGrammar().build_table(generator=parser.GenerateLR1)
|
||||||
|
TestGrammar().build_table(generator=parser.GenerateLALR)
|
||||||
|
|
||||||
|
|
||||||
|
def test_fun_lalr():
|
||||||
|
PLUS = Terminal("+")
|
||||||
|
INT = Terminal("int")
|
||||||
|
ID = Terminal("id")
|
||||||
|
LPAREN = Terminal("(")
|
||||||
|
RPAREN = Terminal(")")
|
||||||
|
|
||||||
|
class TestGrammar(Grammar):
|
||||||
|
start = "S"
|
||||||
|
generator = parser.GenerateLALR
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def S(self):
|
||||||
|
return seq(self.V, self.E)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def E(self):
|
||||||
|
return self.F | seq(self.E, PLUS, self.F)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def F(self):
|
||||||
|
return self.V | INT | seq(LPAREN, self.E, RPAREN)
|
||||||
|
|
||||||
|
@rule
|
||||||
|
def V(self):
|
||||||
|
return ID
|
||||||
|
|
||||||
|
TestGrammar().build_table()
|
||||||
|
|
||||||
|
|
||||||
def test_conflicting_names():
|
def test_conflicting_names():
|
||||||
"""Terminals and nonterminals cannot have the same name.
|
"""Terminals and nonterminals cannot have the same name.
|
||||||
|
|
||||||
|
|
@ -88,124 +245,3 @@ def test_conflicting_names():
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
TestGrammar().build_table()
|
TestGrammar().build_table()
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# Examples
|
|
||||||
###############################################################################
|
|
||||||
# def examples():
|
|
||||||
# def dump_grammar(grammar):
|
|
||||||
# for name, symbols in grammar:
|
|
||||||
# print(f"{name} -> {symbols}")
|
|
||||||
# print()
|
|
||||||
|
|
||||||
|
|
||||||
# # This one doesn't work with LR0, though, it has a shift/reduce conflict.
|
|
||||||
# print("grammar_lr0_shift_reduce (LR0):")
|
|
||||||
# grammar_lr0_shift_reduce = grammar_simple + [
|
|
||||||
# ("T", ["id", "[", "E", "]"]),
|
|
||||||
# ]
|
|
||||||
# try:
|
|
||||||
# gen = GenerateLR0("E", grammar_lr0_shift_reduce)
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# assert False
|
|
||||||
# except ValueError as e:
|
|
||||||
# print(e)
|
|
||||||
# print()
|
|
||||||
|
|
||||||
# # Nor does this: it has a reduce/reduce conflict.
|
|
||||||
# print("grammar_lr0_reduce_reduce (LR0):")
|
|
||||||
# grammar_lr0_reduce_reduce = grammar_simple + [
|
|
||||||
# ("E", ["V", "=", "E"]),
|
|
||||||
# ("V", ["id"]),
|
|
||||||
# ]
|
|
||||||
# try:
|
|
||||||
# gen = GenerateLR0("E", grammar_lr0_reduce_reduce)
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# assert False
|
|
||||||
# except ValueError as e:
|
|
||||||
# print(e)
|
|
||||||
# print()
|
|
||||||
|
|
||||||
# # Nullable symbols just don't work with constructs like this, because you can't
|
|
||||||
# # look ahead to figure out if you should reduce an empty 'F' or not.
|
|
||||||
# print("grammar_nullable (LR0):")
|
|
||||||
# grammar_nullable = [
|
|
||||||
# ("E", ["F", "boop"]),
|
|
||||||
# ("F", ["beep"]),
|
|
||||||
# ("F", []),
|
|
||||||
# ]
|
|
||||||
# try:
|
|
||||||
# gen = GenerateLR0("E", grammar_nullable)
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# assert False
|
|
||||||
# except ValueError as e:
|
|
||||||
# print(e)
|
|
||||||
# print()
|
|
||||||
|
|
||||||
# print("grammar_lr0_shift_reduce (SLR1):")
|
|
||||||
# dump_grammar(grammar_lr0_shift_reduce)
|
|
||||||
# gen = GenerateSLR1("E", grammar_lr0_shift_reduce)
|
|
||||||
# print(f"Follow('E'): {str([gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key['E'])])}")
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# print(table.format())
|
|
||||||
# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True)
|
|
||||||
# print(format_node(tree) + "\n")
|
|
||||||
# print()
|
|
||||||
|
|
||||||
# # SLR1 can't handle this.
|
|
||||||
# print("grammar_aho_ullman_1 (SLR1):")
|
|
||||||
# grammar_aho_ullman_1 = [
|
|
||||||
# ("S", ["L", "=", "R"]),
|
|
||||||
# ("S", ["R"]),
|
|
||||||
# ("L", ["*", "R"]),
|
|
||||||
# ("L", ["id"]),
|
|
||||||
# ("R", ["L"]),
|
|
||||||
# ]
|
|
||||||
# try:
|
|
||||||
# gen = GenerateSLR1("S", grammar_aho_ullman_1)
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# assert False
|
|
||||||
# except ValueError as e:
|
|
||||||
# print(e)
|
|
||||||
# print()
|
|
||||||
|
|
||||||
# # Here's an example with a full LR1 grammar, though.
|
|
||||||
# print("grammar_aho_ullman_2 (LR1):")
|
|
||||||
# grammar_aho_ullman_2 = [
|
|
||||||
# ("S", ["X", "X"]),
|
|
||||||
# ("X", ["a", "X"]),
|
|
||||||
# ("X", ["b"]),
|
|
||||||
# ]
|
|
||||||
# gen = GenerateLR1("S", grammar_aho_ullman_2)
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# print(table.format())
|
|
||||||
# parse(table, ["b", "a", "a", "b"], trace=True)
|
|
||||||
# print()
|
|
||||||
|
|
||||||
# # What happens if we do LALR to it?
|
|
||||||
# print("grammar_aho_ullman_2 (LALR):")
|
|
||||||
# gen = GenerateLALR("S", grammar_aho_ullman_2)
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# print(table.format())
|
|
||||||
# print()
|
|
||||||
|
|
||||||
# # A fun LALAR grammar.
|
|
||||||
# print("grammar_lalr:")
|
|
||||||
# grammar_lalr = [
|
|
||||||
# ("S", ["V", "E"]),
|
|
||||||
# ("E", ["F"]),
|
|
||||||
# ("E", ["E", "+", "F"]),
|
|
||||||
# ("F", ["V"]),
|
|
||||||
# ("F", ["int"]),
|
|
||||||
# ("F", ["(", "E", ")"]),
|
|
||||||
# ("V", ["id"]),
|
|
||||||
# ]
|
|
||||||
# gen = GenerateLALR("S", grammar_lalr)
|
|
||||||
# table = gen.gen_table()
|
|
||||||
# print(table.format())
|
|
||||||
# print()
|
|
||||||
|
|
||||||
|
|
||||||
# if __name__ == "__main__":
|
|
||||||
# examples()
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue