diff --git a/makefile b/makefile index a5fb8a9..743eb52 100644 --- a/makefile +++ b/makefile @@ -1,3 +1,7 @@ +# The first test just makes sure we don't have any particular dependencies at +# all, so it can be dropped into a random unrelated project for build +# purposes. .PHONY: test test: + python3 ./parser/parser.py pdm run pytest diff --git a/tests/test_grammar.py b/tests/test_grammar.py index ba1525b..a320e06 100644 --- a/tests/test_grammar.py +++ b/tests/test_grammar.py @@ -7,11 +7,6 @@ import parser.runtime as runtime from parser import Grammar, seq, rule, Terminal -PLUS = Terminal("+") -LPAREN = Terminal("(") -RPAREN = Terminal(")") -IDENTIFIER = Terminal("id") - class Tokens: def __init__(self, *toks: Terminal): @@ -40,29 +35,191 @@ def _tree(treeform) -> runtime.Tree | runtime.TokenValue: ) -class LR0Grammar(Grammar): - start = "E" - generator = parser.GenerateLR0 - - @rule - def E(self): - return seq(self.E, PLUS, self.T) | self.T - - @rule - def T(self): - return seq(LPAREN, self.E, RPAREN) | IDENTIFIER - - def test_lr0_lr0(): """An LR0 grammar should work with an LR0 generator.""" + + PLUS = Terminal("+") + LPAREN = Terminal("(") + RPAREN = Terminal(")") + IDENTIFIER = Terminal("id") + + class LR0Grammar(Grammar): + start = "E" + generator = parser.GenerateLR0 + + @rule + def E(self): + return seq(self.E, PLUS, self.T) | self.T + + @rule + def T(self): + return seq(LPAREN, self.E, RPAREN) | IDENTIFIER + table = LR0Grammar().build_table() - parser = runtime.Parser(table) - tree, errors = parser.parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN)) + tree, errors = runtime.Parser(table).parse(Tokens(IDENTIFIER, PLUS, LPAREN, IDENTIFIER, RPAREN)) assert errors == [] assert tree == _tree(("E", ("E", ("T", "id")), "+", ("T", "(", ("E", ("T", "id")), ")"))) +def test_lr0_shift_reduce(): + """This one should not work in LR0- it has a shift/reduce conflict, but works in SLR1.""" + + PLUS = Terminal("+") + LPAREN = Terminal("(") + RPAREN = Terminal(")") + LSQUARE = Terminal("[") + RSQUARE = Terminal("]") + IDENTIFIER = Terminal("id") + + class TestGrammar(Grammar): + start = "E" + generator = parser.GenerateLR0 + + @rule + def E(self): + return seq(self.E, PLUS, self.T) | self.T + + @rule + def T(self): + return ( + seq(LPAREN, self.E, RPAREN) | IDENTIFIER | seq(IDENTIFIER, LSQUARE, self.E, RSQUARE) + ) + + with pytest.raises(parser.AmbiguityError): + TestGrammar().build_table() + + TestGrammar().build_table(generator=parser.GenerateSLR1) + + +def test_lr0_reduce_reduce(): + """This one should not work, it has a reduce-reduce conflict.""" + + PLUS = Terminal("+") + EQUAL = Terminal("=") + LPAREN = Terminal("(") + RPAREN = Terminal(")") + IDENTIFIER = Terminal("id") + + class TestGrammar(Grammar): + start = "E" + generator = parser.GenerateLR0 + + @rule + def E(self): + return seq(self.E, PLUS, self.T) | self.T | seq(self.V, EQUAL, self.E) + + @rule + def T(self): + return seq(LPAREN, self.E, RPAREN) | IDENTIFIER + + @rule + def V(self): + return IDENTIFIER + + with pytest.raises(parser.AmbiguityError): + TestGrammar().build_table() + + +def test_lr0_empty(): + """LR0 can't handle empty productions because it doesn't know when to reduce.""" + BOOP = Terminal("boop") + BEEP = Terminal("beep") + + class TestGrammar(Grammar): + start = "E" + generator = parser.GenerateLR0 + + @rule + def E(self): + return seq(self.F, BOOP) + + @rule + def F(self): + return BEEP | parser.Nothing + + with pytest.raises(parser.AmbiguityError): + TestGrammar().build_table() + + +def test_grammar_aho_ullman_1(): + EQUAL = Terminal("=") + STAR = Terminal("*") + ID = Terminal("id") + + class TestGrammar(Grammar): + start = "S" + generator = parser.GenerateSLR1 + + @rule + def S(self): + return seq(self.L, EQUAL, self.R) | self.R + + @rule + def L(self): + return seq(STAR, self.R) | ID + + @rule + def R(self): + return self.L + + with pytest.raises(parser.AmbiguityError): + TestGrammar().build_table() + + TestGrammar().build_table(generator=parser.GenerateLR1) + + +def test_grammar_aho_ullman_2(): + A = Terminal("a") + B = Terminal("b") + + class TestGrammar(Grammar): + start = "S" + generator = parser.GenerateSLR1 + + @rule + def S(self): + return seq(self.X, self.X) + + @rule + def X(self): + return seq(A, self.X) | B + + TestGrammar().build_table() + TestGrammar().build_table(generator=parser.GenerateLR1) + TestGrammar().build_table(generator=parser.GenerateLALR) + + +def test_fun_lalr(): + PLUS = Terminal("+") + INT = Terminal("int") + ID = Terminal("id") + LPAREN = Terminal("(") + RPAREN = Terminal(")") + + class TestGrammar(Grammar): + start = "S" + generator = parser.GenerateLALR + + @rule + def S(self): + return seq(self.V, self.E) + + @rule + def E(self): + return self.F | seq(self.E, PLUS, self.F) + + @rule + def F(self): + return self.V | INT | seq(LPAREN, self.E, RPAREN) + + @rule + def V(self): + return ID + + TestGrammar().build_table() + + def test_conflicting_names(): """Terminals and nonterminals cannot have the same name. @@ -88,124 +245,3 @@ def test_conflicting_names(): with pytest.raises(ValueError): TestGrammar().build_table() - - -############################################################################### -# Examples -############################################################################### -# def examples(): -# def dump_grammar(grammar): -# for name, symbols in grammar: -# print(f"{name} -> {symbols}") -# print() - - -# # This one doesn't work with LR0, though, it has a shift/reduce conflict. -# print("grammar_lr0_shift_reduce (LR0):") -# grammar_lr0_shift_reduce = grammar_simple + [ -# ("T", ["id", "[", "E", "]"]), -# ] -# try: -# gen = GenerateLR0("E", grammar_lr0_shift_reduce) -# table = gen.gen_table() -# assert False -# except ValueError as e: -# print(e) -# print() - -# # Nor does this: it has a reduce/reduce conflict. -# print("grammar_lr0_reduce_reduce (LR0):") -# grammar_lr0_reduce_reduce = grammar_simple + [ -# ("E", ["V", "=", "E"]), -# ("V", ["id"]), -# ] -# try: -# gen = GenerateLR0("E", grammar_lr0_reduce_reduce) -# table = gen.gen_table() -# assert False -# except ValueError as e: -# print(e) -# print() - -# # Nullable symbols just don't work with constructs like this, because you can't -# # look ahead to figure out if you should reduce an empty 'F' or not. -# print("grammar_nullable (LR0):") -# grammar_nullable = [ -# ("E", ["F", "boop"]), -# ("F", ["beep"]), -# ("F", []), -# ] -# try: -# gen = GenerateLR0("E", grammar_nullable) -# table = gen.gen_table() -# assert False -# except ValueError as e: -# print(e) -# print() - -# print("grammar_lr0_shift_reduce (SLR1):") -# dump_grammar(grammar_lr0_shift_reduce) -# gen = GenerateSLR1("E", grammar_lr0_shift_reduce) -# print(f"Follow('E'): {str([gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key['E'])])}") -# table = gen.gen_table() -# print(table.format()) -# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True) -# print(format_node(tree) + "\n") -# print() - -# # SLR1 can't handle this. -# print("grammar_aho_ullman_1 (SLR1):") -# grammar_aho_ullman_1 = [ -# ("S", ["L", "=", "R"]), -# ("S", ["R"]), -# ("L", ["*", "R"]), -# ("L", ["id"]), -# ("R", ["L"]), -# ] -# try: -# gen = GenerateSLR1("S", grammar_aho_ullman_1) -# table = gen.gen_table() -# assert False -# except ValueError as e: -# print(e) -# print() - -# # Here's an example with a full LR1 grammar, though. -# print("grammar_aho_ullman_2 (LR1):") -# grammar_aho_ullman_2 = [ -# ("S", ["X", "X"]), -# ("X", ["a", "X"]), -# ("X", ["b"]), -# ] -# gen = GenerateLR1("S", grammar_aho_ullman_2) -# table = gen.gen_table() -# print(table.format()) -# parse(table, ["b", "a", "a", "b"], trace=True) -# print() - -# # What happens if we do LALR to it? -# print("grammar_aho_ullman_2 (LALR):") -# gen = GenerateLALR("S", grammar_aho_ullman_2) -# table = gen.gen_table() -# print(table.format()) -# print() - -# # A fun LALAR grammar. -# print("grammar_lalr:") -# grammar_lalr = [ -# ("S", ["V", "E"]), -# ("E", ["F"]), -# ("E", ["E", "+", "F"]), -# ("F", ["V"]), -# ("F", ["int"]), -# ("F", ["(", "E", ")"]), -# ("V", ["id"]), -# ] -# gen = GenerateLALR("S", grammar_lalr) -# table = gen.gen_table() -# print(table.format()) -# print() - - -# if __name__ == "__main__": -# examples()