Start moving the examples into tests
This commit is contained in:
parent
d3b8d0e836
commit
e04aa1966e
6 changed files with 221 additions and 237 deletions
214
parser/parser.py
214
parser/parser.py
|
|
@ -1097,73 +1097,6 @@ class GenerateLR0:
|
|||
return builder.flush(config_sets)
|
||||
|
||||
|
||||
def parse(table: ParseTable, input, trace=False):
|
||||
"""Parse the input with the generated parsing table and return the
|
||||
concrete syntax tree.
|
||||
|
||||
The parsing table can be generated by GenerateLR0.gen_table() or by any
|
||||
of the other generators below. The parsing mechanism never changes, only
|
||||
the table generation mechanism.
|
||||
|
||||
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
|
||||
one on for you.
|
||||
|
||||
This is not a *great* parser, it's really just a demo for what you can
|
||||
do with the table.
|
||||
"""
|
||||
assert "$" not in input
|
||||
input = input + ["$"]
|
||||
input_index = 0
|
||||
|
||||
# Our stack is a stack of tuples, where the first entry is the state number
|
||||
# and the second entry is the 'value' that was generated when the state was
|
||||
# pushed.
|
||||
stack: list[typing.Tuple[int, typing.Any]] = [(0, None)]
|
||||
while True:
|
||||
current_state = stack[-1][0]
|
||||
current_token = input[input_index]
|
||||
|
||||
action = table.actions[current_state].get(current_token, Error())
|
||||
if trace:
|
||||
print(
|
||||
"{stack: <20} {input: <50} {action: <5}".format(
|
||||
stack=repr([s[0] for s in stack]),
|
||||
input=repr(input[input_index:]),
|
||||
action=repr(action),
|
||||
)
|
||||
)
|
||||
|
||||
match action:
|
||||
case Accept():
|
||||
return stack[-1][1]
|
||||
|
||||
case Reduce(name=name, count=size, transparent=transparent):
|
||||
children = []
|
||||
for _, c in stack[-size:]:
|
||||
if isinstance(c, tuple) and c[0] is None:
|
||||
children.extend(c[1])
|
||||
else:
|
||||
children.append(c)
|
||||
|
||||
value = (name if not transparent else None, tuple(children))
|
||||
stack = stack[:-size]
|
||||
|
||||
goto = table.gotos[stack[-1][0]].get(name)
|
||||
assert goto is not None
|
||||
stack.append((goto, value))
|
||||
|
||||
case Shift(state):
|
||||
stack.append((state, (current_token, ())))
|
||||
input_index += 1
|
||||
|
||||
case Error():
|
||||
raise ValueError(
|
||||
"Syntax error: unexpected symbol {sym}".format(
|
||||
sym=current_token,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# SLR(1)
|
||||
###############################################################################
|
||||
|
|
@ -1978,150 +1911,3 @@ class Grammar:
|
|||
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)
|
||||
table = gen.gen_table()
|
||||
return table
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Formatting
|
||||
###############################################################################
|
||||
def format_node(node):
    """Return an indented rendering of a concrete syntax tree, from parse().

    node is a (name, children) pair where children is a sequence of nodes of
    the same shape. The name of each node is written on its own line, and
    every level of nesting adds one more leading space.

    The tree is walked iteratively rather than recursively, so very deep
    trees (which left-recursive grammars produce for long inputs) do not hit
    the interpreter's recursion limit, and each line is built exactly once.
    """
    lines = []
    # Explicit depth-first stack of (node, depth) pairs still to be emitted.
    pending = [(node, 0)]
    while pending:
        current, depth = pending.pop()
        indent = " " * depth
        # A name containing newlines occupies several lines, each indented.
        lines.extend(indent + part for part in f"{current[0]}".split("\n"))
        # Push children in reverse so they are popped in their original order.
        pending.extend((child, depth + 1) for child in reversed(tuple(current[1])))
    return "\n".join(lines)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Examples
|
||||
###############################################################################
|
||||
def examples():
    """Run a series of small demo grammars through the table generators.

    For each grammar this prints a heading and then either the generated
    table (plus a parse of a sample input, for some of them) or, for the
    grammars a given generator cannot handle, the ValueError it raised.

    Raises AssertionError if a generator unexpectedly succeeds on a grammar
    that is supposed to produce a conflict.
    """

    def dump_grammar(grammar):
        # Print every production as `name -> symbols`, then a blank line.
        for name, symbols in grammar:
            print(f"{name} -> {symbols}")
        print()

    def expect_conflict(generator, start, grammar):
        # Build a table that is expected to fail; print the reported error.
        # Raising explicitly (rather than `assert False`) keeps the check
        # alive when Python runs with -O.
        try:
            generator(start, grammar).gen_table()
        except ValueError as e:
            print(e)
        else:
            raise AssertionError("expected table generation to fail")
        print()

    # OK, this is a very simple LR0 grammar.
    print("grammar_simple:")
    grammar_simple = [
        ("E", ["E", "+", "T"]),
        ("E", ["T"]),
        ("T", ["(", "E", ")"]),
        ("T", ["id"]),
    ]

    gen = GenerateLR0("E", grammar_simple)
    table = gen.gen_table()
    print(table.format())
    tree = parse(table, ["id", "+", "(", "id", ")"])
    print(format_node(tree) + "\n")
    print()

    # This one doesn't work with LR0, though, it has a shift/reduce conflict.
    print("grammar_lr0_shift_reduce (LR0):")
    grammar_lr0_shift_reduce = grammar_simple + [
        ("T", ["id", "[", "E", "]"]),
    ]
    expect_conflict(GenerateLR0, "E", grammar_lr0_shift_reduce)

    # Nor does this: it has a reduce/reduce conflict.
    print("grammar_lr0_reduce_reduce (LR0):")
    grammar_lr0_reduce_reduce = grammar_simple + [
        ("E", ["V", "=", "E"]),
        ("V", ["id"]),
    ]
    expect_conflict(GenerateLR0, "E", grammar_lr0_reduce_reduce)

    # Nullable symbols just don't work with constructs like this, because you can't
    # look ahead to figure out if you should reduce an empty 'F' or not.
    print("grammar_nullable (LR0):")
    grammar_nullable = [
        ("E", ["F", "boop"]),
        ("F", ["beep"]),
        ("F", []),
    ]
    expect_conflict(GenerateLR0, "E", grammar_nullable)

    print("grammar_lr0_shift_reduce (SLR1):")
    dump_grammar(grammar_lr0_shift_reduce)
    gen = GenerateSLR1("E", grammar_lr0_shift_reduce)
    follow_names = [gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key["E"])]
    print(f"Follow('E'): {follow_names}")
    table = gen.gen_table()
    print(table.format())
    tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True)
    print(format_node(tree) + "\n")
    print()

    # SLR1 can't handle this.
    print("grammar_aho_ullman_1 (SLR1):")
    grammar_aho_ullman_1 = [
        ("S", ["L", "=", "R"]),
        ("S", ["R"]),
        ("L", ["*", "R"]),
        ("L", ["id"]),
        ("R", ["L"]),
    ]
    expect_conflict(GenerateSLR1, "S", grammar_aho_ullman_1)

    # Here's an example with a full LR1 grammar, though.
    print("grammar_aho_ullman_2 (LR1):")
    grammar_aho_ullman_2 = [
        ("S", ["X", "X"]),
        ("X", ["a", "X"]),
        ("X", ["b"]),
    ]
    gen = GenerateLR1("S", grammar_aho_ullman_2)
    table = gen.gen_table()
    print(table.format())
    parse(table, ["b", "a", "a", "b"], trace=True)
    print()

    # What happens if we do LALR to it?
    print("grammar_aho_ullman_2 (LALR):")
    gen = GenerateLALR("S", grammar_aho_ullman_2)
    table = gen.gen_table()
    print(table.format())
    print()

    # A fun LALR grammar.
    print("grammar_lalr:")
    grammar_lalr = [
        ("S", ["V", "E"]),
        ("E", ["F"]),
        ("E", ["E", "+", "F"]),
        ("F", ["V"]),
        ("F", ["int"]),
        ("F", ["(", "E", ")"]),
        ("V", ["id"]),
    ]
    gen = GenerateLALR("S", grammar_lalr)
    table = gen.gen_table()
    print(table.format())
    print()
|
||||
|
||||
|
||||
# Run the demo grammars only when this module is executed as a script.
if __name__ == "__main__":
    examples()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue