First and follow.

This commit is contained in:
John Doty 2016-12-09 06:35:07 -08:00
parent 5a0b1654a9
commit 169ec4ff08

173
parser.py
View file

@ -50,14 +50,19 @@ class GenerateLR0(object):
non-terminal being added, and the second element of the tuple is the non-terminal being added, and the second element of the tuple is the
list of terminals and non-terminals that make up the production. list of terminals and non-terminals that make up the production.
Don't name anything with double-underscores; those are reserved for the Don't name anything with double-underscores; those are reserved for
generator. Don't add '$' to your the generator. Don't add '$' either, as it is reserved to mean
end-of-stream. Use an empty list to indicate nullability, that is:
('O', []),
means that O can be matched with nothing.
Note that this is implemented in the dumbest way possible, in order to be Note that this is implemented in the dumbest way possible, in order to be
the most understandable it can be. I built this to learn, and I want to the most understandable it can be. I built this to learn, and I want to
make sure I can keep learning with it. make sure I can keep learning with it.
""" """
def __init__(self, grammar, start): def __init__(self, start, grammar):
"""Initialize the parser generator with the specified grammar and """Initialize the parser generator with the specified grammar and
start symbol. start symbol.
""" """
@ -228,9 +233,15 @@ class GenerateLR0(object):
actions, actions,
a, a,
('reduce', config.name, len(config.symbols)), ('reduce', config.name, len(config.symbols)),
config,
) )
else: else:
self.set_table_action(actions, '$', ('accept',)) self.set_table_action(
actions,
'$',
('accept',),
config,
)
else: else:
if config.next in self.terminals: if config.next in self.terminals:
@ -240,6 +251,7 @@ class GenerateLR0(object):
actions, actions,
config.next, config.next,
('shift', index), ('shift', index),
config,
) )
# Gotos # Gotos
@ -247,24 +259,125 @@ class GenerateLR0(object):
successor = self.gen_successor(config_set, symbol) successor = self.gen_successor(config_set, symbol)
index = self.find_set_index(config_sets, successor) index = self.find_set_index(config_sets, successor)
if index is not None: if index is not None:
actions[symbol] = ('goto', index) self.set_table_action(
actions,
symbol,
('goto', index),
None,
)
# set_table_action stores the configs that generated the actions in
# the table, for diagnostic purposes. This filters them out again
# so that the parser has something clean to work with.
actions = {k: self.get_table_action(actions, k) for k in actions}
action_table.append(actions) action_table.append(actions)
return action_table return action_table
def set_table_action(self, row, symbol, action, config):
    """Record 'action' for 'symbol' in the table row 'row'.

    This mutates 'row'. Each entry is stored as an (action, config) pair
    so that a later conflict can report which configuration produced each
    of the competing actions.

    'config' is whatever object gave rise to the action; it is only ever
    passed through str() for the error message.

    Raises ValueError if the row already holds a different action for the
    symbol. Setting the same action again is allowed and replaces the
    stored config.
    """
    previous, previous_config = row.get(symbol, (None, None))
    if previous is not None and previous != action:
        old_text = str(previous_config)
        new_text = str(config)
        # Pad both config descriptions to one common width so that the
        # two actions line up in the message.
        column_width = 1 + max(len(old_text), len(new_text))
        template = (
            "Conflicting actions for {symbol}:\n"
            " {config_old: <{max_len}}: {old}\n"
            " {config_new: <{max_len}}: {new}\n"
        )
        raise ValueError(
            template.format(
                symbol=symbol,
                config_old=old_text,
                config_new=new_text,
                max_len=column_width,
                old=previous,
                new=action,
            )
        )

    row[symbol] = (action, config)
def get_table_action(self, row, symbol):
    """Return only the action recorded for 'symbol' in the table row.

    Row entries are (action, config) pairs as written by
    set_table_action; the config half is diagnostic information and is
    dropped here. Raises KeyError if the row has no entry for 'symbol'.
    """
    entry = row[symbol]
    return entry[0]
class GenerateSLR1(GenerateLR0):
    """Generate parse tables for SLR1 grammars.

    This class adds the FIRST and FOLLOW set computations on top of the
    LR0 generator. It reads 'self.grammar' (a sequence of rules whose
    element 0 is the non-terminal and element 1 is the list of symbols in
    the production), 'self.terminals' and 'self.nonterminals'; those are
    expected to be set up by the base class.

    Both sets are computed by fixed-point iteration: the rules are swept
    repeatedly, only ever adding members, until a sweep adds nothing. That
    handles left-recursive, right-recursive and nullable productions
    without any recursion guard, and it yields results in a deterministic
    order with no duplicates.

    Epsilon is represented by 'None' in FIRST sets. End-of-stream is
    represented by '$' in FOLLOW sets.
    """

    def _first_of_sequence(self, symbols, firsts):
        """Return the FIRST members of a symbol sequence as a list.

        'firsts' maps each non-terminal to the FIRST members found for it
        so far. The result contains no duplicates; 'None' (epsilon) is
        appended last, and only if every symbol in the sequence can match
        nothing (which includes the empty sequence).
        """
        result = []
        for symbol in symbols:
            if symbol in self.terminals:
                symbol_firsts = (symbol,)
            else:
                assert symbol in self.nonterminals
                symbol_firsts = firsts.get(symbol, ())
            for member in symbol_firsts:
                if member is not None and member not in result:
                    result.append(member)
            if None not in symbol_firsts:
                # This symbol must consume input, so nothing after it can
                # start the sequence.
                return result
        result.append(None)
        return result

    def _first_sets(self):
        """Return a dict mapping each rule name to its FIRST members.

        The values are lists in order of discovery, without duplicates.
        """
        firsts = {}
        changed = True
        while changed:
            changed = False
            for rule in self.grammar:
                known = firsts.setdefault(rule[0], [])
                for member in self._first_of_sequence(rule[1], firsts):
                    if member not in known:
                        known.append(member)
                        changed = True
        return firsts

    def _follow_sets(self):
        """Return a dict mapping non-terminals to their FOLLOW members.

        '__start' is seeded with '$' and is never extended.
        """
        firsts = self._first_sets()
        follows = {'__start': ['$']}
        changed = True
        while changed:
            changed = False
            for rule in self.grammar:
                name, symbols = rule[0], rule[1]
                for index, symbol in enumerate(symbols):
                    if symbol in self.terminals or symbol == '__start':
                        continue
                    rest = self._first_of_sequence(symbols[index + 1:], firsts)
                    additions = [m for m in rest if m is not None]
                    if None in rest:
                        # Everything after 'symbol' can vanish, so whatever
                        # may follow the rule's own name may follow it too.
                        additions.extend(follows.get(name, ()))
                    known = follows.setdefault(symbol, [])
                    for member in additions:
                        if member not in known:
                            known.append(member)
                            changed = True
        return follows

    def gen_first_symbol(self, symbol, visited=None):
        """Compute the first set for a single symbol.

        Returns a tuple. For a terminal that is just the terminal itself.
        For a non-terminal it is every terminal that can begin something
        derived from it, plus 'None' if it can match nothing.

        'visited' is accepted only so that existing callers keep working.
        It used to be a recursion guard, which made results incomplete for
        grammars with nullable or left-recursive rules; it is now ignored.
        """
        if symbol in self.terminals:
            return (symbol,)
        assert symbol in self.nonterminals
        return tuple(self._first_sets().get(symbol, ()))

    def gen_first(self, symbols, visited=None):
        """Compute the first set for a sequence of symbols.

        Returns a tuple without duplicates. An epsilon in the set is
        indicated by 'None', which is always the last element when
        present. An empty sequence yields '(None,)'.

        'visited' is accepted for backward compatibility and is ignored.
        (See gen_first_symbol for more.)
        """
        return tuple(self._first_of_sequence(symbols, self._first_sets()))

    def gen_follow(self, symbol):
        """Generate the follow set for the given nonterminal.

        Returns a tuple of terminals (and possibly '$') without
        duplicates. It never contains 'None'. The follow set of '__start'
        is always '('$',)'.
        """
        if symbol == '__start':
            return ('$',)
        assert symbol in self.nonterminals
        return tuple(self._follow_sets().get(symbol, ()))
def parse(table, input, trace=False): def parse(table, input, trace=False):
@ -376,30 +489,48 @@ grammar_simple = [
('T', ['id']), ('T', ['id']),
] ]
# The simple grammar is LR0, so this builds a table and parses with it.
gen = GenerateLR0('E', grammar_simple)
table = gen.gen_table()
tree = parse(table, ['id', '+', '(', 'id', ')'])
print(format_node(tree) + "\n")

# This one doesn't work with LR0, though, it has a shift/reduce conflict.
grammar_lr0_shift_reduce = grammar_simple + [
    ('T', ['id', '[', 'E', ']']),
]
try:
    gen = GenerateLR0('E', grammar_lr0_shift_reduce)
    table = gen.gen_table()
    # Reaching this line means the expected conflict was not reported.
    assert False
except ValueError as error:
    print(error)

# Nor does this: it has a reduce/reduce conflict.
grammar_lr0_reduce_reduce = grammar_simple + [
    ('E', ['V', '=', 'E']),
    ('V', ['id']),
]
try:
    gen = GenerateLR0('E', grammar_lr0_reduce_reduce)
    table = gen.gen_table()
    assert False
except ValueError as error:
    print(error)

# Nullable symbols just don't work with constructs like this, because you
# can't look ahead to figure out if you should reduce an empty 'F' or not.
grammar_nullable = [
    ('E', ['F', 'boop']),
    ('F', ['beep']),
    ('F', []),
]
try:
    gen = GenerateLR0('E', grammar_nullable)
    table = gen.gen_table()
    assert False
except ValueError as error:
    print(error)

# Show the first and follow sets of 'E' for the shift/reduce grammar.
gen = GenerateSLR1('E', grammar_lr0_shift_reduce)
print(
    "First: {first}".format(
        first=str(gen.gen_first(['E'])),
    )
)
print(
    "Follow: {follow}".format(
        follow=str(gen.gen_follow('E')),
    )
)