First and follow.
This commit is contained in:
parent
5a0b1654a9
commit
169ec4ff08
1 changed files with 152 additions and 21 deletions
173
parser.py
173
parser.py
|
|
@ -50,14 +50,19 @@ class GenerateLR0(object):
|
||||||
non-terminal being added, and the second element of the tuple is the
|
non-terminal being added, and the second element of the tuple is the
|
||||||
list of terminals and non-terminals that make up the production.
|
list of terminals and non-terminals that make up the production.
|
||||||
|
|
||||||
Don't name anything with double-underscores; those are reserved for the
|
Don't name anything with double-underscores; those are reserved for
|
||||||
generator. Don't add '$' to your
|
the generator. Don't add '$' either, as it is reserved to mean
|
||||||
|
end-of-stream. Use an empty list to indicate nullability, that is:
|
||||||
|
|
||||||
|
('O', []),
|
||||||
|
|
||||||
|
means that O can be matched with nothing.
|
||||||
|
|
||||||
Note that this is implemented in the dumbest way possible, in order to be
|
Note that this is implemented in the dumbest way possible, in order to be
|
||||||
the most understandable it can be. I built this to learn, and I want to
|
the most understandable it can be. I built this to learn, and I want to
|
||||||
make sure I can keep learning with it.
|
make sure I can keep learning with it.
|
||||||
"""
|
"""
|
||||||
def __init__(self, grammar, start):
|
def __init__(self, start, grammar):
|
||||||
"""Initialize the parser generator with the specified grammar and
|
"""Initialize the parser generator with the specified grammar and
|
||||||
start symbol.
|
start symbol.
|
||||||
"""
|
"""
|
||||||
|
|
@ -228,9 +233,15 @@ class GenerateLR0(object):
|
||||||
actions,
|
actions,
|
||||||
a,
|
a,
|
||||||
('reduce', config.name, len(config.symbols)),
|
('reduce', config.name, len(config.symbols)),
|
||||||
|
config,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.set_table_action(actions, '$', ('accept',))
|
self.set_table_action(
|
||||||
|
actions,
|
||||||
|
'$',
|
||||||
|
('accept',),
|
||||||
|
config,
|
||||||
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
if config.next in self.terminals:
|
if config.next in self.terminals:
|
||||||
|
|
@ -240,6 +251,7 @@ class GenerateLR0(object):
|
||||||
actions,
|
actions,
|
||||||
config.next,
|
config.next,
|
||||||
('shift', index),
|
('shift', index),
|
||||||
|
config,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Gotos
|
# Gotos
|
||||||
|
|
@ -247,24 +259,125 @@ class GenerateLR0(object):
|
||||||
successor = self.gen_successor(config_set, symbol)
|
successor = self.gen_successor(config_set, symbol)
|
||||||
index = self.find_set_index(config_sets, successor)
|
index = self.find_set_index(config_sets, successor)
|
||||||
if index is not None:
|
if index is not None:
|
||||||
actions[symbol] = ('goto', index)
|
self.set_table_action(
|
||||||
|
actions,
|
||||||
|
symbol,
|
||||||
|
('goto', index),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
|
# set_table_action stores the configs that generated the actions in
|
||||||
|
# the table, for diagnostic purposes. This filters them out again
|
||||||
|
# so that the parser has something clean to work with.
|
||||||
|
actions = {k: self.get_table_action(actions, k) for k in actions}
|
||||||
action_table.append(actions)
|
action_table.append(actions)
|
||||||
|
|
||||||
return action_table
|
return action_table
|
||||||
|
|
||||||
def set_table_action(self, row, symbol, action):
|
def set_table_action(self, row, symbol, action, config):
|
||||||
"""Set the action for 'symbol' in the table row to 'action'.
|
"""Set the action for 'symbol' in the table row to 'action'.
|
||||||
|
|
||||||
This is destructive; it changes the table. It raises an error if
|
This is destructive; it changes the table. It raises an error if
|
||||||
there is already an action for the symbol in the row.
|
there is already an action for the symbol in the row.
|
||||||
"""
|
"""
|
||||||
existing = row.get(symbol, None)
|
existing, existing_config = row.get(symbol, (None, None))
|
||||||
if existing is not None and existing != action:
|
if existing is not None and existing != action:
|
||||||
raise ValueError(
|
config_old = str(existing_config)
|
||||||
"Conflict: {old} vs {new}".format(old=existing, new=action)
|
config_new = str(config)
|
||||||
|
max_len = max(len(config_old), len(config_new)) + 1
|
||||||
|
error = (
|
||||||
|
"Conflicting actions for {symbol}:\n"
|
||||||
|
" {config_old: <{max_len}}: {old}\n"
|
||||||
|
" {config_new: <{max_len}}: {new}\n".format(
|
||||||
|
config_old=config_old,
|
||||||
|
config_new=config_new,
|
||||||
|
max_len=max_len,
|
||||||
|
old=existing,
|
||||||
|
new=action,
|
||||||
|
symbol=symbol,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
row[symbol] = action
|
raise ValueError(error)
|
||||||
|
row[symbol] = (action, config)
|
||||||
|
|
||||||
|
def get_table_action(self, row, symbol):
    """Return the action stored for 'symbol' in the table row.

    Each entry of 'row' is indexed here as a sequence whose first element
    is the action; anything stored after it is left behind.
    """
    entry = row[symbol]
    return entry[0]
|
||||||
|
|
||||||
|
|
||||||
|
class GenerateSLR1(GenerateLR0):
    """Generate parse tables for SLR1 grammars.

    On top of GenerateLR0 this class adds the computation of FIRST and
    FOLLOW sets. Both are computed by iterating over self.grammar until
    nothing changes any more (a fixed point), rather than by recursion, so
    left-recursive, mutually recursive and nullable productions all
    terminate and produce complete sets.

    Sets are returned as tuples without duplicates, in the order in which
    the members were discovered. Epsilon is represented by 'None'.
    """

    def gen_first_symbol(self, symbol, visited):
        """Compute the first set for a single symbol.

        Returns a tuple of the terminals that can start something matched
        by 'symbol', followed by 'None' if 'symbol' can match nothing. For
        a terminal this is just a tuple containing the terminal itself.

        'visited' was used by an earlier, recursive implementation to stop
        infinite recursion on left-recursive grammars. It is still accepted
        so that existing calls keep working, but it is ignored.
        """
        return self._first_of_sequence((symbol,), self._first_table())

    def gen_first(self, symbols, visited=None):
        """Compute the first set for a sequence of symbols.

        An epsilon in the set is indicated by 'None'; it is present only if
        every symbol in the sequence can match nothing (which is trivially
        the case for an empty sequence).

        The 'visited' parameter is accepted for compatibility with the
        earlier recursive implementation and is ignored.
        """
        return self._first_of_sequence(symbols, self._first_table())

    def gen_follow(self, symbol):
        """Generate the follow set for the given nonterminal.

        Returns a tuple of the terminals (and possibly '$', end-of-stream)
        that can appear immediately after 'symbol'. The follow set never
        contains epsilon.
        """
        if symbol == '__start':
            return ('$',)

        assert symbol in self.nonterminals
        return tuple(self._follow_table().get(symbol, ()))

    def _first_of_sequence(self, symbols, table):
        """Compute the first set of 'symbols' given per-nonterminal firsts.

        'table' maps nonterminals to the list of firsts known for them so
        far. It may still be growing while _first_table is iterating; the
        result only ever grows as the table grows.
        """
        result = []
        for symbol in symbols:
            if symbol in self.terminals:
                symbol_first = (symbol,)
            else:
                assert symbol in self.nonterminals
                symbol_first = table.get(symbol, ())

            for item in symbol_first:
                if item is not None and item not in result:
                    result.append(item)

            if None not in symbol_first:
                # This symbol must match something, so nothing after it can
                # contribute to the first set.
                return tuple(result)

        # Every symbol was nullable (or there were none): epsilon.
        result.append(None)
        return tuple(result)

    def _first_table(self):
        """Compute the first set of every nonterminal that has a production.

        Returns a dict from nonterminal name to a list of firsts.
        """
        table = {}
        for rule in self.grammar:
            table.setdefault(rule[0], [])

        # Keep folding each production's firsts into its nonterminal until a
        # whole pass adds nothing new.
        changed = True
        while changed:
            changed = False
            for rule in self.grammar:
                known = table[rule[0]]
                for item in self._first_of_sequence(rule[1], table):
                    if item not in known:
                        known.append(item)
                        changed = True

        return table

    def _follow_table(self):
        """Compute the follow set of every nonterminal used in a production.

        Returns a dict from nonterminal name to a list of follows.
        """
        first_table = self._first_table()

        # The follow of the generated start symbol is end-of-stream.
        table = {'__start': ['$']}

        changed = True
        while changed:
            changed = False
            for rule in self.grammar:
                name = rule[0]
                symbols = rule[1]
                for index, prod_symbol in enumerate(symbols):
                    if prod_symbol in self.terminals:
                        continue

                    known = table.setdefault(prod_symbol, [])
                    first = self._first_of_sequence(
                        symbols[index + 1:],
                        first_table,
                    )
                    additions = [f for f in first if f is not None]
                    if None in first:
                        # Everything after prod_symbol can match nothing, so
                        # whatever follows the whole production follows
                        # prod_symbol too.
                        additions.extend(table.get(name, ()))

                    for item in additions:
                        if item not in known:
                            known.append(item)
                            changed = True

        return table
|
||||||
|
|
||||||
|
|
||||||
def parse(table, input, trace=False):
|
def parse(table, input, trace=False):
|
||||||
|
|
@ -376,30 +489,48 @@ grammar_simple = [
|
||||||
('T', ['id']),
|
('T', ['id']),
|
||||||
]
|
]
|
||||||
|
|
||||||
gen = GenerateLR0(grammar_simple, 'E')
|
gen = GenerateLR0('E', grammar_simple)
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
tree = parse(table, ['id', '+', '(', 'id', ')'])
|
tree = parse(table, ['id', '+', '(', 'id', ')'])
|
||||||
print(format_node(tree))
|
print(format_node(tree) + "\n")
|
||||||
|
|
||||||
# This one doesn't work with LR0, though, it has a shift/reduce conflict.
|
# This one doesn't work with LR0, though, it has a shift/reduce conflict.
|
||||||
|
grammar_lr0_shift_reduce = grammar_simple + [
|
||||||
|
('T', ['id', '[', 'E', ']']),
|
||||||
|
]
|
||||||
try:
|
try:
|
||||||
grammar_lr0_conflict = grammar_simple + [
|
gen = GenerateLR0('E', grammar_lr0_shift_reduce)
|
||||||
('T', ['id', '[', 'E', ']']),
|
|
||||||
]
|
|
||||||
gen = GenerateLR0(grammar_lr0_conflict, 'E')
|
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
assert False
|
assert False
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
# Nor does this: it has a reduce/reduce conflict.
|
# Nor does this: it has a reduce/reduce conflict.
|
||||||
|
grammar_lr0_reduce_reduce = grammar_simple + [
|
||||||
|
('E', ['V', '=', 'E']),
|
||||||
|
('V', ['id']),
|
||||||
|
]
|
||||||
try:
|
try:
|
||||||
grammar_lr0_conflict = grammar_simple + [
|
gen = GenerateLR0('E', grammar_lr0_reduce_reduce)
|
||||||
('E', ['V', '=', 'E']),
|
|
||||||
('V', ['id']),
|
|
||||||
]
|
|
||||||
gen = GenerateLR0(grammar_lr0_conflict, 'E')
|
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
assert False
|
assert False
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
print(e)
|
print(e)
|
||||||
|
|
||||||
|
# Nullable symbols just don't work with constructs like this, because you can't
|
||||||
|
# look ahead to figure out if you should reduce an empty 'F' or not.
|
||||||
|
grammar_nullable = [
|
||||||
|
('E', ['F', 'boop']),
|
||||||
|
('F', ['beep']),
|
||||||
|
('F', []),
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
gen = GenerateLR0('E', grammar_nullable)
|
||||||
|
table = gen.gen_table()
|
||||||
|
assert False
|
||||||
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
|
gen = GenerateSLR1('E', grammar_lr0_shift_reduce)
|
||||||
|
print("First: {first}".format(first=str(gen.gen_first(['E']))))
|
||||||
|
print("Follow: {follow}".format(follow=str(gen.gen_follow('E'))))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue