faster: Precedence support, necessary for IfStatement
This commit is contained in:
parent
c100613ff5
commit
d0be3ea267
1 changed file with 109 additions and 65 deletions
174
parser_faster.py
174
parser_faster.py
|
|
@ -3,10 +3,13 @@ might expect the code did NOT work acceptably.
|
||||||
|
|
||||||
This version has some performance work done.
|
This version has some performance work done.
|
||||||
|
|
||||||
|
It also supports precedence.
|
||||||
|
|
||||||
2023
|
2023
|
||||||
"""
|
"""
|
||||||
import collections
|
import collections
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
import enum
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -196,6 +199,14 @@ class ConfigurationSetInfo:
|
||||||
raise KeyError("Unable to find a path to the target set!")
|
raise KeyError("Unable to find a path to the target set!")
|
||||||
|
|
||||||
|
|
||||||
|
@enum.unique
class Assoc(enum.Enum):
    """Associativity of a rule.

    Each symbol's precedence entry is an ``(Assoc, int)`` pair. When two
    table actions collide with equal precedence, the table builder uses
    the associativity to pick between a shift and a reduce.
    """

    # No associativity declared. The other action's associativity is used
    # instead; if neither side declares one, the conflict is reported.
    NONE = 0

    # Left-associative: on a shift/reduce conflict, prefer the reduce.
    LEFT = 1

    # Right-associative: on a shift/reduce conflict, prefer the shift.
    RIGHT = 2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ErrorCollection:
|
class ErrorCollection:
|
||||||
errors: dict[ConfigSet, dict[int, dict[Configuration, typing.Tuple]]]
|
errors: dict[ConfigSet, dict[int, dict[Configuration, typing.Tuple]]]
|
||||||
|
|
||||||
|
|
@ -259,15 +270,17 @@ class ErrorCollection:
|
||||||
|
|
||||||
|
|
||||||
class TableBuilder(object):
|
class TableBuilder(object):
|
||||||
row: None | list[typing.Tuple[None | typing.Tuple, None | Configuration]]
|
|
||||||
table: list[dict[str, typing.Tuple]]
|
|
||||||
config_sets: dict[ConfigSet, int] # Map config sets to rows.
|
|
||||||
errors: ErrorCollection
|
errors: ErrorCollection
|
||||||
|
table: list[dict[str, typing.Tuple]]
|
||||||
|
alphabet: list[str]
|
||||||
|
precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
|
||||||
|
row: None | list[typing.Tuple[None | typing.Tuple, None | Configuration]]
|
||||||
|
|
||||||
def __init__(self, alphabet: list[str]):
|
def __init__(self, alphabet: list[str], precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]):
|
||||||
self.errors = ErrorCollection()
|
self.errors = ErrorCollection()
|
||||||
self.table = []
|
self.table = []
|
||||||
self.alphabet = alphabet
|
self.alphabet = alphabet
|
||||||
|
self.precedence = precedence
|
||||||
self.row = None
|
self.row = None
|
||||||
|
|
||||||
def flush(self, all_sets: ConfigurationSetInfo):
|
def flush(self, all_sets: ConfigurationSetInfo):
|
||||||
|
|
@ -322,13 +335,56 @@ class TableBuilder(object):
|
||||||
assert existing_config is not None
|
assert existing_config is not None
|
||||||
assert config is not None
|
assert config is not None
|
||||||
|
|
||||||
# Record the conflicts.
|
# Maybe we can resolve the conflict with precedence?
|
||||||
self.errors.add_error(self.current_config_set, symbol_id, existing_config, existing)
|
existing_assoc, existing_prec = self.precedence[existing_config.name]
|
||||||
self.errors.add_error(self.current_config_set, symbol_id, config, action)
|
new_assoc, new_prec = self.precedence[config.name]
|
||||||
|
|
||||||
|
if existing_prec > new_prec:
|
||||||
|
# Precedence of the action in the table already wins, do nothing.
|
||||||
|
return
|
||||||
|
|
||||||
|
elif existing_prec == new_prec:
|
||||||
|
# It's an actual conflict, use associativity if we can.
|
||||||
|
# If there's a conflict in associativity then it's a real conflict!
|
||||||
|
assoc = Assoc.NONE
|
||||||
|
if existing_assoc == Assoc.NONE:
|
||||||
|
assoc = new_assoc
|
||||||
|
elif new_assoc == Assoc.NONE:
|
||||||
|
assoc = existing_assoc
|
||||||
|
elif new_assoc == existing_assoc:
|
||||||
|
assoc = new_assoc
|
||||||
|
|
||||||
|
resolved = False
|
||||||
|
if assoc == Assoc.LEFT:
|
||||||
|
# Prefer reduce over shift
|
||||||
|
if action[0] == 'shift' and existing[0] == 'reduce':
|
||||||
|
action = existing
|
||||||
|
resolved = True
|
||||||
|
elif action[0] == 'reduce' and existing[0] == 'shift':
|
||||||
|
resolved = True
|
||||||
|
|
||||||
|
elif assoc == Assoc.RIGHT:
|
||||||
|
# Prefer shift over reduce
|
||||||
|
if action[0] == 'shift' and existing[0] == 'reduce':
|
||||||
|
resolved = True
|
||||||
|
elif action[0] == 'reduce' and existing[0] == 'shift':
|
||||||
|
action = existing
|
||||||
|
resolved = True
|
||||||
|
|
||||||
|
if not resolved:
|
||||||
|
# Record the conflicts.
|
||||||
|
self.errors.add_error(self.current_config_set, symbol_id, existing_config, existing)
|
||||||
|
self.errors.add_error(self.current_config_set, symbol_id, config, action)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Precedence of the new action is greater than the existing
|
||||||
|
# action, just allow the overwrite with no change.
|
||||||
|
pass
|
||||||
|
|
||||||
self.row[symbol_id] = (action, config)
|
self.row[symbol_id] = (action, config)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class GenerateLR0(object):
|
class GenerateLR0(object):
|
||||||
"""Generate parser tables for an LR0 parser.
|
"""Generate parser tables for an LR0 parser.
|
||||||
|
|
||||||
|
|
@ -357,24 +413,13 @@ class GenerateLR0(object):
|
||||||
('O', []),
|
('O', []),
|
||||||
|
|
||||||
means that O can be matched with nothing.
|
means that O can be matched with nothing.
|
||||||
|
|
||||||
Implementation notes:
|
|
||||||
- This is implemented in the dumbest way possible, in order to be the
|
|
||||||
most understandable it can be. I built this to learn, and I want to
|
|
||||||
make sure I can keep learning with it.
|
|
||||||
|
|
||||||
- We tend to use tuples everywhere. This is because tuples can be
|
|
||||||
compared for equality and put into tables and all that jazz. They might
|
|
||||||
be a little bit slower in places but like I said, this is for
|
|
||||||
learning. (Also, if we need this to run faster we can probably go a
|
|
||||||
long way by memoizing results, which is much easier if we have tuples
|
|
||||||
everywhere.)
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
alphabet: list[str]
|
alphabet: list[str]
|
||||||
grammar: list[list[typing.Tuple[int, ...]]]
|
grammar: list[list[typing.Tuple[int, ...]]]
|
||||||
nonterminals: typing.Tuple[bool, ...]
|
nonterminal: typing.Tuple[bool, ...]
|
||||||
terminals: typing.Tuple[bool, ...]
|
terminal: typing.Tuple[bool, ...]
|
||||||
|
precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
|
||||||
|
|
||||||
symbol_key: dict[str, int]
|
symbol_key: dict[str, int]
|
||||||
start_symbol: int
|
start_symbol: int
|
||||||
|
|
@ -384,7 +429,12 @@ class GenerateLR0(object):
|
||||||
successors: list[set[int]]
|
successors: list[set[int]]
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, start: str, grammar: list[typing.Tuple[str, list[str]]]):
|
def __init__(
|
||||||
|
self,
|
||||||
|
start: str,
|
||||||
|
grammar: list[typing.Tuple[str, list[str]]],
|
||||||
|
precedence: None | dict[str, typing.Tuple[Assoc, int]] = None,
|
||||||
|
):
|
||||||
"""Initialize the parser generator with the specified grammar and
|
"""Initialize the parser generator with the specified grammar and
|
||||||
start symbol.
|
start symbol.
|
||||||
"""
|
"""
|
||||||
|
|
@ -426,30 +476,34 @@ class GenerateLR0(object):
|
||||||
# We count on python dictionaries retaining the insertion order, like
|
# We count on python dictionaries retaining the insertion order, like
|
||||||
# it or not.
|
# it or not.
|
||||||
full_grammar = [list() for _ in self.alphabet]
|
full_grammar = [list() for _ in self.alphabet]
|
||||||
terminals = [True for _ in self.alphabet]
|
terminal = [True for _ in self.alphabet]
|
||||||
assert terminals[end_symbol]
|
assert terminal[end_symbol]
|
||||||
|
|
||||||
nonterminals = [False for _ in self.alphabet]
|
nonterminal = [False for _ in self.alphabet]
|
||||||
|
|
||||||
for name, rule in grammar:
|
for name, rule in grammar:
|
||||||
name_symbol = symbol_key[name]
|
name_symbol = symbol_key[name]
|
||||||
|
|
||||||
terminals[name_symbol] = False
|
terminal[name_symbol] = False
|
||||||
nonterminals[name_symbol] = True
|
nonterminal[name_symbol] = True
|
||||||
|
|
||||||
rules = full_grammar[name_symbol]
|
rules = full_grammar[name_symbol]
|
||||||
rules.append(tuple(symbol_key[symbol] for symbol in rule))
|
rules.append(tuple(symbol_key[symbol] for symbol in rule))
|
||||||
|
|
||||||
self.grammar = full_grammar
|
self.grammar = full_grammar
|
||||||
self.grammar[start_symbol].append((symbol_key[start],))
|
self.grammar[start_symbol].append((symbol_key[start],))
|
||||||
terminals[start_symbol] = False
|
terminal[start_symbol] = False
|
||||||
nonterminals[start_symbol] = True
|
nonterminal[start_symbol] = True
|
||||||
|
|
||||||
self.terminals = tuple(terminals)
|
self.terminal = tuple(terminal)
|
||||||
self.nonterminals = tuple(nonterminals)
|
self.nonterminal = tuple(nonterminal)
|
||||||
|
|
||||||
assert self.terminals[end_symbol]
|
assert self.terminal[end_symbol]
|
||||||
assert self.nonterminals[start_symbol]
|
assert self.nonterminal[start_symbol]
|
||||||
|
|
||||||
|
if precedence is None:
|
||||||
|
precedence = {}
|
||||||
|
self.precedence = tuple(precedence.get(a, (Assoc.NONE, 0)) for a in self.alphabet)
|
||||||
|
|
||||||
self.symbol_key = symbol_key
|
self.symbol_key = symbol_key
|
||||||
self.start_symbol = start_symbol
|
self.start_symbol = start_symbol
|
||||||
|
|
@ -497,7 +551,7 @@ class GenerateLR0(object):
|
||||||
|
|
||||||
return tuple(sorted(closure)) # TODO: Why tuple?
|
return tuple(sorted(closure)) # TODO: Why tuple?
|
||||||
|
|
||||||
def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: str) -> ConfigSet:
|
def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet:
|
||||||
"""Compute the successor state for the given config set and the
|
"""Compute the successor state for the given config set and the
|
||||||
given symbol.
|
given symbol.
|
||||||
|
|
||||||
|
|
@ -564,7 +618,7 @@ class GenerateLR0(object):
|
||||||
|
|
||||||
In an LR0 parser, this is just the set of all terminals."""
|
In an LR0 parser, this is just the set of all terminals."""
|
||||||
del(config)
|
del(config)
|
||||||
return [index for index, value in enumerate(self.terminals) if value]
|
return [index for index, value in enumerate(self.terminal) if value]
|
||||||
|
|
||||||
def gen_table(self):
|
def gen_table(self):
|
||||||
"""Generate the parse table.
|
"""Generate the parse table.
|
||||||
|
|
@ -595,7 +649,7 @@ class GenerateLR0(object):
|
||||||
Anything missing from the row indicates an error.
|
Anything missing from the row indicates an error.
|
||||||
"""
|
"""
|
||||||
config_sets = self.gen_all_sets()
|
config_sets = self.gen_all_sets()
|
||||||
builder = TableBuilder(self.alphabet)
|
builder = TableBuilder(self.alphabet, self.precedence)
|
||||||
|
|
||||||
for config_set_id, config_set in enumerate(config_sets.sets):
|
for config_set_id, config_set in enumerate(config_sets.sets):
|
||||||
builder.new_row(config_set)
|
builder.new_row(config_set)
|
||||||
|
|
@ -610,13 +664,13 @@ class GenerateLR0(object):
|
||||||
else:
|
else:
|
||||||
builder.set_table_accept(self.end_symbol, config)
|
builder.set_table_accept(self.end_symbol, config)
|
||||||
|
|
||||||
elif self.terminals[config_next]:
|
elif self.terminal[config_next]:
|
||||||
index = successors[config_next]
|
index = successors[config_next]
|
||||||
builder.set_table_shift(config_next, index, config)
|
builder.set_table_shift(config_next, index, config)
|
||||||
|
|
||||||
# Gotos
|
# Gotos
|
||||||
for symbol, index in successors.items():
|
for symbol, index in successors.items():
|
||||||
if self.nonterminals[symbol]:
|
if self.nonterminal[symbol]:
|
||||||
builder.set_table_goto(symbol, index)
|
builder.set_table_goto(symbol, index)
|
||||||
|
|
||||||
return builder.flush(config_sets)
|
return builder.flush(config_sets)
|
||||||
|
|
@ -700,27 +754,22 @@ class FirstInfo:
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_grammar(
|
def from_grammar(
|
||||||
cls,
|
cls,
|
||||||
alphabet: list[str],
|
|
||||||
grammar: list[list[typing.Tuple[int,...]]],
|
grammar: list[list[typing.Tuple[int,...]]],
|
||||||
terminals: typing.Tuple[bool, ...],
|
terminal: typing.Tuple[bool, ...],
|
||||||
):
|
):
|
||||||
# print("******* GENERATING FIRSTS ********")
|
|
||||||
|
|
||||||
# Add all terminals to their own firsts
|
# Add all terminals to their own firsts
|
||||||
firsts = []
|
firsts = []
|
||||||
for index, is_terminal in enumerate(terminals):
|
for index, is_terminal in enumerate(terminal):
|
||||||
firsts.append(set())
|
firsts.append(set())
|
||||||
if is_terminal:
|
if is_terminal:
|
||||||
firsts[index].add(index)
|
firsts[index].add(index)
|
||||||
|
|
||||||
epsilons = [False for _ in terminals]
|
epsilons = [False for _ in terminal]
|
||||||
changed = True
|
changed = True
|
||||||
while changed:
|
while changed:
|
||||||
# print("========= ITERATION")
|
|
||||||
changed = False
|
changed = False
|
||||||
for name, rules in enumerate(grammar):
|
for name, rules in enumerate(grammar):
|
||||||
f = firsts[name]
|
f = firsts[name]
|
||||||
# print(f" {alphabet[name]} -> {[alphabet[s] for s in f]}")
|
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
if len(rule) == 0:
|
if len(rule) == 0:
|
||||||
changed = changed or not epsilons[name]
|
changed = changed or not epsilons[name]
|
||||||
|
|
@ -728,11 +777,7 @@ class FirstInfo:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for index, symbol in enumerate(rule):
|
for index, symbol in enumerate(rule):
|
||||||
# if terminals[symbol]:
|
|
||||||
# changed = add_changed(f, symbol) or changed
|
|
||||||
# else:
|
|
||||||
other_firsts = firsts[symbol]
|
other_firsts = firsts[symbol]
|
||||||
# print(f" adding {alphabet[symbol]} -> {[alphabet[s] for s in other_firsts]}")
|
|
||||||
changed = update_changed(f, other_firsts) or changed
|
changed = update_changed(f, other_firsts) or changed
|
||||||
|
|
||||||
is_last = index == len(rule) - 1
|
is_last = index == len(rule) - 1
|
||||||
|
|
@ -750,7 +795,6 @@ class FirstInfo:
|
||||||
# looping through the symbols in this rule.
|
# looping through the symbols in this rule.
|
||||||
break
|
break
|
||||||
|
|
||||||
# print("******* DONE GENERATING FIRSTS ********")
|
|
||||||
return FirstInfo(firsts=firsts, is_epsilon=epsilons)
|
return FirstInfo(firsts=firsts, is_epsilon=epsilons)
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
|
|
@ -761,7 +805,7 @@ class FollowInfo:
|
||||||
def from_grammar(
|
def from_grammar(
|
||||||
cls,
|
cls,
|
||||||
grammar: list[list[typing.Tuple[int,...]]],
|
grammar: list[list[typing.Tuple[int,...]]],
|
||||||
terminals: typing.Tuple[bool, ...],
|
terminal: typing.Tuple[bool, ...],
|
||||||
start_symbol: int,
|
start_symbol: int,
|
||||||
end_symbol: int,
|
end_symbol: int,
|
||||||
firsts: FirstInfo,
|
firsts: FirstInfo,
|
||||||
|
|
@ -778,7 +822,7 @@ class FollowInfo:
|
||||||
prev_symbol = None
|
prev_symbol = None
|
||||||
for symbol in reversed(rule):
|
for symbol in reversed(rule):
|
||||||
f = follows[symbol]
|
f = follows[symbol]
|
||||||
if terminals[symbol]:
|
if terminal[symbol]:
|
||||||
# This particular rule can't produce epsilon.
|
# This particular rule can't produce epsilon.
|
||||||
epsilon = False
|
epsilon = False
|
||||||
prev_symbol = symbol
|
prev_symbol = symbol
|
||||||
|
|
@ -826,10 +870,10 @@ class GenerateSLR1(GenerateLR0):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self._firsts = FirstInfo.from_grammar(self.alphabet, self.grammar, self.terminals)
|
self._firsts = FirstInfo.from_grammar(self.grammar, self.terminal)
|
||||||
self._follows = FollowInfo.from_grammar(
|
self._follows = FollowInfo.from_grammar(
|
||||||
self.grammar,
|
self.grammar,
|
||||||
self.terminals,
|
self.terminal,
|
||||||
self.start_symbol,
|
self.start_symbol,
|
||||||
self.end_symbol,
|
self.end_symbol,
|
||||||
self._firsts,
|
self._firsts,
|
||||||
|
|
@ -1049,24 +1093,24 @@ def format_table(generator, table):
|
||||||
elif action[0] == 'reduce':
|
elif action[0] == 'reduce':
|
||||||
return 'r' + str(action[1])
|
return 'r' + str(action[1])
|
||||||
|
|
||||||
terminals = [
|
terminals = list(sorted(
|
||||||
generator.alphabet[i]
|
generator.alphabet[i]
|
||||||
for i,v in enumerate(generator.terminals)
|
for i,v in enumerate(generator.terminal)
|
||||||
if v
|
if v
|
||||||
]
|
))
|
||||||
nonterminals = [
|
nonterminals = list(sorted(
|
||||||
generator.alphabet[i]
|
generator.alphabet[i]
|
||||||
for i,v in enumerate(generator.nonterminals)
|
for i,v in enumerate(generator.nonterminal)
|
||||||
if v
|
if v
|
||||||
]
|
))
|
||||||
header = " | {terms} | {nts}".format(
|
header = " | {terms} | {nts}".format(
|
||||||
terms=' '.join(
|
terms=' '.join(
|
||||||
'{0: <6}'.format(terminal)
|
'{0: <6}'.format(terminal)
|
||||||
for terminal in sorted(terminals)
|
for terminal in terminals
|
||||||
),
|
),
|
||||||
nts=' '.join(
|
nts=' '.join(
|
||||||
'{0: <5}'.format(nt)
|
'{0: <5}'.format(nt)
|
||||||
for nt in sorted(nonterminals)
|
for nt in nonterminals
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1078,11 +1122,11 @@ def format_table(generator, table):
|
||||||
index=i,
|
index=i,
|
||||||
actions=' '.join(
|
actions=' '.join(
|
||||||
'{0: <6}'.format(format_action(row, terminal))
|
'{0: <6}'.format(format_action(row, terminal))
|
||||||
for terminal in sorted(terminals)
|
for terminal in terminals
|
||||||
),
|
),
|
||||||
gotos=' '.join(
|
gotos=' '.join(
|
||||||
'{0: <5}'.format(row.get(nt, ('error', ''))[1])
|
'{0: <5}'.format(row.get(nt, ('error', ''))[1])
|
||||||
for nt in sorted(nonterminals)
|
for nt in nonterminals
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
for i, row in enumerate(table)
|
for i, row in enumerate(table)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue