Performance
This commit is contained in:
parent
cabc091ee4
commit
8c3b1b784c
1 changed files with 112 additions and 145 deletions
257
parser.py
257
parser.py
|
|
@ -145,7 +145,71 @@ import typing
|
||||||
#
|
#
|
||||||
# We start with LR0 parsers, because they form the basis of everything else.
|
# We start with LR0 parsers, because they form the basis of everything else.
|
||||||
###############################################################################
|
###############################################################################
|
||||||
class Configuration:
|
class ConfigurationCore(typing.NamedTuple):
|
||||||
|
name: int
|
||||||
|
symbols: typing.Tuple[int, ...]
|
||||||
|
position: int
|
||||||
|
next: int | None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_rule(cls, name: int, symbols: typing.Tuple[int, ...]):
|
||||||
|
if len(symbols) == 0:
|
||||||
|
next = None
|
||||||
|
else:
|
||||||
|
next = symbols[0]
|
||||||
|
return ConfigurationCore(
|
||||||
|
name=name,
|
||||||
|
symbols=symbols,
|
||||||
|
position=0,
|
||||||
|
next=next,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def at_end(self) -> bool:
|
||||||
|
return self.position == len(self.symbols)
|
||||||
|
|
||||||
|
def replace_position(self, new_position):
|
||||||
|
if new_position == len(self.symbols):
|
||||||
|
next = None
|
||||||
|
else:
|
||||||
|
next = self.symbols[new_position]
|
||||||
|
return ConfigurationCore(
|
||||||
|
name=self.name,
|
||||||
|
symbols=self.symbols,
|
||||||
|
position=new_position,
|
||||||
|
next=next,
|
||||||
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def rest(self) -> typing.Tuple[int, ...]:
|
||||||
|
return self.symbols[(self.position + 1) :]
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return "{name} -> {bits}".format(
|
||||||
|
name=self.name,
|
||||||
|
bits=" ".join(
|
||||||
|
[
|
||||||
|
("* " + str(sym)) if i == self.position else str(sym)
|
||||||
|
for i, sym in enumerate(self.symbols)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
+ (" *" if self.at_end else ""),
|
||||||
|
)
|
||||||
|
|
||||||
|
def format(self, alphabet: list[str]) -> str:
|
||||||
|
return "{name} -> {bits}".format(
|
||||||
|
name=alphabet[self.name],
|
||||||
|
bits=" ".join(
|
||||||
|
[
|
||||||
|
"* " + alphabet[sym] if i == self.position else alphabet[sym]
|
||||||
|
for i, sym in enumerate(self.symbols)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
+ (" *" if self.at_end else ""),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Configuration(typing.NamedTuple):
|
||||||
"""A rule being tracked in a state. That is, a specific position within a
|
"""A rule being tracked in a state. That is, a specific position within a
|
||||||
specific rule, with an associated lookahead state.
|
specific rule, with an associated lookahead state.
|
||||||
|
|
||||||
|
|
@ -162,125 +226,34 @@ class Configuration:
|
||||||
the part about LR(1).)
|
the part about LR(1).)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__slots__ = (
|
core: ConfigurationCore
|
||||||
"name",
|
|
||||||
"symbols",
|
|
||||||
"position",
|
|
||||||
"lookahead",
|
|
||||||
"next",
|
|
||||||
"at_end",
|
|
||||||
"_vals",
|
|
||||||
"_hash",
|
|
||||||
)
|
|
||||||
|
|
||||||
name: int
|
|
||||||
symbols: typing.Tuple[int, ...]
|
|
||||||
position: int
|
|
||||||
lookahead: typing.Tuple[int, ...]
|
lookahead: typing.Tuple[int, ...]
|
||||||
next: int | None
|
|
||||||
at_end: bool
|
|
||||||
|
|
||||||
_vals: typing.Tuple
|
|
||||||
_hash: int
|
|
||||||
|
|
||||||
def __init__(self, name, symbols, position, lookahead) -> None:
|
|
||||||
self.name = name
|
|
||||||
self.symbols = symbols
|
|
||||||
self.position = position
|
|
||||||
self.lookahead = lookahead
|
|
||||||
|
|
||||||
at_end = position == len(symbols)
|
|
||||||
self.at_end = at_end
|
|
||||||
self.next = symbols[position] if not at_end else None
|
|
||||||
|
|
||||||
self._vals = (name, symbols, position, lookahead)
|
|
||||||
self._hash = hash(self._vals)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_rule(cls, name: int, symbols: typing.Tuple[int, ...], lookahead=()):
|
def from_rule(cls, name: int, symbols: typing.Tuple[int, ...], lookahead=()):
|
||||||
|
# Consider adding at_end and next to the namedtuple.
|
||||||
return Configuration(
|
return Configuration(
|
||||||
name=name,
|
core=ConfigurationCore.from_rule(name, symbols),
|
||||||
symbols=symbols,
|
|
||||||
position=0,
|
|
||||||
lookahead=lookahead,
|
|
||||||
)
|
|
||||||
|
|
||||||
def __hash__(self) -> int:
|
|
||||||
return self._hash
|
|
||||||
|
|
||||||
def __eq__(self, value: typing.Any, /) -> bool:
|
|
||||||
if value is self:
|
|
||||||
return True
|
|
||||||
|
|
||||||
return (
|
|
||||||
value._hash == self._hash
|
|
||||||
and value.name == self.name
|
|
||||||
and value.position == self.position
|
|
||||||
and value.symbols == self.symbols
|
|
||||||
and value.lookahead == self.lookahead
|
|
||||||
)
|
|
||||||
|
|
||||||
def __lt__(self, value) -> bool:
|
|
||||||
if not isinstance(value, Configuration):
|
|
||||||
return NotImplemented
|
|
||||||
return self._vals < value._vals
|
|
||||||
|
|
||||||
def __gt__(self, value) -> bool:
|
|
||||||
if not isinstance(value, Configuration):
|
|
||||||
return NotImplemented
|
|
||||||
return self._vals > value._vals
|
|
||||||
|
|
||||||
def __le__(self, value) -> bool:
|
|
||||||
if not isinstance(value, Configuration):
|
|
||||||
return NotImplemented
|
|
||||||
return self._vals <= value._vals
|
|
||||||
|
|
||||||
def __ge__(self, value) -> bool:
|
|
||||||
if not isinstance(value, Configuration):
|
|
||||||
return NotImplemented
|
|
||||||
return self._vals >= value._vals
|
|
||||||
|
|
||||||
def replace_position(self, new_position):
|
|
||||||
return Configuration(
|
|
||||||
name=self.name,
|
|
||||||
symbols=self.symbols,
|
|
||||||
position=new_position,
|
|
||||||
lookahead=self.lookahead,
|
|
||||||
)
|
|
||||||
|
|
||||||
def clear_lookahead(self):
|
|
||||||
return Configuration(
|
|
||||||
name=self.name,
|
|
||||||
symbols=self.symbols,
|
|
||||||
position=self.position,
|
|
||||||
lookahead=(),
|
|
||||||
)
|
|
||||||
|
|
||||||
def replace_lookahead(self, lookahead: typing.Tuple[int, ...]):
|
|
||||||
return Configuration(
|
|
||||||
name=self.name,
|
|
||||||
symbols=self.symbols,
|
|
||||||
position=self.position,
|
|
||||||
lookahead=lookahead,
|
lookahead=lookahead,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def at_end(self) -> bool:
|
||||||
|
return self.core.next is None
|
||||||
|
|
||||||
|
def replace_position(self, new_position):
|
||||||
|
return Configuration(
|
||||||
|
core=self.core.replace_position(new_position),
|
||||||
|
lookahead=self.lookahead,
|
||||||
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def rest(self):
|
def rest(self):
|
||||||
return self.symbols[(self.position + 1) :]
|
return self.core.symbols[(self.core.position + 1) :]
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
la = ", " + str(self.lookahead) if self.lookahead != () else ""
|
la = ", " + str(self.lookahead) if self.lookahead != () else ""
|
||||||
return "{name} -> {bits}{lookahead}".format(
|
return f"{repr(self.core)}{la}"
|
||||||
name=self.name,
|
|
||||||
bits=" ".join(
|
|
||||||
[
|
|
||||||
("* " + str(sym)) if i == self.position else str(sym)
|
|
||||||
for i, sym in enumerate(self.symbols)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
+ (" *" if self.at_end else ""),
|
|
||||||
lookahead=la,
|
|
||||||
)
|
|
||||||
|
|
||||||
def format(self, alphabet: list[str]) -> str:
|
def format(self, alphabet: list[str]) -> str:
|
||||||
if self.lookahead != ():
|
if self.lookahead != ():
|
||||||
|
|
@ -288,20 +261,13 @@ class Configuration:
|
||||||
else:
|
else:
|
||||||
la = ""
|
la = ""
|
||||||
|
|
||||||
return "{name} -> {bits}{lookahead}".format(
|
return f"{self.core.format(alphabet)}{la}"
|
||||||
name=alphabet[self.name],
|
|
||||||
bits=" ".join(
|
|
||||||
[
|
class CoreSet(frozenset[ConfigurationCore]):
|
||||||
"* " + alphabet[sym] if i == self.position else alphabet[sym]
|
pass
|
||||||
for i, sym in enumerate(self.symbols)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
+ (" *" if self.at_end else ""),
|
|
||||||
lookahead=la,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# ConfigSet = typing.Tuple[Configuration, ...]
|
|
||||||
class ConfigSet(frozenset[Configuration]):
|
class ConfigSet(frozenset[Configuration]):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -548,12 +514,13 @@ class ErrorCollection:
|
||||||
for symbol, symbol_errors in set_errors.items():
|
for symbol, symbol_errors in set_errors.items():
|
||||||
actions = []
|
actions = []
|
||||||
for config, action in symbol_errors.items():
|
for config, action in symbol_errors.items():
|
||||||
name = alphabet[config.name]
|
core = config.core
|
||||||
|
name = alphabet[core.name]
|
||||||
rule = " ".join(
|
rule = " ".join(
|
||||||
f"{'* ' if config.position == i else ''}{alphabet[s]}"
|
f"{'* ' if core.position == i else ''}{alphabet[s]}"
|
||||||
for i, s in enumerate(config.symbols)
|
for i, s in enumerate(core.symbols)
|
||||||
)
|
)
|
||||||
if config.next is None:
|
if config.at_end:
|
||||||
rule += " *"
|
rule += " *"
|
||||||
|
|
||||||
match action:
|
match action:
|
||||||
|
|
@ -700,9 +667,9 @@ class TableBuilder(object):
|
||||||
"""Mark a reduce of the given configuration for the given symbol in the
|
"""Mark a reduce of the given configuration for the given symbol in the
|
||||||
current row.
|
current row.
|
||||||
"""
|
"""
|
||||||
name = self.alphabet[config.name]
|
name = self.alphabet[config.core.name]
|
||||||
transparent = name in self.transparents
|
transparent = name in self.transparents
|
||||||
action = Reduce(name, len(config.symbols), transparent)
|
action = Reduce(name, len(config.core.symbols), transparent)
|
||||||
self._set_table_action(symbol, action, config)
|
self._set_table_action(symbol, action, config)
|
||||||
|
|
||||||
def set_table_accept(self, symbol: int, config: Configuration):
|
def set_table_accept(self, symbol: int, config: Configuration):
|
||||||
|
|
@ -728,7 +695,7 @@ class TableBuilder(object):
|
||||||
if isinstance(action, Shift):
|
if isinstance(action, Shift):
|
||||||
return self.precedence[symbol]
|
return self.precedence[symbol]
|
||||||
else:
|
else:
|
||||||
return self.precedence[config.name]
|
return self.precedence[config.core.name]
|
||||||
|
|
||||||
def _set_table_action(self, symbol_id: int, action: Action, config: Configuration | None):
|
def _set_table_action(self, symbol_id: int, action: Action, config: Configuration | None):
|
||||||
"""Set the action for 'symbol' in the table row to 'action'.
|
"""Set the action for 'symbol' in the table row to 'action'.
|
||||||
|
|
@ -960,7 +927,7 @@ class GenerateLR0:
|
||||||
beginning. (If the position for config is just before a terminal,
|
beginning. (If the position for config is just before a terminal,
|
||||||
or at the end of the production, then the next set is empty.)
|
or at the end of the production, then the next set is empty.)
|
||||||
"""
|
"""
|
||||||
next = config.next
|
next = config.core.next
|
||||||
if next is None:
|
if next is None:
|
||||||
return ()
|
return ()
|
||||||
else:
|
else:
|
||||||
|
|
@ -984,15 +951,14 @@ class GenerateLR0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
closure.add(config)
|
closure.add(config)
|
||||||
for next_config in self.gen_closure_next(config):
|
pending_next.extend(self.gen_closure_next(config))
|
||||||
pending_next.append(next_config)
|
|
||||||
|
|
||||||
temp = pending
|
temp = pending
|
||||||
pending = pending_next
|
pending = pending_next
|
||||||
pending_next = temp
|
pending_next = temp
|
||||||
pending_next.clear()
|
pending_next.clear()
|
||||||
|
|
||||||
return ConfigSet(closure) # TODO: Why tuple?
|
return ConfigSet(closure)
|
||||||
|
|
||||||
def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet:
|
def gen_successor(self, config_set: typing.Iterable[Configuration], symbol: int) -> ConfigSet:
|
||||||
"""Compute the successor state for the given config set and the
|
"""Compute the successor state for the given config set and the
|
||||||
|
|
@ -1002,9 +968,9 @@ class GenerateLR0:
|
||||||
the symbol.
|
the symbol.
|
||||||
"""
|
"""
|
||||||
seeds = tuple(
|
seeds = tuple(
|
||||||
config.replace_position(config.position + 1)
|
config.replace_position(config.core.position + 1)
|
||||||
for config in config_set
|
for config in config_set
|
||||||
if config.next == symbol
|
if config.core.next == symbol
|
||||||
)
|
)
|
||||||
|
|
||||||
closure = self.gen_closure(seeds)
|
closure = self.gen_closure(seeds)
|
||||||
|
|
@ -1019,7 +985,7 @@ class GenerateLR0:
|
||||||
could possibly see, and figure out which configs sets we get from
|
could possibly see, and figure out which configs sets we get from
|
||||||
those symbols. Those are the successors of this set.)
|
those symbols. Those are the successors of this set.)
|
||||||
"""
|
"""
|
||||||
possible = {config.next for config in config_set if config.next is not None}
|
possible = {config.core.next for config in config_set if config.core.next is not None}
|
||||||
|
|
||||||
next = []
|
next = []
|
||||||
for symbol in possible:
|
for symbol in possible:
|
||||||
|
|
@ -1108,9 +1074,9 @@ class GenerateLR0:
|
||||||
successors = config_sets.successors[config_set_id]
|
successors = config_sets.successors[config_set_id]
|
||||||
|
|
||||||
for config in config_set:
|
for config in config_set:
|
||||||
config_next = config.next
|
config_next = config.core.next
|
||||||
if config_next is None:
|
if config_next is None:
|
||||||
if config.name != self.start_symbol:
|
if config.core.name != self.start_symbol:
|
||||||
for a in self.gen_reduce_set(config):
|
for a in self.gen_reduce_set(config):
|
||||||
builder.set_table_reduce(a, config)
|
builder.set_table_reduce(a, config)
|
||||||
else:
|
else:
|
||||||
|
|
@ -1472,7 +1438,7 @@ class GenerateSLR1(GenerateLR0):
|
||||||
|
|
||||||
In an SLR1 parser, this is the follow set of the config nonterminal.
|
In an SLR1 parser, this is the follow set of the config nonterminal.
|
||||||
"""
|
"""
|
||||||
return self.gen_follow(config.name)
|
return self.gen_follow(config.core.name)
|
||||||
|
|
||||||
|
|
||||||
class GenerateLR1(GenerateSLR1):
|
class GenerateLR1(GenerateSLR1):
|
||||||
|
|
@ -1531,7 +1497,7 @@ class GenerateLR1(GenerateSLR1):
|
||||||
(See the documentation in GenerateLR0 for more information on how
|
(See the documentation in GenerateLR0 for more information on how
|
||||||
this function fits into the whole process, specifically `gen_closure`.)
|
this function fits into the whole process, specifically `gen_closure`.)
|
||||||
"""
|
"""
|
||||||
config_next = config.next
|
config_next = config.core.next
|
||||||
if config_next is None:
|
if config_next is None:
|
||||||
return ()
|
return ()
|
||||||
else:
|
else:
|
||||||
|
|
@ -1590,9 +1556,9 @@ class GenerateLALR(GenerateLR1):
|
||||||
# First, do the actual walk. Don't merge yet: just keep track of all
|
# First, do the actual walk. Don't merge yet: just keep track of all
|
||||||
# the config sets that need to be merged.
|
# the config sets that need to be merged.
|
||||||
#
|
#
|
||||||
F: dict[ConfigSet, list[ConfigSet]] = {}
|
F: dict[CoreSet, list[ConfigSet]] = {}
|
||||||
seen: set[ConfigSet] = set()
|
seen: set[ConfigSet] = set()
|
||||||
successors: list[typing.Tuple[ConfigSet, int, ConfigSet]] = []
|
successors: list[typing.Tuple[CoreSet, int, CoreSet]] = []
|
||||||
pending = [config_set]
|
pending = [config_set]
|
||||||
while len(pending) > 0:
|
while len(pending) > 0:
|
||||||
config_set = pending.pop()
|
config_set = pending.pop()
|
||||||
|
|
@ -1600,7 +1566,7 @@ class GenerateLALR(GenerateLR1):
|
||||||
continue
|
continue
|
||||||
seen.add(config_set)
|
seen.add(config_set)
|
||||||
|
|
||||||
config_set_no_la = ConfigSet(s.clear_lookahead() for s in config_set)
|
config_set_no_la = CoreSet(s.core for s in config_set)
|
||||||
|
|
||||||
existing = F.get(config_set_no_la)
|
existing = F.get(config_set_no_la)
|
||||||
if existing is not None:
|
if existing is not None:
|
||||||
|
|
@ -1609,17 +1575,17 @@ class GenerateLALR(GenerateLR1):
|
||||||
F[config_set_no_la] = [config_set]
|
F[config_set_no_la] = [config_set]
|
||||||
|
|
||||||
for symbol, successor in self.gen_all_successors(config_set):
|
for symbol, successor in self.gen_all_successors(config_set):
|
||||||
successor_no_la = ConfigSet(s.clear_lookahead() for s in successor)
|
successor_no_la = CoreSet(s.core for s in successor)
|
||||||
successors.append((config_set_no_la, symbol, successor_no_la))
|
successors.append((config_set_no_la, symbol, successor_no_la))
|
||||||
pending.append(successor)
|
pending.append(successor)
|
||||||
|
|
||||||
# Now we gathered the sets, merge them all.
|
# Now we gathered the sets, merge them all.
|
||||||
final_sets: dict[ConfigSet, ConfigSet] = {}
|
final_sets: dict[CoreSet, ConfigSet] = {}
|
||||||
for key, config_sets in F.items():
|
for key, config_sets in F.items():
|
||||||
la_merge: dict[Configuration, set[int]] = {}
|
la_merge: dict[ConfigurationCore, set[int]] = {}
|
||||||
for config_set in config_sets:
|
for config_set in config_sets:
|
||||||
for config in config_set:
|
for config in config_set:
|
||||||
la_key = config.clear_lookahead()
|
la_key = config.core
|
||||||
la_set = la_merge.get(la_key)
|
la_set = la_merge.get(la_key)
|
||||||
if la_set is None:
|
if la_set is None:
|
||||||
la_merge[la_key] = set(config.lookahead)
|
la_merge[la_key] = set(config.lookahead)
|
||||||
|
|
@ -1627,7 +1593,8 @@ class GenerateLALR(GenerateLR1):
|
||||||
la_set.update(config.lookahead)
|
la_set.update(config.lookahead)
|
||||||
|
|
||||||
final_set = ConfigSet(
|
final_set = ConfigSet(
|
||||||
config.replace_lookahead(tuple(sorted(la))) for config, la in la_merge.items()
|
Configuration(core=core, lookahead=tuple(sorted(la)))
|
||||||
|
for core, la in la_merge.items()
|
||||||
)
|
)
|
||||||
final_sets[key] = final_set
|
final_sets[key] = final_set
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue