[parser] Everything is an ItemSet now
This commit is contained in:
parent
923b01f6fd
commit
385c378edb
1 changed files with 33 additions and 95 deletions
128
parser/parser.py
128
parser/parser.py
|
|
@ -146,7 +146,7 @@ import typing
|
||||||
#
|
#
|
||||||
# We start with LR0 parsers, because they form the basis of everything else.
|
# We start with LR0 parsers, because they form the basis of everything else.
|
||||||
###############################################################################
|
###############################################################################
|
||||||
class ConfigurationCore(typing.NamedTuple):
|
class Configuration(typing.NamedTuple):
|
||||||
"""A core configuration, basically, a position within a rule.
|
"""A core configuration, basically, a position within a rule.
|
||||||
|
|
||||||
These need to be as small and as tight as you can make them. They are
|
These need to be as small and as tight as you can make them. They are
|
||||||
|
|
@ -170,7 +170,7 @@ class ConfigurationCore(typing.NamedTuple):
|
||||||
next = None
|
next = None
|
||||||
else:
|
else:
|
||||||
next = symbols[0]
|
next = symbols[0]
|
||||||
return ConfigurationCore(
|
return Configuration(
|
||||||
name=name,
|
name=name,
|
||||||
symbols=symbols,
|
symbols=symbols,
|
||||||
position=0,
|
position=0,
|
||||||
|
|
@ -186,7 +186,7 @@ class ConfigurationCore(typing.NamedTuple):
|
||||||
next = None
|
next = None
|
||||||
else:
|
else:
|
||||||
next = self.symbols[new_position]
|
next = self.symbols[new_position]
|
||||||
return ConfigurationCore(
|
return Configuration(
|
||||||
name=self.name,
|
name=self.name,
|
||||||
symbols=self.symbols,
|
symbols=self.symbols,
|
||||||
position=new_position,
|
position=new_position,
|
||||||
|
|
@ -222,57 +222,6 @@ class ConfigurationCore(typing.NamedTuple):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class Configuration(typing.NamedTuple):
|
|
||||||
"""A rule being tracked in a state. That is, a specific position within a
|
|
||||||
specific rule, with an associated lookahead state.
|
|
||||||
|
|
||||||
(Note: technically, lookahead isn't used until we get to LR(1) parsers,
|
|
||||||
but if left at its default it's harmless. Ignore it until you get to
|
|
||||||
the part about LR(1).)
|
|
||||||
"""
|
|
||||||
|
|
||||||
core: ConfigurationCore
|
|
||||||
lookahead: typing.Tuple[int, ...]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_rule(cls, name: int, symbols: typing.Tuple[int, ...], lookahead=()):
|
|
||||||
# Consider adding at_end and next to the namedtuple.
|
|
||||||
return Configuration(
|
|
||||||
core=ConfigurationCore.from_rule(name, symbols),
|
|
||||||
lookahead=lookahead,
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def at_end(self) -> bool:
|
|
||||||
return self.core.next is None
|
|
||||||
|
|
||||||
def replace_position(self, new_position):
|
|
||||||
return Configuration(
|
|
||||||
core=self.core.replace_position(new_position),
|
|
||||||
lookahead=self.lookahead,
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def rest(self):
|
|
||||||
return self.core.symbols[(self.core.position + 1) :]
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
|
||||||
la = ", " + str(self.lookahead) if self.lookahead != () else ""
|
|
||||||
return f"{repr(self.core)}{la}"
|
|
||||||
|
|
||||||
def format(self, alphabet: list[str]) -> str:
|
|
||||||
if self.lookahead != ():
|
|
||||||
la = " ctx:{" + ",".join(alphabet[i] for i in self.lookahead) + "}"
|
|
||||||
else:
|
|
||||||
la = " ctx:{}"
|
|
||||||
|
|
||||||
return f"{self.core.format(alphabet)}{la}"
|
|
||||||
|
|
||||||
|
|
||||||
class ConfigSet(frozenset[Configuration]):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# Here we have a slightly different definition of a ConfigurationSet; we keep
|
# Here we have a slightly different definition of a ConfigurationSet; we keep
|
||||||
# the lookaheads outside and use a dictionary to check for containment
|
# the lookaheads outside and use a dictionary to check for containment
|
||||||
# quickly. ItemSet is used in the GRM/Pager/Chin algorithm.
|
# quickly. ItemSet is used in the GRM/Pager/Chin algorithm.
|
||||||
|
|
@ -286,15 +235,11 @@ class ItemSet:
|
||||||
algorithm.
|
algorithm.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
items: dict[ConfigurationCore, set[int]]
|
items: dict[Configuration, set[int]]
|
||||||
|
|
||||||
def __init__(self, items=None):
|
def __init__(self, items=None):
|
||||||
self.items = items or {}
|
self.items = items or {}
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_config_set(cls, config_set: ConfigSet) -> "ItemSet":
|
|
||||||
return ItemSet({config.core: set(config.lookahead) for config in config_set})
|
|
||||||
|
|
||||||
def weakly_compatible(self, other: "ItemSet") -> bool:
|
def weakly_compatible(self, other: "ItemSet") -> bool:
|
||||||
a = self.items
|
a = self.items
|
||||||
b = other.items
|
b = other.items
|
||||||
|
|
@ -359,11 +304,6 @@ class ItemSet:
|
||||||
result.items[next] = set(context)
|
result.items[next] = set(context)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def to_config_set(self) -> ConfigSet:
|
|
||||||
return ConfigSet(
|
|
||||||
{Configuration(core, tuple(sorted(ctx))) for core, ctx in self.items.items()}
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class StateGraph:
|
class StateGraph:
|
||||||
|
|
@ -381,7 +321,7 @@ class StateGraph:
|
||||||
structure, but they all compute this information.)
|
structure, but they all compute this information.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
closures: list[ConfigSet]
|
closures: list[ItemSet]
|
||||||
|
|
||||||
# All the successors for all of the sets. `successors[i]` is the mapping
|
# All the successors for all of the sets. `successors[i]` is the mapping
|
||||||
# from grammar symbol to the index of the set you get by processing that
|
# from grammar symbol to the index of the set you get by processing that
|
||||||
|
|
@ -392,7 +332,7 @@ class StateGraph:
|
||||||
return json.dumps(
|
return json.dumps(
|
||||||
{
|
{
|
||||||
str(set_index): {
|
str(set_index): {
|
||||||
"closures": [c.format(alphabet) for c in closure],
|
"closures": [f"{c.format(alphabet)} -> {l}" for c, l in closure.items.items()],
|
||||||
"successors": {alphabet[k]: str(v) for k, v in successors.items()},
|
"successors": {alphabet[k]: str(v) for k, v in successors.items()},
|
||||||
}
|
}
|
||||||
for set_index, (closure, successors) in enumerate(
|
for set_index, (closure, successors) in enumerate(
|
||||||
|
|
@ -403,14 +343,14 @@ class StateGraph:
|
||||||
sort_keys=True,
|
sort_keys=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def find_path_to_set(self, target_set: ConfigSet) -> list[int]:
|
def find_path_to_set(self, target_set: ItemSet) -> list[int]:
|
||||||
"""Trace the path of grammar symbols from the first set (which is always
|
"""Trace the path of grammar symbols from the first set (which is always
|
||||||
set 0) to the target set. This is useful in conflict reporting,
|
set 0) to the target set. This is useful in conflict reporting,
|
||||||
because we'll be *at* a ConfigSet and want to show the grammar symbols
|
because we'll be *at* an ItemSet and want to show the grammar symbols
|
||||||
that get us to where we found the conflict.
|
that get us to where we found the conflict.
|
||||||
|
|
||||||
The return value is a list of grammar symbols to get to the specified
|
The return value is a list of grammar symbols to get to the specified
|
||||||
ConfigSet.
|
ItemSet.
|
||||||
|
|
||||||
This function raises KeyError if no path is found.
|
This function raises KeyError if no path is found.
|
||||||
"""
|
"""
|
||||||
|
|
@ -518,7 +458,7 @@ class ErrorCollection:
|
||||||
the error.
|
the error.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
errors: dict[ConfigSet, dict[int, dict[Configuration, Action]]]
|
errors: dict[ItemSet, dict[int, dict[Configuration, Action]]]
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.errors = {}
|
self.errors = {}
|
||||||
|
|
@ -529,7 +469,7 @@ class ErrorCollection:
|
||||||
|
|
||||||
def add_error(
|
def add_error(
|
||||||
self,
|
self,
|
||||||
config_set: ConfigSet,
|
config_set: ItemSet,
|
||||||
symbol: int,
|
symbol: int,
|
||||||
config: Configuration,
|
config: Configuration,
|
||||||
action: Action,
|
action: Action,
|
||||||
|
|
@ -581,11 +521,10 @@ class ErrorCollection:
|
||||||
for symbol, symbol_errors in set_errors.items():
|
for symbol, symbol_errors in set_errors.items():
|
||||||
actions = []
|
actions = []
|
||||||
for config, action in symbol_errors.items():
|
for config, action in symbol_errors.items():
|
||||||
core = config.core
|
name = alphabet[config.name]
|
||||||
name = alphabet[core.name]
|
|
||||||
rule = " ".join(
|
rule = " ".join(
|
||||||
f"{'* ' if core.position == i else ''}{alphabet[s]}"
|
f"{'* ' if config.position == i else ''}{alphabet[s]}"
|
||||||
for i, s in enumerate(core.symbols)
|
for i, s in enumerate(config.symbols)
|
||||||
)
|
)
|
||||||
if config.at_end:
|
if config.at_end:
|
||||||
rule += " *"
|
rule += " *"
|
||||||
|
|
@ -707,7 +646,7 @@ class TableBuilder(object):
|
||||||
|
|
||||||
return ParseTable(actions=self.actions, gotos=self.gotos, trivia=set())
|
return ParseTable(actions=self.actions, gotos=self.gotos, trivia=set())
|
||||||
|
|
||||||
def new_row(self, config_set: ConfigSet):
|
def new_row(self, config_set: ItemSet):
|
||||||
"""Start a new row, processing the given config set. Call this before
|
"""Start a new row, processing the given config set. Call this before
|
||||||
doing anything else.
|
doing anything else.
|
||||||
"""
|
"""
|
||||||
|
|
@ -735,9 +674,9 @@ class TableBuilder(object):
|
||||||
"""Mark a reduce of the given configuration for the given symbol in the
|
"""Mark a reduce of the given configuration for the given symbol in the
|
||||||
current row.
|
current row.
|
||||||
"""
|
"""
|
||||||
name = self.alphabet[config.core.name]
|
name = self.alphabet[config.name]
|
||||||
transparent = name in self.transparents
|
transparent = name in self.transparents
|
||||||
action = Reduce(name, len(config.core.symbols), transparent)
|
action = Reduce(name, len(config.symbols), transparent)
|
||||||
self._set_table_action(symbol, action, config)
|
self._set_table_action(symbol, action, config)
|
||||||
|
|
||||||
def set_table_accept(self, symbol: int, config: Configuration):
|
def set_table_accept(self, symbol: int, config: Configuration):
|
||||||
|
|
@ -768,7 +707,7 @@ class TableBuilder(object):
|
||||||
if isinstance(action, Shift):
|
if isinstance(action, Shift):
|
||||||
return self.precedence[symbol]
|
return self.precedence[symbol]
|
||||||
else:
|
else:
|
||||||
return self.precedence[config.core.name]
|
return self.precedence[config.name]
|
||||||
|
|
||||||
def _set_table_action(self, symbol_id: int, action: ParseAction, config: Configuration | None):
|
def _set_table_action(self, symbol_id: int, action: ParseAction, config: Configuration | None):
|
||||||
"""Set the action for 'symbol' in the table row to 'action'.
|
"""Set the action for 'symbol' in the table row to 'action'.
|
||||||
|
|
@ -1252,7 +1191,7 @@ class ParserGenerator:
|
||||||
self._firsts,
|
self._firsts,
|
||||||
)
|
)
|
||||||
|
|
||||||
def gen_sets(self, seeds: list[Configuration]) -> StateGraph:
|
def gen_sets(self, seeds: ItemSet) -> StateGraph:
|
||||||
# This function can be seen as a modified version of items() from
|
# This function can be seen as a modified version of items() from
|
||||||
# Chen's dissertation.
|
# Chen's dissertation.
|
||||||
#
|
#
|
||||||
|
|
@ -1270,10 +1209,7 @@ class ParserGenerator:
|
||||||
core_states: list[ItemSet] = []
|
core_states: list[ItemSet] = []
|
||||||
edges: list[dict[int, int]] = []
|
edges: list[dict[int, int]] = []
|
||||||
|
|
||||||
# Convert the incoming seed configurations into item sets.
|
core_states.append(seeds)
|
||||||
# TODO: Convert everything to ItemSet natively.
|
|
||||||
state0 = ItemSet({seed.core: set(seed.lookahead) for seed in seeds})
|
|
||||||
core_states.append(state0)
|
|
||||||
closed_states.append(None)
|
closed_states.append(None)
|
||||||
edges.append({})
|
edges.append({})
|
||||||
|
|
||||||
|
|
@ -1399,7 +1335,7 @@ class ParserGenerator:
|
||||||
# Register all the actually merged, final config sets. I should *not*
|
# Register all the actually merged, final config sets. I should *not*
|
||||||
# have to do all this work. Really really garbage.
|
# have to do all this work. Really really garbage.
|
||||||
return StateGraph(
|
return StateGraph(
|
||||||
closures=[closed_state.to_config_set() for _, closed_state in gc_states],
|
closures=[closed_state for _, closed_state in gc_states],
|
||||||
successors=gc_edges,
|
successors=gc_edges,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1492,7 +1428,7 @@ class ParserGenerator:
|
||||||
set of all those productions combined with all the incoming productions
|
set of all those productions combined with all the incoming productions
|
||||||
is the closure.
|
is the closure.
|
||||||
"""
|
"""
|
||||||
closure: dict[ConfigurationCore, set[int]] = {}
|
closure: dict[Configuration, set[int]] = {}
|
||||||
|
|
||||||
# We're going to maintain a set of things to look at, rules that we
|
# We're going to maintain a set of things to look at, rules that we
|
||||||
# still need to close over. Assume that starts with everything in us.
|
# still need to close over. Assume that starts with everything in us.
|
||||||
|
|
@ -1525,7 +1461,7 @@ class ParserGenerator:
|
||||||
lookahead.update(context)
|
lookahead.update(context)
|
||||||
|
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
new_core = ConfigurationCore.from_rule(config_next, rule)
|
new_core = Configuration.from_rule(config_next, rule)
|
||||||
todo.append((new_core, lookahead))
|
todo.append((new_core, lookahead))
|
||||||
|
|
||||||
return ItemSet(closure)
|
return ItemSet(closure)
|
||||||
|
|
@ -1536,10 +1472,12 @@ class ParserGenerator:
|
||||||
In LR1 parsers, we must remember to set the lookahead of the start
|
In LR1 parsers, we must remember to set the lookahead of the start
|
||||||
symbol to '$'.
|
symbol to '$'.
|
||||||
"""
|
"""
|
||||||
seeds = [
|
seeds = ItemSet(
|
||||||
Configuration.from_rule(self.start_symbol, rule, lookahead=(self.end_symbol,))
|
{
|
||||||
for rule in self.grammar[self.start_symbol]
|
Configuration.from_rule(self.start_symbol, rule): {self.end_symbol}
|
||||||
]
|
for rule in self.grammar[self.start_symbol]
|
||||||
|
}
|
||||||
|
)
|
||||||
return self.gen_sets(seeds)
|
return self.gen_sets(seeds)
|
||||||
|
|
||||||
def gen_table(self) -> ParseTable:
|
def gen_table(self) -> ParseTable:
|
||||||
|
|
@ -1579,11 +1517,11 @@ class ParserGenerator:
|
||||||
builder.new_row(config_set)
|
builder.new_row(config_set)
|
||||||
successors = config_sets.successors[config_set_id]
|
successors = config_sets.successors[config_set_id]
|
||||||
|
|
||||||
for config in config_set:
|
for config, lookahead in config_set.items.items():
|
||||||
config_next = config.core.next
|
config_next = config.next
|
||||||
if config_next is None:
|
if config_next is None:
|
||||||
if config.core.name != self.start_symbol:
|
if config.name != self.start_symbol:
|
||||||
for a in config.lookahead:
|
for a in lookahead:
|
||||||
builder.set_table_reduce(a, config)
|
builder.set_table_reduce(a, config)
|
||||||
else:
|
else:
|
||||||
builder.set_table_accept(self.end_symbol, config)
|
builder.set_table_accept(self.end_symbol, config)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue