[parser] Remove Canonical LR1 generator
This is fine probably.
This commit is contained in:
parent
2b72811486
commit
27e6bb413c
2 changed files with 177 additions and 317 deletions
485
parser/parser.py
485
parser/parser.py
|
|
@ -1098,16 +1098,36 @@ class FollowInfo:
|
||||||
return FollowInfo(follows=follows)
|
return FollowInfo(follows=follows)
|
||||||
|
|
||||||
|
|
||||||
class GenerateLR1:
|
class ParserGenerator:
|
||||||
"""Generate parse tables for LR1, or "canonical LR" grammars.
|
"""Generate parse tables for LR1 grammars.
|
||||||
|
|
||||||
LR1 parsers can recognize more than SLR parsers. Like SLR parsers, they
|
This class implements a variant of Pager's algorithm to generate the parse
|
||||||
are choosier about when they reduce. But unlike SLR parsers, they specify
|
tables, which support the same set of languages as Canonical LR1 but with
|
||||||
the terminals on which they reduce by carrying a 'lookahead' terminal in
|
much smaller resulting parse tables.
|
||||||
the configuration. The lookahead of a configuration is computed as the
|
|
||||||
closure of a configuration set is computed, so see gen_closure_next for
|
I'll be honest, I don't understand this one as well as the pure LR1
|
||||||
details. (Except for the start configuration, which has '$' as its
|
algorithm. It proceeds as LR1, generating successor states, but every
|
||||||
lookahead.)
|
time it makes a new state it searches the states it has already made for
|
||||||
|
one that is "weakly compatible;" if it finds one it merges the new state
|
||||||
|
with the old state and marks the old state to be re-visited.
|
||||||
|
|
||||||
|
The implementation here follows from the implementation in
|
||||||
|
`GRMTools <https://github.com/softdevteam/grmtools/blob/master/lrtable/src/lib/pager.rs>`_.
|
||||||
|
|
||||||
|
As they explain there:
|
||||||
|
|
||||||
|
> The general algorithms that form the basis of what's used in this file
|
||||||
|
> can be found in:
|
||||||
|
>
|
||||||
|
> A Practical General Method for Constructing LR(k) Parsers
|
||||||
|
> David Pager, Acta Informatica 7, 249--268, 1977
|
||||||
|
>
|
||||||
|
> However Pager's paper is dense, and doesn't name sub-parts of the
|
||||||
|
> algorithm. We mostly reference the (still incomplete, but less
|
||||||
|
> incomplete) version of the algorithm found in:
|
||||||
|
>
|
||||||
|
> Measuring and extending LR(1) parser generation
|
||||||
|
> Xin Chen, PhD thesis, University of Hawaii, 2009
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Internally we use integers as symbols, not strings. Mostly this is fine,
|
# Internally we use integers as symbols, not strings. Mostly this is fine,
|
||||||
|
|
@ -1170,9 +1190,9 @@ class GenerateLR1:
|
||||||
non-terminal being added, and the second element of the tuple is the
|
non-terminal being added, and the second element of the tuple is the
|
||||||
list of terminals and non-terminals that make up the production.
|
list of terminals and non-terminals that make up the production.
|
||||||
|
|
||||||
There is currently no support for custom actions or alternation or
|
There is no support for alternation. If you want alternation,
|
||||||
anything like that. If you want alternations that you'll have to lower
|
you'll have to lower the grammar by hand into the simpler form first,
|
||||||
the grammar by hand into the simpler form first.
|
but that's what the Grammar and NonTerminal classes are for.
|
||||||
|
|
||||||
Don't name anything with double-underscores; those are reserved for
|
Don't name anything with double-underscores; those are reserved for
|
||||||
the generator. Don't add '$' either, as it is reserved to mean
|
the generator. Don't add '$' either, as it is reserved to mean
|
||||||
|
|
@ -1272,302 +1292,6 @@ class GenerateLR1:
|
||||||
self._firsts,
|
self._firsts,
|
||||||
)
|
)
|
||||||
|
|
||||||
def gen_closure(self, seeds: typing.Iterable[Configuration]) -> ConfigSet:
|
|
||||||
"""Compute the closure for the specified configs. The closure is all
|
|
||||||
of the configurations we could be in. Specifically, if the position
|
|
||||||
for a config is just before a non-terminal then we must also consider
|
|
||||||
configurations where the rule is the rule for the non-terminal and
|
|
||||||
the position is just before the beginning of the rule.
|
|
||||||
|
|
||||||
(We have replaced a recursive version with an iterative one.)
|
|
||||||
"""
|
|
||||||
closure: set[Configuration] = set()
|
|
||||||
pending = list(seeds)
|
|
||||||
pending_next = []
|
|
||||||
while len(pending) > 0:
|
|
||||||
for config in pending:
|
|
||||||
if config in closure:
|
|
||||||
continue
|
|
||||||
|
|
||||||
closure.add(config)
|
|
||||||
pending_next.extend(self.gen_closure_next(config))
|
|
||||||
|
|
||||||
temp = pending
|
|
||||||
pending = pending_next
|
|
||||||
pending_next = temp
|
|
||||||
pending_next.clear()
|
|
||||||
|
|
||||||
# NOTE: The generation of this closure *might* have generated
|
|
||||||
# multiple cores with different lookaheads; if that's
|
|
||||||
# the case we need to merge.
|
|
||||||
merged: dict[ConfigurationCore, set[int]] = {}
|
|
||||||
for c in closure:
|
|
||||||
existing = merged.get(c.core)
|
|
||||||
if existing is not None:
|
|
||||||
existing.update(c.lookahead)
|
|
||||||
else:
|
|
||||||
merged[c.core] = set(c.lookahead)
|
|
||||||
|
|
||||||
return ConfigSet(Configuration(k, tuple(sorted(v))) for k, v in merged.items())
|
|
||||||
|
|
||||||
def gen_all_successors(
|
|
||||||
self, config_set: typing.Iterable[Configuration]
|
|
||||||
) -> list[typing.Tuple[int, ConfigSet]]:
|
|
||||||
"""Return all of the non-empty successors for the given config set.
|
|
||||||
|
|
||||||
(That is, given the config set, pretend we see all the symbols we
|
|
||||||
could possibly see, and figure out which configs sets we get from
|
|
||||||
those symbols. Those are the successors of this set.)
|
|
||||||
"""
|
|
||||||
possible = {config.core.next for config in config_set if config.core.next is not None}
|
|
||||||
|
|
||||||
next = []
|
|
||||||
for symbol in possible:
|
|
||||||
seeds = ConfigSet(
|
|
||||||
config.replace_position(config.core.position + 1)
|
|
||||||
for config in config_set
|
|
||||||
if config.core.next == symbol
|
|
||||||
)
|
|
||||||
if len(seeds) > 0:
|
|
||||||
next.append((symbol, seeds))
|
|
||||||
|
|
||||||
return next
|
|
||||||
|
|
||||||
def gen_sets(self, seeds: list[Configuration]) -> StateGraph:
|
|
||||||
"""Generate all configuration sets starting from the provided seeds."""
|
|
||||||
result = StateGraph()
|
|
||||||
|
|
||||||
successors = []
|
|
||||||
pending = [ConfigSet(seeds)]
|
|
||||||
pending_next = []
|
|
||||||
while len(pending) > 0:
|
|
||||||
for core in pending:
|
|
||||||
id, is_new = result.register_core(core)
|
|
||||||
if is_new:
|
|
||||||
config_set = self.gen_closure(core)
|
|
||||||
result.register_config_closure(id, config_set)
|
|
||||||
for symbol, successor in self.gen_all_successors(config_set):
|
|
||||||
successors.append((id, symbol, successor))
|
|
||||||
pending_next.append(successor)
|
|
||||||
|
|
||||||
temp = pending
|
|
||||||
pending = pending_next
|
|
||||||
pending_next = temp
|
|
||||||
pending_next.clear()
|
|
||||||
|
|
||||||
for id, symbol, successor in successors:
|
|
||||||
result.add_successor(id, symbol, result.core_key[successor])
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def gen_follow(self, symbol: int) -> set[int]:
|
|
||||||
"""Generate the follow set for the given nonterminal.
|
|
||||||
|
|
||||||
The follow set for a nonterminal is the set of terminals that can
|
|
||||||
follow the nonterminal in a valid sentence. The resulting set never
|
|
||||||
contains epsilon and is never empty, since we should always at least
|
|
||||||
ground out at '$', which is the end-of-stream marker.
|
|
||||||
|
|
||||||
See FollowInfo for more information on how this is determined.
|
|
||||||
"""
|
|
||||||
return self._follows.follows[symbol]
|
|
||||||
|
|
||||||
def gen_first(self, symbols: typing.Iterable[int]) -> typing.Tuple[set[int], bool]:
|
|
||||||
"""Return the first set for a *sequence* of symbols.
|
|
||||||
|
|
||||||
(This is more than FIRST: we need to know the first thing that can
|
|
||||||
happen in this particular sequence right here.)
|
|
||||||
|
|
||||||
Build the set by combining the first sets of the symbols from left to
|
|
||||||
right as long as epsilon remains in the first set. If we reach the end
|
|
||||||
and every symbol has had epsilon, then this set also has epsilon.
|
|
||||||
|
|
||||||
Otherwise we can stop as soon as we get to a non-epsilon first(), and
|
|
||||||
our result does not have epsilon.
|
|
||||||
"""
|
|
||||||
result = set()
|
|
||||||
for s in symbols:
|
|
||||||
result.update(self._firsts.firsts[s])
|
|
||||||
if not self._firsts.is_epsilon[s]:
|
|
||||||
return (result, False)
|
|
||||||
|
|
||||||
return (result, True)
|
|
||||||
|
|
||||||
def gen_reduce_set(self, config: Configuration) -> typing.Iterable[int]:
|
|
||||||
"""Return the set of symbols that indicate we should reduce the given
|
|
||||||
config.
|
|
||||||
|
|
||||||
In an LR1 parser, this is the lookahead of the configuration.
|
|
||||||
"""
|
|
||||||
return config.lookahead
|
|
||||||
|
|
||||||
def gen_closure_next(self, config: Configuration):
|
|
||||||
"""Return the next set of configurations in the closure for config.
|
|
||||||
|
|
||||||
In LR1 parsers, we must compute the lookahead for the configurations
|
|
||||||
we're adding to the closure. The lookahead for the new configurations
|
|
||||||
is the first() of the rest of this config's production. If that
|
|
||||||
contains epsilon, then the lookahead *also* contains the lookahead we
|
|
||||||
already have. (This lookahead was presumably generated by the same
|
|
||||||
process, so in some sense it is a 'parent' lookahead, or a lookahead
|
|
||||||
from an upstream production in the grammar.)
|
|
||||||
|
|
||||||
(See the documentation in GenerateLR0 for more information on how
|
|
||||||
this function fits into the whole process, specifically `gen_closure`.)
|
|
||||||
"""
|
|
||||||
config_next = config.core.next
|
|
||||||
if config_next is None:
|
|
||||||
return ()
|
|
||||||
else:
|
|
||||||
lookahead, epsilon = self.gen_first(config.rest)
|
|
||||||
if epsilon:
|
|
||||||
lookahead.update(config.lookahead)
|
|
||||||
lookahead_tuple = tuple(sorted(lookahead))
|
|
||||||
|
|
||||||
next = []
|
|
||||||
for rule in self.grammar[config_next]:
|
|
||||||
rr = Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple)
|
|
||||||
next.append(rr)
|
|
||||||
|
|
||||||
return tuple(next)
|
|
||||||
|
|
||||||
def gen_closure_x(self, items: ItemSet) -> ItemSet:
|
|
||||||
closure: dict[ConfigurationCore, set[int]] = {}
|
|
||||||
|
|
||||||
# We're going to maintain a set of things to look at, rules that we
|
|
||||||
# still need to close over. Assume that starts with everything in us.
|
|
||||||
todo = [(core, context) for core, context in items.items.items()]
|
|
||||||
while len(todo) > 0:
|
|
||||||
core, context = todo.pop()
|
|
||||||
|
|
||||||
existing_context = closure.get(core)
|
|
||||||
if existing_context is None or not context <= existing_context:
|
|
||||||
# Either context is none or something in context is not in
|
|
||||||
# existing_context, so we need to process this one.
|
|
||||||
if existing_context is not None:
|
|
||||||
existing_context.update(context)
|
|
||||||
else:
|
|
||||||
# NOTE: context in the set is a lookahead and got
|
|
||||||
# generated exactly once for all the child rules.
|
|
||||||
# we have to copy somewhere, this here seems best.
|
|
||||||
closure[core] = set(context)
|
|
||||||
|
|
||||||
config_next = core.next
|
|
||||||
if config_next is None:
|
|
||||||
# No closure for this one, we're at the end.
|
|
||||||
continue
|
|
||||||
|
|
||||||
rules = self.grammar[config_next]
|
|
||||||
if len(rules) > 0:
|
|
||||||
lookahead, epsilon = self.gen_first(core.rest)
|
|
||||||
print(f" LA {core.rest} -> {lookahead} e:{epsilon}")
|
|
||||||
if epsilon:
|
|
||||||
lookahead.update(context)
|
|
||||||
|
|
||||||
for rule in rules:
|
|
||||||
new_core = ConfigurationCore.from_rule(config_next, rule)
|
|
||||||
todo.append((new_core, lookahead))
|
|
||||||
|
|
||||||
return ItemSet(closure)
|
|
||||||
|
|
||||||
def gen_all_sets(self):
|
|
||||||
"""Generate all of the configuration sets for the grammar.
|
|
||||||
|
|
||||||
In LR1 parsers, we must remember to set the lookahead of the start
|
|
||||||
symbol to '$'.
|
|
||||||
"""
|
|
||||||
seeds = [
|
|
||||||
Configuration.from_rule(self.start_symbol, rule, lookahead=(self.end_symbol,))
|
|
||||||
for rule in self.grammar[self.start_symbol]
|
|
||||||
]
|
|
||||||
return self.gen_sets(seeds)
|
|
||||||
|
|
||||||
def gen_table(self) -> ParseTable:
|
|
||||||
"""Generate the parse table.
|
|
||||||
|
|
||||||
The parse table is a list of states. The first state in the list is
|
|
||||||
the starting state. Each state is a dictionary that maps a symbol to an
|
|
||||||
action. Each action is a tuple. The first element of the tuple is a
|
|
||||||
string describing what to do:
|
|
||||||
|
|
||||||
- 'shift': The second element of the tuple is the state
|
|
||||||
number. Consume the input and push that state onto the stack.
|
|
||||||
|
|
||||||
- 'reduce': The second element is the name of the non-terminal being
|
|
||||||
reduced, and the third element is the number of states to remove
|
|
||||||
from the stack. Don't consume the input; just remove the specified
|
|
||||||
number of things from the stack, and then consult the table again,
|
|
||||||
this time using the new top-of-stack as the current state and the
|
|
||||||
name of the non-terminal to find out what to do.
|
|
||||||
|
|
||||||
- 'goto': The second element is the state number to push onto the
|
|
||||||
stack. In the literature, these entries are treated distinctly from
|
|
||||||
the actions, but we mix them here because they never overlap with the
|
|
||||||
other actions. (These are always associated with non-terminals, and
|
|
||||||
the other actions are always associated with terminals.)
|
|
||||||
|
|
||||||
- 'accept': Accept the result of the parse, it worked.
|
|
||||||
|
|
||||||
Anything missing from the row indicates an error.
|
|
||||||
"""
|
|
||||||
config_sets = self.gen_all_sets()
|
|
||||||
# print(config_sets.dump_state(self.alphabet))
|
|
||||||
builder = TableBuilder(self.alphabet, self.precedence, self.transparents)
|
|
||||||
|
|
||||||
for config_set_id, config_set in enumerate(config_sets.closures):
|
|
||||||
assert config_set is not None
|
|
||||||
builder.new_row(config_set)
|
|
||||||
successors = config_sets.successors[config_set_id]
|
|
||||||
|
|
||||||
for config in config_set:
|
|
||||||
config_next = config.core.next
|
|
||||||
if config_next is None:
|
|
||||||
if config.core.name != self.start_symbol:
|
|
||||||
for a in self.gen_reduce_set(config):
|
|
||||||
builder.set_table_reduce(a, config)
|
|
||||||
else:
|
|
||||||
builder.set_table_accept(self.end_symbol, config)
|
|
||||||
|
|
||||||
elif self.terminal[config_next]:
|
|
||||||
index = successors[config_next]
|
|
||||||
builder.set_table_shift(config_next, index, config)
|
|
||||||
|
|
||||||
# Gotos
|
|
||||||
for symbol, index in successors.items():
|
|
||||||
if self.nonterminal[symbol]:
|
|
||||||
builder.set_table_goto(symbol, index)
|
|
||||||
|
|
||||||
return builder.flush(config_sets)
|
|
||||||
|
|
||||||
|
|
||||||
class GeneratePager(GenerateLR1):
|
|
||||||
"""Pager's algorithm.
|
|
||||||
|
|
||||||
I'll be honest, I don't understand this one as well as the pure LR1
|
|
||||||
algorithm. It proceeds as LR1, generating successor states, but every
|
|
||||||
time it makes a new state it searches the states it has already made for
|
|
||||||
one that is "weakly compatible;" if it finds one it merges the new state
|
|
||||||
with the old state and marks the old state to be re-visited.
|
|
||||||
|
|
||||||
The implementation here follows from the implementation in
|
|
||||||
`GRMTools <https://github.com/softdevteam/grmtools/blob/master/lrtable/src/lib/pager.rs>`_.
|
|
||||||
|
|
||||||
As they explain there:
|
|
||||||
|
|
||||||
> The general algorithms that form the basis of what's used in this file
|
|
||||||
> can be found in:
|
|
||||||
>
|
|
||||||
> A Practical General Method for Constructing LR(k) Parsers
|
|
||||||
> David Pager, Acta Informatica 7, 249--268, 1977
|
|
||||||
>
|
|
||||||
> However Pager's paper is dense, and doesn't name sub-parts of the
|
|
||||||
> algorithm. We mostly reference the (still incomplete, but less
|
|
||||||
> incomplete) version of the algorithm found in:
|
|
||||||
>
|
|
||||||
> Measuring and extending LR(1) parser generation
|
|
||||||
> Xin Chen, PhD thesis, University of Hawaii, 2009
|
|
||||||
"""
|
|
||||||
|
|
||||||
def gen_sets(self, seeds: list[Configuration]) -> StateGraph:
|
def gen_sets(self, seeds: list[Configuration]) -> StateGraph:
|
||||||
# This function can be seen as a modified version of items() from
|
# This function can be seen as a modified version of items() from
|
||||||
# Chen's dissertation.
|
# Chen's dissertation.
|
||||||
|
|
@ -1626,7 +1350,7 @@ class GeneratePager(GenerateLR1):
|
||||||
todo_off = state_i + 1
|
todo_off = state_i + 1
|
||||||
todo -= 1
|
todo -= 1
|
||||||
|
|
||||||
cl_state = self.gen_closure_x(core_states[state_i])
|
cl_state = self.gen_closure(core_states[state_i])
|
||||||
closed_states[state_i] = cl_state
|
closed_states[state_i] = cl_state
|
||||||
|
|
||||||
seen.clear()
|
seen.clear()
|
||||||
|
|
@ -1782,6 +1506,143 @@ class GeneratePager(GenerateLR1):
|
||||||
|
|
||||||
return (gc_states, gc_edges)
|
return (gc_states, gc_edges)
|
||||||
|
|
||||||
|
def gen_first(self, symbols: typing.Iterable[int]) -> typing.Tuple[set[int], bool]:
|
||||||
|
"""Return the first set for a *sequence* of symbols.
|
||||||
|
|
||||||
|
(This is more than FIRST: we need to know the first thing that can
|
||||||
|
happen in this particular sequence right here.)
|
||||||
|
|
||||||
|
Build the set by combining the first sets of the symbols from left to
|
||||||
|
right as long as epsilon remains in the first set. If we reach the end
|
||||||
|
and every symbol has had epsilon, then this set also has epsilon.
|
||||||
|
|
||||||
|
Otherwise we can stop as soon as we get to a non-epsilon first(), and
|
||||||
|
our result does not have epsilon.
|
||||||
|
"""
|
||||||
|
result = set()
|
||||||
|
for s in symbols:
|
||||||
|
result.update(self._firsts.firsts[s])
|
||||||
|
if not self._firsts.is_epsilon[s]:
|
||||||
|
return (result, False)
|
||||||
|
|
||||||
|
return (result, True)
|
||||||
|
|
||||||
|
def gen_closure(self, items: ItemSet) -> ItemSet:
|
||||||
|
"""Generate the closure of the given ItemSet.
|
||||||
|
|
||||||
|
Some of the configurations in the ItemSet might be positioned right before
|
||||||
|
nonterminals. In that case, obviously, we should *also* behave as if we
|
||||||
|
were right at the beginning of each production for that nonterminal. The
|
||||||
|
set of all those productions combined with all the incoming productions
|
||||||
|
is the closure.
|
||||||
|
"""
|
||||||
|
closure: dict[ConfigurationCore, set[int]] = {}
|
||||||
|
|
||||||
|
# We're going to maintain a set of things to look at, rules that we
|
||||||
|
# still need to close over. Assume that starts with everything in us.
|
||||||
|
todo = [(core, context) for core, context in items.items.items()]
|
||||||
|
while len(todo) > 0:
|
||||||
|
core, context = todo.pop()
|
||||||
|
|
||||||
|
existing_context = closure.get(core)
|
||||||
|
if existing_context is None or not context <= existing_context:
|
||||||
|
# Either there is no existing context yet or something in context is not in
|
||||||
|
# existing_context, so we need to process this one.
|
||||||
|
if existing_context is not None:
|
||||||
|
existing_context.update(context)
|
||||||
|
else:
|
||||||
|
# NOTE: context in the set is a lookahead and got
|
||||||
|
# generated exactly once for all the child rules.
|
||||||
|
# we have to copy somewhere, this here seems best.
|
||||||
|
closure[core] = set(context)
|
||||||
|
|
||||||
|
config_next = core.next
|
||||||
|
if config_next is None:
|
||||||
|
# No closure for this one, we're at the end.
|
||||||
|
continue
|
||||||
|
|
||||||
|
rules = self.grammar[config_next]
|
||||||
|
if len(rules) > 0:
|
||||||
|
lookahead, epsilon = self.gen_first(core.rest)
|
||||||
|
print(f" LA {core.rest} -> {lookahead} e:{epsilon}")
|
||||||
|
if epsilon:
|
||||||
|
lookahead.update(context)
|
||||||
|
|
||||||
|
for rule in rules:
|
||||||
|
new_core = ConfigurationCore.from_rule(config_next, rule)
|
||||||
|
todo.append((new_core, lookahead))
|
||||||
|
|
||||||
|
return ItemSet(closure)
|
||||||
|
|
||||||
|
def gen_all_sets(self):
|
||||||
|
"""Generate all of the configuration sets for the grammar.
|
||||||
|
|
||||||
|
In LR1 parsers, we must remember to set the lookahead of the start
|
||||||
|
symbol to '$'.
|
||||||
|
"""
|
||||||
|
seeds = [
|
||||||
|
Configuration.from_rule(self.start_symbol, rule, lookahead=(self.end_symbol,))
|
||||||
|
for rule in self.grammar[self.start_symbol]
|
||||||
|
]
|
||||||
|
return self.gen_sets(seeds)
|
||||||
|
|
||||||
|
def gen_table(self) -> ParseTable:
|
||||||
|
"""Generate the parse table.
|
||||||
|
|
||||||
|
The parse table is a list of states. The first state in the list is
|
||||||
|
the starting state. Each state is a dictionary that maps a symbol to an
|
||||||
|
action. Each action is a tuple. The first element of the tuple is a
|
||||||
|
string describing what to do:
|
||||||
|
|
||||||
|
- 'shift': The second element of the tuple is the state
|
||||||
|
number. Consume the input and push that state onto the stack.
|
||||||
|
|
||||||
|
- 'reduce': The second element is the name of the non-terminal being
|
||||||
|
reduced, and the third element is the number of states to remove
|
||||||
|
from the stack. Don't consume the input; just remove the specified
|
||||||
|
number of things from the stack, and then consult the table again,
|
||||||
|
this time using the new top-of-stack as the current state and the
|
||||||
|
name of the non-terminal to find out what to do.
|
||||||
|
|
||||||
|
- 'goto': The second element is the state number to push onto the
|
||||||
|
stack. In the literature, these entries are treated distinctly from
|
||||||
|
the actions, but we mix them here because they never overlap with the
|
||||||
|
other actions. (These are always associated with non-terminals, and
|
||||||
|
the other actions are always associated with terminals.)
|
||||||
|
|
||||||
|
- 'accept': Accept the result of the parse, it worked.
|
||||||
|
|
||||||
|
Anything missing from the row indicates an error.
|
||||||
|
"""
|
||||||
|
config_sets = self.gen_all_sets()
|
||||||
|
# print(config_sets.dump_state(self.alphabet))
|
||||||
|
builder = TableBuilder(self.alphabet, self.precedence, self.transparents)
|
||||||
|
|
||||||
|
for config_set_id, config_set in enumerate(config_sets.closures):
|
||||||
|
assert config_set is not None
|
||||||
|
builder.new_row(config_set)
|
||||||
|
successors = config_sets.successors[config_set_id]
|
||||||
|
|
||||||
|
for config in config_set:
|
||||||
|
config_next = config.core.next
|
||||||
|
if config_next is None:
|
||||||
|
if config.core.name != self.start_symbol:
|
||||||
|
for a in config.lookahead:
|
||||||
|
builder.set_table_reduce(a, config)
|
||||||
|
else:
|
||||||
|
builder.set_table_accept(self.end_symbol, config)
|
||||||
|
|
||||||
|
elif self.terminal[config_next]:
|
||||||
|
index = successors[config_next]
|
||||||
|
builder.set_table_shift(config_next, index, config)
|
||||||
|
|
||||||
|
# Gotos
|
||||||
|
for symbol, index in successors.items():
|
||||||
|
if self.nonterminal[symbol]:
|
||||||
|
builder.set_table_goto(symbol, index)
|
||||||
|
|
||||||
|
return builder.flush(config_sets)
|
||||||
|
|
||||||
|
|
||||||
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
|
FlattenedWithMetadata = list["str|Terminal|tuple[dict[str,typing.Any],FlattenedWithMetadata]"]
|
||||||
|
|
||||||
|
|
@ -3008,7 +2869,7 @@ class Grammar:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_precedence: dict[str, typing.Tuple[Assoc, int]]
|
_precedence: dict[str, typing.Tuple[Assoc, int]]
|
||||||
_generator: type[GenerateLR1]
|
_generator: type[ParserGenerator]
|
||||||
_terminals: dict[str, Terminal]
|
_terminals: dict[str, Terminal]
|
||||||
_nonterminals: dict[str, NonTerminal]
|
_nonterminals: dict[str, NonTerminal]
|
||||||
_trivia: list[Terminal]
|
_trivia: list[Terminal]
|
||||||
|
|
@ -3017,7 +2878,7 @@ class Grammar:
|
||||||
self,
|
self,
|
||||||
start: str | NonTerminal | None = None,
|
start: str | NonTerminal | None = None,
|
||||||
precedence: PrecedenceList | None = None,
|
precedence: PrecedenceList | None = None,
|
||||||
generator: type[GenerateLR1] | None = None,
|
generator: type[ParserGenerator] | None = None,
|
||||||
trivia: list[str | Terminal] | None = None,
|
trivia: list[str | Terminal] | None = None,
|
||||||
name: str | None = None,
|
name: str | None = None,
|
||||||
):
|
):
|
||||||
|
|
@ -3036,7 +2897,7 @@ class Grammar:
|
||||||
assert precedence is not None
|
assert precedence is not None
|
||||||
|
|
||||||
if generator is None:
|
if generator is None:
|
||||||
generator = getattr(self, "generator", GeneratePager)
|
generator = getattr(self, "generator", ParserGenerator)
|
||||||
assert generator is not None
|
assert generator is not None
|
||||||
|
|
||||||
if trivia is None:
|
if trivia is None:
|
||||||
|
|
|
||||||
|
|
@ -87,8 +87,8 @@ def test_all_generators():
|
||||||
|
|
||||||
GENERATORS = [
|
GENERATORS = [
|
||||||
# parser.GenerateLR0,
|
# parser.GenerateLR0,
|
||||||
parser.GeneratePager,
|
# parser.GeneratePager,
|
||||||
parser.GenerateLR1,
|
parser.ParserGenerator,
|
||||||
]
|
]
|
||||||
for generator in GENERATORS:
|
for generator in GENERATORS:
|
||||||
table = G().build_table(generator=generator)
|
table = G().build_table(generator=generator)
|
||||||
|
|
@ -119,15 +119,14 @@ def test_grammar_aho_ullman_2():
|
||||||
A = Terminal("a")
|
A = Terminal("a")
|
||||||
B = Terminal("b")
|
B = Terminal("b")
|
||||||
|
|
||||||
TestGrammar().build_table(generator=parser.GenerateLR1)
|
TestGrammar().build_table(generator=parser.ParserGenerator)
|
||||||
TestGrammar().build_table(generator=parser.GeneratePager)
|
# TestGrammar().build_table(generator=parser.GeneratePager)
|
||||||
|
|
||||||
|
|
||||||
def test_fun_lalr():
|
def test_fun_lalr():
|
||||||
|
|
||||||
class TestGrammar(Grammar):
|
class TestGrammar(Grammar):
|
||||||
start = "S"
|
start = "S"
|
||||||
generator = parser.GeneratePager
|
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def S(self):
|
def S(self):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue