Better formatting, dump full graph, disallow conflicts

This commit is contained in:
John Doty 2024-06-02 08:09:47 -07:00
parent d6131ad53e
commit e57a827e5c

View file

@ -135,6 +135,7 @@ import dataclasses
import enum import enum
import functools import functools
import inspect import inspect
import json
import sys import sys
import typing import typing
@ -282,7 +283,11 @@ class Configuration:
) )
def format(self, alphabet: list[str]) -> str: def format(self, alphabet: list[str]) -> str:
la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else "" if self.lookahead != ():
la = " {" + ",".join(alphabet[i] for i in self.lookahead) + "}"
else:
la = ""
return "{name} -> {bits}{lookahead}".format( return "{name} -> {bits}{lookahead}".format(
name=alphabet[self.name], name=alphabet[self.name],
bits=" ".join( bits=" ".join(
@ -297,7 +302,7 @@ class Configuration:
# ConfigSet = typing.Tuple[Configuration, ...] # ConfigSet = typing.Tuple[Configuration, ...]
class ConfigSet(frozenset): class ConfigSet(frozenset[Configuration]):
pass pass
@ -354,6 +359,21 @@ class ConfigurationSetInfo:
""" """
self.successors[c_id][symbol] = successor self.successors[c_id][symbol] = successor
def dump_state(self, alphabet: list[str]) -> str:
    """Serialize every configuration set and its successors as JSON.

    The result is a JSON object keyed by the stringified index of each
    entry in ``self.sets``. Each value has two fields:

    - ``"configs"``: the ``format(alphabet)`` text of every configuration
      in the set, sorted so the dump is stable and diffable.
    - ``"successors"``: a mapping from symbol name (``alphabet[symbol]``)
      to the stringified index of the successor set, ordered by symbol
      name.

    States are emitted in numeric index order. (Sorting the stringified
    indices, as ``sort_keys=True`` would, puts "10" before "2".)

    NOTE(review): sorting the configs assumes their order inside a set
    carries no meaning (the sets appear to be frozenset-based) -- confirm.

    Args:
        alphabet: Symbol names, indexed by the integer symbol ids used as
            keys in ``self.successors``.

    Returns:
        The JSON text, indented by four spaces.
    """
    states = {}
    for set_index, config_set in enumerate(self.sets):
        # Order transitions by symbol name; plain dicts keep insertion
        # order, so json.dumps emits them exactly as arranged here.
        transitions = sorted(
            (alphabet[symbol], str(target))
            for symbol, target in self.successors[set_index].items()
        )
        states[str(set_index)] = {
            "configs": sorted(c.format(alphabet) for c in config_set),
            "successors": dict(transitions),
        }

    # No sort_keys: every level is already in its intended order, and
    # sorting the string state ids would break numeric ordering.
    return json.dumps(states, indent=4)
def find_path_to_set(self, target_set: ConfigSet) -> list[int]: def find_path_to_set(self, target_set: ConfigSet) -> list[int]:
"""Trace the path of grammar symbols from the first set (which always """Trace the path of grammar symbols from the first set (which always
set 0) to the target set. This is useful in conflict reporting, set 0) to the target set. This is useful in conflict reporting,
@ -369,6 +389,8 @@ class ConfigurationSetInfo:
visited = set() visited = set()
queue: collections.deque = collections.deque() queue: collections.deque = collections.deque()
# NOTE: Set 0 is always the first set, the one that contains the
# start symbol.
queue.appendleft((0, [])) queue.appendleft((0, []))
while len(queue) > 0: while len(queue) > 0:
set_index, path = queue.pop() set_index, path = queue.pop()
@ -452,7 +474,7 @@ class AmbiguityError(Exception):
self.ambiguities = ambiguities self.ambiguities = ambiguities
def __str__(self): def __str__(self):
return "The grammar is ambiguous:\n\n" + "\n\n".join( return f"{len(self.ambiguities)} ambiguities:\n\n" + "\n\n".join(
str(ambiguity) for ambiguity in self.ambiguities str(ambiguity) for ambiguity in self.ambiguities
) )
@ -505,7 +527,7 @@ class ErrorCollection:
alphabet: list[str], alphabet: list[str],
all_sets: ConfigurationSetInfo, all_sets: ConfigurationSetInfo,
) -> AmbiguityError | None: ) -> AmbiguityError | None:
"""Format all the errors into a string, or return None if there are no """Format all the errors into an error, or return None if there are no
errors. errors.
We need the alphabet to turn all these integers into something human We need the alphabet to turn all these integers into something human
@ -515,6 +537,9 @@ class ErrorCollection:
if len(self.errors) == 0: if len(self.errors) == 0:
return None return None
# with open("ambiguity.json", mode="w", encoding="utf-8") as aj:
# aj.write(all_sets.dump_state(alphabet))
errors = [] errors = []
for config_set, set_errors in self.errors.items(): for config_set, set_errors in self.errors.items():
path = all_sets.find_path_to_set(config_set) path = all_sets.find_path_to_set(config_set)
@ -1518,7 +1543,7 @@ class GenerateLR1(GenerateSLR1):
lookahead_tuple = tuple(sorted(lookahead)) lookahead_tuple = tuple(sorted(lookahead))
next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple)) next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple))
return tuple(sorted(next)) return tuple(next)
def gen_all_sets(self): def gen_all_sets(self):
"""Generate all of the configuration sets for the grammar. """Generate all of the configuration sets for the grammar.
@ -1951,6 +1976,10 @@ class Grammar:
new_clause = [] new_clause = []
for symbol in clause: for symbol in clause:
if isinstance(symbol, Terminal): if isinstance(symbol, Terminal):
if symbol.value in temp_grammar:
raise ValueError(
f"'{symbol.value}' is the name of both a Terminal and a NonTerminal rule. This will cause problems."
)
new_clause.append(symbol.value) new_clause.append(symbol.value)
else: else:
new_clause.append(symbol) new_clause.append(symbol)
@ -1959,7 +1988,7 @@ class Grammar:
return grammar, transparents return grammar, transparents
def build_table(self, start: str | None, generator=None): def build_table(self, start: str | None = None, generator=None):
"""Construct a parse table for this grammar, starting at the named """Construct a parse table for this grammar, starting at the named
nonterminal rule. nonterminal rule.
""" """