Compare commits

..

No commits in common. "15008493907f6a889eec66bc738ddaac3f7c09c2" and "57de8a5b8547414229dc709b93d22f3bdf60e193" have entirely different histories.

3 changed files with 37 additions and 83 deletions

View file

@ -54,10 +54,8 @@ RSQUARE = Terminal("RightBracket")
class FineGrammar(Grammar): class FineGrammar(Grammar):
def __init__(self): def __init__(self):
super().__init__( super().__init__(
start="File",
precedence=[ precedence=[
(Assoc.RIGHT, [EQUAL]), (Assoc.RIGHT, [EQUAL]),
(Assoc.LEFT, [OR]), (Assoc.LEFT, [OR]),
@ -75,7 +73,7 @@ class FineGrammar(Grammar):
# statement or an expression, prefer the statement. # statement or an expression, prefer the statement.
# #
(Assoc.NONE, [self.if_statement]), (Assoc.NONE, [self.if_statement]),
], ]
) )
@rule("File") @rule("File")

View file

@ -1,4 +1,3 @@
import argparse
import bisect import bisect
import importlib import importlib
import inspect import inspect
@ -288,14 +287,11 @@ class DynamicLexerModule(DynamicModule):
class Harness: class Harness:
grammar_file: str
start_rule: str | None
source: str | None source: str | None
table: parser.ParseTable | None table: parser.ParseTable | None
tree: Tree | None tree: Tree | None
def __init__(self, grammar_file, start_rule, source_path): def __init__(self, start_rule, source_path):
self.grammar_file = grammar_file
self.start_rule = start_rule self.start_rule = start_rule
self.source_path = source_path self.source_path = source_path
@ -303,17 +299,13 @@ class Harness:
self.table = None self.table = None
self.tokens = None self.tokens = None
self.tree = None self.tree = None
self.errors = [] self.errors = None
self.state_count = 0
self.average_entries = 0
self.max_entries = 0
self.grammar_module = DynamicGrammarModule( self.grammar_module = DynamicGrammarModule(
self.grammar_file, None, self.start_rule, generator=parser.GenerateLALR "./grammar.py", None, self.start_rule, generator=parser.GenerateLALR
) )
self.lexer_module = DynamicLexerModule(self.grammar_file, None) self.lexer_module = DynamicLexerModule("./grammar.py", None)
def run(self): def run(self):
while True: while True:
@ -324,7 +316,6 @@ class Harness:
return return
self.update() self.update()
self.render()
def load_grammar(self) -> parser.ParseTable: def load_grammar(self) -> parser.ParseTable:
return self.grammar_module.get() return self.grammar_module.get()
@ -338,51 +329,45 @@ class Harness:
with open(self.source_path, "r", encoding="utf-8") as f: with open(self.source_path, "r", encoding="utf-8") as f:
self.source = f.read() self.source = f.read()
self.tokens = lexer_func(self.source) self.tokens = lexer_func(self.source)
lex_time = time.time() lex_time = time.time()
# print(f"{tokens.lines}") # print(f"{tokens.lines}")
# tokens.dump(end=5) # tokens.dump(end=5)
(tree, errors) = parse(table, self.tokens, trace=None) (tree, errors) = parse(table, self.tokens, trace=None)
parse_time = time.time() parse_time = time.time()
self.tree = tree self.tree = tree
self.errors = errors self.errors = errors
self.parse_elapsed = parse_time - lex_time parse_elapsed = parse_time - lex_time
states = table.actions
self.state_count = len(states)
self.average_entries = sum(len(row) for row in states) / len(states)
self.max_entries = max(len(row) for row in states)
except Exception as e: except Exception as e:
self.tree = None self.tree = None
self.errors = ["Error loading grammar:"] + [ self.errors = ["Error loading grammar:"] + [
" " + l.rstrip() for fl in traceback.format_exception(e) for l in fl.splitlines() " " + l.rstrip() for fl in traceback.format_exception(e) for l in fl.splitlines()
] ]
self.parse_elapsed = time.time() - start_time parse_elapsed = time.time() - start_time
self.state_count = 0 table = None
self.average_entries = 0
self.max_entries = 0
def render(self):
sys.stdout.buffer.write(CLEAR) sys.stdout.buffer.write(CLEAR)
rows, cols = termios.tcgetwinsize(sys.stdout.fileno()) rows, cols = termios.tcgetwinsize(sys.stdout.fileno())
if self.state_count > 0: if table is not None:
states = table.actions
average_entries = sum(len(row) for row in states) / len(states)
max_entries = max(len(row) for row in states)
print( print(
f"{self.state_count} states - {self.average_entries:.3} average, {self.max_entries} max - {self.parse_elapsed:.3}s\r" f"{len(states)} states - {average_entries:.3} average, {max_entries} max - {parse_elapsed:.3}s \r"
) )
else: else:
print(f"No table\r") print("No table\r\n")
print(("\u2500" * cols) + "\r")
if self.tree is not None: if self.tree is not None:
lines = [] lines = []
self.format_node(lines, self.tree) self.format_node(lines, self.tree)
for line in lines[: rows - 3]: for line in lines[: rows - 2]:
print(line[:cols] + "\r") print(line[:cols] + "\r")
else: else:
for error in self.errors[: rows - 3]: for error in self.errors[: rows - 2]:
print(error[:cols] + "\r") print(error[:cols] + "\r")
sys.stdout.flush() sys.stdout.flush()
@ -401,27 +386,10 @@ class Harness:
lines.append((" " * indent) + f"{kind}:'{value}' [{start}, {end})") lines.append((" " * indent) + f"{kind}:'{value}' [{start}, {end})")
def main(args: list[str]): if __name__ == "__main__":
parser = argparse.ArgumentParser(description="An interactive debugging harness for grammars") source_path = None
parser.add_argument("grammar", help="Path to a python file containing the grammar to load") if len(sys.argv) == 2:
parser.add_argument("source_path", help="Path to an input file to parse") source_path = sys.argv[1]
parser.add_argument(
"--grammar-member",
type=str,
default=None,
help="The name of the member in the grammar module to load. The default is to search "
"the module for a class that looks like a Grammar. You should only need to specify "
"this if you have more than one grammar in your module, or if it's hidden somehow.",
)
parser.add_argument(
"--start-rule",
type=str,
default=None,
help="The name of the production to start parsing with. The default is the one "
"specified by the grammar.",
)
parsed = parser.parse_args(args[1:])
fd = sys.stdin.fileno() fd = sys.stdin.fileno()
old_settings = termios.tcgetattr(fd) old_settings = termios.tcgetattr(fd)
@ -430,9 +398,8 @@ def main(args: list[str]):
enter_alt_screen() enter_alt_screen()
h = Harness( h = Harness(
grammar_file=parsed.grammar, start_rule="File",
start_rule=parsed.start_rule, source_path=source_path,
source_path=parsed.source_path,
) )
h.run() h.run()
@ -440,6 +407,6 @@ def main(args: list[str]):
leave_alt_screen() leave_alt_screen()
termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
# print(parser_faster.format_table(gen, table))
if __name__ == "__main__": # print()
main(sys.argv) # tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])

View file

@ -1813,10 +1813,7 @@ class Grammar:
Not very exciting, perhaps, but it's something. Not very exciting, perhaps, but it's something.
""" """
_precedence: dict[str, typing.Tuple[Assoc, int]] def __init__(self, precedence: PrecedenceList | None = None):
_start: str
def __init__(self, start: str, precedence: PrecedenceList | None = None):
if precedence is None: if precedence is None:
precedence = getattr(self, "precedence", []) precedence = getattr(self, "precedence", [])
assert precedence is not None assert precedence is not None
@ -1834,10 +1831,9 @@ class Grammar:
precedence_table[key] = (associativity, prec + 1) precedence_table[key] = (associativity, prec + 1)
self._precedence = precedence_table self._precedence = precedence_table
self._start = start
def generate_nonterminal_dict( def generate_nonterminal_dict(
self, start: str | None = None self, start: str
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]: ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
"""Convert the rules into a dictionary of productions. """Convert the rules into a dictionary of productions.
@ -1847,9 +1843,6 @@ class Grammar:
dictionary that maps nonterminal rule name to its associated list of dictionary that maps nonterminal rule name to its associated list of
productions. productions.
""" """
if start is None:
start = self._start
rules = inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)) rules = inspect.getmembers(self, lambda x: isinstance(x, NonTerminal))
nonterminals = {rule.name: rule for _, rule in rules} nonterminals = {rule.name: rule for _, rule in rules}
transparents = {rule.name for _, rule in rules if rule.transparent} transparents = {rule.name for _, rule in rules if rule.transparent}
@ -1879,9 +1872,7 @@ class Grammar:
return (grammar, transparents) return (grammar, transparents)
def desugar( def desugar(self, start: str) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
self, start: str | None = None
) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]:
"""Convert the rules into a flat list of productions. """Convert the rules into a flat list of productions.
Our table generators work from a very flat set of productions. The form Our table generators work from a very flat set of productions. The form
@ -1905,12 +1896,10 @@ class Grammar:
return grammar, transparents return grammar, transparents
def build_table(self, start: str | None, generator=GenerateLALR): def build_table(self, start: str, generator=GenerateLALR):
"""Construct a parse table for this grammar, starting at the named """Construct a parse table for this grammar, starting at the named
nonterminal rule. nonterminal rule.
""" """
if start is None:
start = self._start
desugared, transparents = self.desugar(start) desugared, transparents = self.desugar(start)
gen = generator(start, desugared, precedence=self._precedence, transparents=transparents) gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)