From a7b4a3ce8b652988d1d5eb17095aadc3122fe2cb Mon Sep 17 00:00:00 2001 From: John Doty Date: Fri, 31 May 2024 06:29:57 -0700 Subject: [PATCH 1/2] Grammars have implicit start symbols --- grammar.py | 4 +++- parser.py | 19 +++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/grammar.py b/grammar.py index ff2ab3f..9069ef3 100644 --- a/grammar.py +++ b/grammar.py @@ -54,8 +54,10 @@ RSQUARE = Terminal("RightBracket") class FineGrammar(Grammar): + def __init__(self): super().__init__( + start="File", precedence=[ (Assoc.RIGHT, [EQUAL]), (Assoc.LEFT, [OR]), @@ -73,7 +75,7 @@ class FineGrammar(Grammar): # statement or an expression, prefer the statement. # (Assoc.NONE, [self.if_statement]), - ] + ], ) @rule("File") diff --git a/parser.py b/parser.py index e79a48d..371b7e5 100644 --- a/parser.py +++ b/parser.py @@ -1813,7 +1813,10 @@ class Grammar: Not very exciting, perhaps, but it's something. """ - def __init__(self, precedence: PrecedenceList | None = None): + _precedence: dict[str, typing.Tuple[Assoc, int]] + _start: str + + def __init__(self, start: str, precedence: PrecedenceList | None = None): if precedence is None: precedence = getattr(self, "precedence", []) assert precedence is not None @@ -1831,9 +1834,10 @@ class Grammar: precedence_table[key] = (associativity, prec + 1) self._precedence = precedence_table + self._start = start def generate_nonterminal_dict( - self, start: str + self, start: str | None = None ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]: """Convert the rules into a dictionary of productions. @@ -1843,6 +1847,9 @@ class Grammar: dictionary that maps nonterminal rule name to its associated list of productions. """ + if start is None: + start = self._start + rules = inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)) nonterminals = {rule.name: rule for _, rule in rules} transparents = {rule.name for _, rule in rules if rule.transparent} @@ -1872,7 +1879,9 @@ class Grammar: return (grammar, transparents) - def desugar(self, start: str) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]: + def desugar( + self, start: str | None = None + ) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]: """Convert the rules into a flat list of productions. Our table generators work from a very flat set of productions. The form @@ -1896,10 +1905,12 @@ class Grammar: return grammar, transparents - def build_table(self, start: str, generator=GenerateLALR): + def build_table(self, start: str | None, generator=GenerateLALR): """Construct a parse table for this grammar, starting at the named nonterminal rule. """ + if start is None: + start = self._start desugared, transparents = self.desugar(start) gen = generator(start, desugared, precedence=self._precedence, transparents=transparents) From 15008493907f6a889eec66bc738ddaac3f7c09c2 Mon Sep 17 00:00:00 2001 From: John Doty Date: Fri, 31 May 2024 06:30:13 -0700 Subject: [PATCH 2/2] Argument parsing, render refactor --- harness.py | 97 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 32 deletions(-) diff --git a/harness.py b/harness.py index 2590415..d53675c 100644 --- a/harness.py +++ b/harness.py @@ -1,3 +1,4 @@ +import argparse import bisect import importlib import inspect @@ -287,11 +288,14 @@ class DynamicLexerModule(DynamicModule): class Harness: + grammar_file: str + start_rule: str | None source: str | None table: parser.ParseTable | None tree: Tree | None - def __init__(self, start_rule, source_path): + def __init__(self, grammar_file, start_rule, source_path): + self.grammar_file = grammar_file self.start_rule = start_rule self.source_path = source_path @@ -299,13 +303,17 @@ class Harness: self.table = None self.tokens = None self.tree = None - self.errors = None + self.errors = [] + + self.state_count = 0 + self.average_entries = 0 + self.max_entries = 0 self.grammar_module = DynamicGrammarModule( - "./grammar.py", None, self.start_rule, generator=parser.GenerateLALR + self.grammar_file, None, self.start_rule, generator=parser.GenerateLALR ) - self.lexer_module = DynamicLexerModule("./grammar.py", None) + self.lexer_module = DynamicLexerModule(self.grammar_file, None) def run(self): while True: @@ -316,6 +324,7 @@ class Harness: return self.update() + self.render() def load_grammar(self) -> parser.ParseTable: return self.grammar_module.get() @@ -329,45 +338,51 @@ class Harness: with open(self.source_path, "r", encoding="utf-8") as f: self.source = f.read() - self.tokens = lexer_func(self.source) - lex_time = time.time() + self.tokens = lexer_func(self.source) + lex_time = time.time() - # print(f"{tokens.lines}") - # tokens.dump(end=5) - (tree, errors) = parse(table, self.tokens, trace=None) - parse_time = time.time() - self.tree = tree - self.errors = errors - parse_elapsed = parse_time - lex_time + # print(f"{tokens.lines}") + # tokens.dump(end=5) + (tree, errors) = parse(table, self.tokens, trace=None) + parse_time = time.time() + self.tree = tree + self.errors = errors + self.parse_elapsed = parse_time - lex_time + + states = table.actions + self.state_count = len(states) + self.average_entries = sum(len(row) for row in states) / len(states) + self.max_entries = max(len(row) for row in states) except Exception as e: self.tree = None self.errors = ["Error loading grammar:"] + [ " " + l.rstrip() for fl in traceback.format_exception(e) for l in fl.splitlines() ] - parse_elapsed = time.time() - start_time - table = None + self.parse_elapsed = time.time() - start_time + self.state_count = 0 + self.average_entries = 0 + self.max_entries = 0 + def render(self): sys.stdout.buffer.write(CLEAR) rows, cols = termios.tcgetwinsize(sys.stdout.fileno()) - if table is not None: - states = table.actions - average_entries = sum(len(row) for row in states) / len(states) - max_entries = max(len(row) for row in states) + if self.state_count > 0: print( - f"{len(states)} states - {average_entries:.3} average, {max_entries} max - {parse_elapsed:.3}s \r" + f"{self.state_count} states - {self.average_entries:.3} average, {self.max_entries} max - {self.parse_elapsed:.3}s\r" ) else: - print("No table\r\n") + print(f"No table\r") + print(("\u2500" * cols) + "\r") if self.tree is not None: lines = [] self.format_node(lines, self.tree) - for line in lines[: rows - 2]: + for line in lines[: rows - 3]: print(line[:cols] + "\r") else: - for error in self.errors[: rows - 2]: + for error in self.errors[: rows - 3]: print(error[:cols] + "\r") sys.stdout.flush() @@ -386,10 +401,27 @@ class Harness: lines.append((" " * indent) + f"{kind}:'{value}' [{start}, {end})") -if __name__ == "__main__": - source_path = None - if len(sys.argv) == 2: - source_path = sys.argv[1] +def main(args: list[str]): + parser = argparse.ArgumentParser(description="An interactive debugging harness for grammars") + parser.add_argument("grammar", help="Path to a python file containing the grammar to load") + parser.add_argument("source_path", help="Path to an input file to parse") + parser.add_argument( + "--grammar-member", + type=str, + default=None, + help="The name of the member in the grammar module to load. The default is to search " + "the module for a class that looks like a Grammar. You should only need to specify " + "this if you have more than one grammar in your module, or if it's hidden somehow.", + ) + parser.add_argument( + "--start-rule", + type=str, + default=None, + help="The name of the production to start parsing with. The default is the one " + "specified by the grammar.", + ) + + parsed = parser.parse_args(args[1:]) fd = sys.stdin.fileno() old_settings = termios.tcgetattr(fd) @@ -398,8 +430,9 @@ if __name__ == "__main__": enter_alt_screen() h = Harness( - start_rule="File", - source_path=source_path, + grammar_file=parsed.grammar, + start_rule=parsed.start_rule, + source_path=parsed.source_path, ) h.run() @@ -407,6 +440,6 @@ if __name__ == "__main__": leave_alt_screen() termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) - # print(parser_faster.format_table(gen, table)) - # print() - # tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"]) + +if __name__ == "__main__": + main(sys.argv)