diff --git a/grammar.py b/grammar.py index 9069ef3..ff2ab3f 100644 --- a/grammar.py +++ b/grammar.py @@ -54,10 +54,8 @@ RSQUARE = Terminal("RightBracket") class FineGrammar(Grammar): - def __init__(self): super().__init__( - start="File", precedence=[ (Assoc.RIGHT, [EQUAL]), (Assoc.LEFT, [OR]), @@ -75,7 +73,7 @@ class FineGrammar(Grammar): # statement or an expression, prefer the statement. # (Assoc.NONE, [self.if_statement]), - ], + ] ) @rule("File") diff --git a/harness.py b/harness.py index d53675c..2590415 100644 --- a/harness.py +++ b/harness.py @@ -1,4 +1,3 @@ -import argparse import bisect import importlib import inspect @@ -288,14 +287,11 @@ class DynamicLexerModule(DynamicModule): class Harness: - grammar_file: str - start_rule: str | None source: str | None table: parser.ParseTable | None tree: Tree | None - def __init__(self, grammar_file, start_rule, source_path): - self.grammar_file = grammar_file + def __init__(self, start_rule, source_path): self.start_rule = start_rule self.source_path = source_path @@ -303,17 +299,13 @@ class Harness: self.table = None self.tokens = None self.tree = None - self.errors = [] - - self.state_count = 0 - self.average_entries = 0 - self.max_entries = 0 + self.errors = None self.grammar_module = DynamicGrammarModule( - self.grammar_file, None, self.start_rule, generator=parser.GenerateLALR + "./grammar.py", None, self.start_rule, generator=parser.GenerateLALR ) - self.lexer_module = DynamicLexerModule(self.grammar_file, None) + self.lexer_module = DynamicLexerModule("./grammar.py", None) def run(self): while True: @@ -324,7 +316,6 @@ class Harness: return self.update() - self.render() def load_grammar(self) -> parser.ParseTable: return self.grammar_module.get() @@ -338,51 +329,45 @@ class Harness: with open(self.source_path, "r", encoding="utf-8") as f: self.source = f.read() - self.tokens = lexer_func(self.source) - lex_time = time.time() + self.tokens = lexer_func(self.source) + lex_time = time.time() - # print(f"{tokens.lines}") - # tokens.dump(end=5) - (tree, errors) = parse(table, self.tokens, trace=None) - parse_time = time.time() - self.tree = tree - self.errors = errors - self.parse_elapsed = parse_time - lex_time - - states = table.actions - self.state_count = len(states) - self.average_entries = sum(len(row) for row in states) / len(states) - self.max_entries = max(len(row) for row in states) + # print(f"{tokens.lines}") + # tokens.dump(end=5) + (tree, errors) = parse(table, self.tokens, trace=None) + parse_time = time.time() + self.tree = tree + self.errors = errors + parse_elapsed = parse_time - lex_time except Exception as e: self.tree = None self.errors = ["Error loading grammar:"] + [ " " + l.rstrip() for fl in traceback.format_exception(e) for l in fl.splitlines() ] - self.parse_elapsed = time.time() - start_time - self.state_count = 0 - self.average_entries = 0 - self.max_entries = 0 + parse_elapsed = time.time() - start_time + table = None - def render(self): sys.stdout.buffer.write(CLEAR) rows, cols = termios.tcgetwinsize(sys.stdout.fileno()) - if self.state_count > 0: + if table is not None: + states = table.actions + average_entries = sum(len(row) for row in states) / len(states) + max_entries = max(len(row) for row in states) print( - f"{self.state_count} states - {self.average_entries:.3} average, {self.max_entries} max - {self.parse_elapsed:.3}s\r" + f"{len(states)} states - {average_entries:.3} average, {max_entries} max - {parse_elapsed:.3}s \r" ) else: - print(f"No table\r") - print(("\u2500" * cols) + "\r") + print("No table\r\n") if self.tree is not None: lines = [] self.format_node(lines, self.tree) - for line in lines[: rows - 3]: + for line in lines[: rows - 2]: print(line[:cols] + "\r") else: - for error in self.errors[: rows - 3]: + for error in self.errors[: rows - 2]: print(error[:cols] + "\r") sys.stdout.flush() @@ -401,27 +386,10 @@ class Harness: lines.append((" " * indent) + f"{kind}:'{value}' [{start}, {end})") -def main(args: list[str]): - parser = argparse.ArgumentParser(description="An interactive debugging harness for grammars") - parser.add_argument("grammar", help="Path to a python file containing the grammar to load") - parser.add_argument("source_path", help="Path to an input file to parse") - parser.add_argument( - "--grammar-member", - type=str, - default=None, - help="The name of the member in the grammar module to load. The default is to search " - "the module for a class that looks like a Grammar. You should only need to specify " - "this if you have more than one grammar in your module, or if it's hidden somehow.", - ) - parser.add_argument( - "--start-rule", - type=str, - default=None, - help="The name of the production to start parsing with. The default is the one " - "specified by the grammar.", - ) - - parsed = parser.parse_args(args[1:]) +if __name__ == "__main__": + source_path = None + if len(sys.argv) == 2: + source_path = sys.argv[1] fd = sys.stdin.fileno() old_settings = termios.tcgetattr(fd) @@ -430,9 +398,8 @@ def main(args: list[str]): enter_alt_screen() h = Harness( - grammar_file=parsed.grammar, - start_rule=parsed.start_rule, - source_path=parsed.source_path, + start_rule="File", + source_path=source_path, ) h.run() @@ -440,6 +407,6 @@ def main(args: list[str]): leave_alt_screen() termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) - -if __name__ == "__main__": - main(sys.argv) + # print(parser_faster.format_table(gen, table)) + # print() + # tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"]) diff --git a/parser.py b/parser.py index 371b7e5..e79a48d 100644 --- a/parser.py +++ b/parser.py @@ -1813,10 +1813,7 @@ class Grammar: Not very exciting, perhaps, but it's something. """ - _precedence: dict[str, typing.Tuple[Assoc, int]] - _start: str - - def __init__(self, start: str, precedence: PrecedenceList | None = None): + def __init__(self, precedence: PrecedenceList | None = None): if precedence is None: precedence = getattr(self, "precedence", []) assert precedence is not None @@ -1834,10 +1831,9 @@ class Grammar: precedence_table[key] = (associativity, prec + 1) self._precedence = precedence_table - self._start = start def generate_nonterminal_dict( - self, start: str | None = None + self, start: str ) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]: """Convert the rules into a dictionary of productions. @@ -1847,9 +1843,6 @@ class Grammar: dictionary that maps nonterminal rule name to its associated list of productions. """ - if start is None: - start = self._start - rules = inspect.getmembers(self, lambda x: isinstance(x, NonTerminal)) nonterminals = {rule.name: rule for _, rule in rules} transparents = {rule.name for _, rule in rules if rule.transparent} @@ -1879,9 +1872,7 @@ class Grammar: return (grammar, transparents) - def desugar( - self, start: str | None = None - ) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]: + def desugar(self, start: str) -> typing.Tuple[list[typing.Tuple[str, list[str]]], set[str]]: """Convert the rules into a flat list of productions. Our table generators work from a very flat set of productions. The form @@ -1905,12 +1896,10 @@ class Grammar: return grammar, transparents - def build_table(self, start: str | None, generator=GenerateLALR): + def build_table(self, start: str, generator=GenerateLALR): """Construct a parse table for this grammar, starting at the named nonterminal rule. """ - if start is None: - start = self._start desugared, transparents = self.desugar(start) gen = generator(start, desugared, precedence=self._precedence, transparents=transparents)