diff --git a/grammar.py b/grammar.py
index 90d88ef..0912700 100644
--- a/grammar.py
+++ b/grammar.py
@@ -349,7 +349,7 @@ class FineGrammar(Grammar):
     IN = Terminal("in", kind=TerminalKind.Keyword.Operator)
     LCURLY = Terminal("{", kind=TerminalKind.Punctuation.CurlyBrace.Open)
     RCURLY = Terminal("}", kind=TerminalKind.Punctuation.CurlyBrace.Close)
-    LET = Terminal("let", kind=TerminalKind.Keyword.Other)
+    LET = Terminal("let", kind=TerminalKind.Keyword.Other)
     RETURN = Terminal("return", kind=TerminalKind.Keyword.Control)
     SEMICOLON = Terminal(";", kind=TerminalKind.Punctuation.Separator)
     STRING = Terminal(
diff --git a/harness.py b/harness.py
index 4e94866..90cee9c 100644
--- a/harness.py
+++ b/harness.py
@@ -22,6 +22,11 @@ from parser import runtime
 # from parser import Token, Grammar, rule, seq
 
 
+###############################################################################
+# Parsing Stuff
+###############################################################################
+
+
 ###############################################################################
 # Screen Stuff
 ###############################################################################
@@ -79,18 +84,13 @@ def goto_cursor(x: int, y: int):
 # Dynamic Modules: Detect and Reload Modules when they Change
 ###############################################################################
 
 
-VERSION = 0
-MT = typing.TypeVar("MT")
-
-
-class DynamicModule[MT]:
+class DynamicModule:
     file_name: str
     member_name: str | None
     last_time: float | None
     module: types.ModuleType | None
 
-    value: MT | None
 
     def __init__(self, file_name, member_name):
         self.file_name = file_name
@@ -110,18 +110,15 @@ class DynamicModule[MT]:
 
         return True
 
-    def _transform(self, value) -> MT:
+    def _transform(self, value):
         return value
 
-    def get(self) -> MT:
+    def get(self):
         st = os.stat(self.file_name)
         if self.last_time == st.st_mtime:
             assert self.value is not None
             return self.value
 
-        global VERSION
-        VERSION += 1
-
         self.value = None
 
         if self.module is None:
@@ -153,7 +150,7 @@ class DynamicModule[MT]:
         return self.value
 
 
-class DynamicGrammarModule(DynamicModule[parser.ParseTable]):
+class DynamicGrammarModule(DynamicModule):
     def __init__(self, file_name, member_name, start_rule):
         super().__init__(file_name, member_name)
 
@@ -172,24 +169,16 @@ class DynamicGrammarModule(DynamicModule[parser.ParseTable]):
         return value().build_table(start=self.start_rule)
 
 
-class DynamicLexerModule(DynamicModule[typing.Callable[[str], runtime.TokenStream]]):
+class DynamicLexerModule(DynamicModule):
     def _predicate(self, member) -> bool:
         if not super()._predicate(member):
             return False
 
-        if getattr(member, "terminals", None):
+        if getattr(member, "tokens", None):
             return True
 
         return False
 
-    def _transform(self, value):
-        lexer_table = parser.compile_lexer(value())
-
-        def get_tokens(src: str) -> runtime.TokenStream:
-            return runtime.GenericTokenStream(src, lexer_table)
-
-        return get_tokens
-
 
 class DisplayMode(enum.Enum):
     TREE = 0
@@ -231,8 +220,6 @@ class Harness:
     line_start: int
     last_cols: int
 
-    last_version: int
-
     def __init__(
         self, grammar_file, grammar_member, lexer_file, lexer_member, start_rule, source_path
     ):
@@ -243,12 +230,9 @@ class Harness:
         self.start_rule = start_rule
         self.source_path = source_path
 
-        self.last_version = -1
-
         self.mode = DisplayMode.TREE
 
         self.source = None
-        self.table = None
         self.tokens = None
         self.tree = None
 
@@ -266,7 +250,7 @@ class Harness:
             self.grammar_file, self.grammar_member, self.start_rule
         )
 
-        self.lexer_module = DynamicLexerModule(self.lexer_file, self.grammar_member)
+        self.lexer_module = DynamicLexerModule(self.lexer_file, self.lexer_member)
 
         self.log_handler = ListHandler()
         logging.basicConfig(level=logging.INFO, handlers=[self.log_handler])
@@ -302,25 +286,15 @@ class Harness:
         return self.grammar_module.get()
 
     def update(self):
-        global VERSION
-
+        self.log_handler.clear()
         start_time = time.time()
 
        try:
             table = self.load_grammar()
             lexer_func = self.lexer_module.get()
             with open(self.source_path, "r", encoding="utf-8") as f:
-                source = f.read()
-                if source != self.source:
-                    VERSION += 1
-                    self.source = source
+                self.source = f.read()
 
-            if VERSION == self.last_version:
-                return  # Just stop, do nothing, it's all the same.
-            self.last_version = VERSION
-            assert self.source is not None
-
-            self.log_handler.clear()
             self.tokens = lexer_func(self.source)
 
             lex_time = time.time()
@@ -347,33 +321,6 @@ class Harness:
             self.average_entries = 0
             self.max_entries = 0
 
-        # WHAT
-        try:
-            with open("tree.txt", "w", encoding="utf-8") as f:
-                lines = []
-                if self.tree is not None:
-                    self.format_node(lines, self.tree)
-                f.writelines([f"{l}\n" for l in lines])
-        except Exception as e:
-            self.errors.extend([f"Unable to write tree.txt: {e}"])
-
-        try:
-            with open("errors.txt", "w", encoding="utf-8") as f:
-                f.writelines([f"{l}\n" for l in self.errors])
-        except Exception as e:
-            self.errors.extend([f"Unable to write errors.txt: {e}"])
-
-        try:
-            with open("parse.log", "w", encoding="utf-8") as f:
-                f.writelines([f"{l}\n" for l in self.log_handler.logs])
-        except Exception as e:
-            self.errors.extend([f"Unable to write parse.log: {e}"])
-
-        if hasattr(self.tokens, "dump"):
-            lines = self.tokens.dump()
-            with open("tokens.txt", "w", encoding="utf-8") as f:
-                f.writelines([f"{l}\n" for l in lines])
-
     def render(self):
         sys.stdout.buffer.write(CLEAR)
         rows, cols = termios.tcgetwinsize(sys.stdout.fileno())
diff --git a/parser/runtime.py b/parser/runtime.py
index 24e617f..2aefd7b 100644
--- a/parser/runtime.py
+++ b/parser/runtime.py
@@ -1,7 +1,6 @@
 import bisect
 import enum
 import logging
-import re
 import typing
 
 from dataclasses import dataclass
@@ -490,55 +489,3 @@ def generic_tokenize(
             pos = last_accept_pos
             start = pos
             state = 0
-
-
-class GenericTokenStream:
-    def __init__(self, src: str, lexer: parser.LexerTable):
-        self.src = src
-        self.lexer = lexer
-        self._tokens: list[typing.Tuple[parser.Terminal, int, int]] = list(
-            generic_tokenize(src, lexer)
-        )
-        self._lines = [m.start() for m in re.finditer("\n", src)]
-
-    def tokens(self):
-        return self._tokens
-
-    def lines(self):
-        return self._lines
-
-    def dump(self, *, start=None, end=None) -> list[str]:
-        if start is None:
-            start = 0
-        if end is None:
-            end = len(self._tokens)
-
-        max_terminal_name = max(
-            len(terminal.value)
-            for terminal, _ in self.lexer
-            if terminal is not None and terminal.value is not None
-        )
-        max_offset_len = len(str(len(self.src)))
-
-        prev_line = None
-        lines = []
-        for token in self._tokens[start:end]:
-            (kind, start, length) = token
-            line_index = bisect.bisect_left(self._lines, start)
-            if line_index == 0:
-                col_start = 0
-            else:
-                col_start = self._lines[line_index - 1] + 1
-            column_index = start - col_start
-            value = self.src[start : start + length]
-
-            line_number = line_index + 1
-            if line_number != prev_line:
-                line_part = f"{line_number:4}"
-                prev_line = line_number
-            else:
-                line_part = " |"
-
-            line = f"{start:{max_offset_len}} {line_part} {column_index:3} {kind.value:{max_terminal_name}} {repr(value)}"
-            lines.append(line)
-        return lines