# lrparsers/harness.py
# (335 lines, 10 KiB, Python)
import bisect
import importlib
import inspect
import enum
import os
import select
import sys
import termios
import time
import tty
import typing
from dataclasses import dataclass
import grammar
import parser
# from parser import Token, Grammar, rule, seq
def trace_state(stack, input, input_index, action):
    """Debug trace hook for parse(): print one aligned line per parser step.

    Shows the state numbers on the stack, a four-token lookahead window of
    the remaining input, and the action about to be taken.
    """
    states = repr([entry[0] for entry in stack])
    lookahead = repr(input[input_index : input_index + 4])
    print(f"{states: <20} {lookahead: <50} {repr(action): <5}")
@dataclass
class Tree:
    """A node in the concrete syntax tree built by parse()."""

    # Name of the grammar rule that produced this node; None marks a
    # "transparent" node whose children are spliced into the parent by parse().
    name: str | None
    # Children in order: nested Trees for rules, plain strings for tokens.
    children: typing.Tuple["Tree | str", ...]
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
"""Parse the input with the generated parsing table and return the
concrete syntax tree.
The parsing table can be generated by GenerateLR0.gen_table() or by any
of the other generators below. The parsing mechanism never changes, only
the table generation mechanism.
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
one on for you.
This is not a *great* parser, it's really just a demo for what you can
do with the table.
"""
input: list[str] = [t.value for (t, _, _) in tokens.tokens]
assert "$" not in input
input = input + ["$"]
input_index = 0
# Our stack is a stack of tuples, where the first entry is the state number
# and the second entry is the 'value' that was generated when the state was
# pushed.
stack: list[typing.Tuple[int, str | Tree | None]] = [(0, None)]
while True:
current_state = stack[-1][0]
current_token = input[input_index]
action = table.states[current_state].get(current_token, parser.Error())
if trace:
trace(stack, input, input_index, action)
match action:
case parser.Accept():
result = stack[-1][1]
assert isinstance(result, Tree)
return (result, [])
case parser.Reduce(name=name, count=size, transparent=transparent):
children: list[str | Tree] = []
for _, c in stack[-size:]:
if c is None:
continue
elif isinstance(c, Tree) and c.name is None:
children.extend(c.children)
else:
children.append(c)
value = Tree(name=name if not transparent else None, children=tuple(children))
stack = stack[:-size]
goto = table.states[stack[-1][0]].get(name, parser.Error())
assert isinstance(goto, parser.Goto)
stack.append((goto.state, value))
case parser.Shift(state):
stack.append((state, current_token))
input_index += 1
case parser.Error():
if input_index >= len(tokens.tokens):
message = "Unexpected end of file"
start = tokens.tokens[-1][1]
else:
message = f"Syntax error: unexpected symbol {current_token}"
(_, start, _) = tokens.tokens[input_index]
line_index = bisect.bisect_left(tokens.lines, start)
if line_index == 0:
col_start = 0
else:
col_start = tokens.lines[line_index - 1] + 1
column_index = start - col_start
line_index += 1
error = f"{line_index}:{column_index}: {message}"
return (None, [error])
case _:
raise ValueError(f"Unknown action type: {action}")
# https://en.wikipedia.org/wiki/ANSI_escape_code
# https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797
class CharColor(enum.IntEnum):
    """ANSI SGR foreground-color codes: 0 default, 30-37 normal, 90-97 bright."""

    CHAR_COLOR_DEFAULT = 0
    CHAR_COLOR_BLACK = 30
    CHAR_COLOR_RED = 31
    CHAR_COLOR_GREEN = 32
    CHAR_COLOR_YELLOW = 33
    CHAR_COLOR_BLUE = 34
    CHAR_COLOR_MAGENTA = 35
    CHAR_COLOR_CYAN = 36
    CHAR_COLOR_WHITE = 37  # Really light gray
    CHAR_COLOR_BRIGHT_BLACK = 90  # Really dark gray
    CHAR_COLOR_BRIGHT_RED = 91
    CHAR_COLOR_BRIGHT_GREEN = 92
    CHAR_COLOR_BRIGHT_YELLOW = 93
    CHAR_COLOR_BRIGHT_BLUE = 94
    CHAR_COLOR_BRIGHT_MAGENTA = 95
    CHAR_COLOR_BRIGHT_CYAN = 96
    CHAR_COLOR_BRIGHT_WHITE = 97
def ESC(x: bytes) -> bytes:
    """Prefix *x* with the ESC byte (0x1B)."""
    return b"\x1b" + x
def CSI(x: bytes) -> bytes:
    """Build a Control Sequence Introducer escape: ESC '[' followed by *x*."""
    # Equivalent to ESC(b"[" + x) with the ESC byte written out directly.
    return b"\x1b[" + x
# Erase-entire-display sequence; identical to CSI(b"2J").
CLEAR = b"\x1b[2J"
def enter_alt_screen():
    """Switch the terminal to the alternate screen buffer (CSI ?1049h)."""
    sys.stdout.buffer.write(b"\x1b[?1049h")
def leave_alt_screen():
    """Restore the terminal's normal screen buffer (CSI ?1049l)."""
    sys.stdout.buffer.write(b"\x1b[?1049l")
class Harness:
    """Interactive grammar-debugging loop.

    Watches a grammar file and a source file, rebuilds the parse table when
    the grammar changes, re-parses the source once a second, and redraws the
    resulting tree (or errors) on an ANSI terminal until a key is pressed.
    """

    # Text of source_path as of the last update(); None before the first read.
    source: str | None
    # Most recently built parse table; None until load_grammar() succeeds.
    table: parser.ParseTable | None
    # Most recent successful parse result; None on error.
    tree: Tree | None

    def __init__(self, lexer_func, start_rule, source_path):
        # self.generator = parser.GenerateLR1
        self.generator = parser.GenerateLALR
        self.lexer_func = lexer_func
        self.start_rule = start_rule
        self.source_path = source_path
        self.source = None
        self.table = None
        self.tokens = None
        self.tree = None
        self.errors = None
        self.grammar_file_name = "./grammar.py"
        # mtime of the grammar file when the table was last built; used to
        # skip rebuilding when nothing changed.
        self.last_grammar_time = None
        # Imported grammar module, kept so we can importlib.reload() it.
        self.grammar_module = None
        # Explicit grammar class name, or None to auto-detect the single
        # grammar class in the module.
        self.grammar_name = None

    def run(self):
        """Redraw once a second until any key is pressed, then return."""
        while True:
            # Block up to 1s waiting for stdin; on timeout, refresh the view.
            i, _, _ = select.select([sys.stdin], [], [], 1)
            if i:
                k = sys.stdin.read(1)
                print(f"Key {k}\r")
                return
            self.update()

    # def should_reload_grammar(self):

    def load_grammar(self) -> parser.ParseTable:
        """Return the parse table, rebuilding it if the grammar file changed.

        Raises Exception when the grammar module can't be located, contains
        zero or multiple grammar classes, or the named class isn't a grammar.
        """
        st = os.stat(self.grammar_file_name)
        # Fast path: file unmodified since the last successful build.
        if self.last_grammar_time == st.st_mtime:
            assert self.table is not None
            return self.table

        self.table = None
        if self.grammar_module is None:
            mod_name = inspect.getmodulename(self.grammar_file_name)
            if mod_name is None:
                raise Exception(f"{self.grammar_file_name} does not seem to be a module")
            self.grammar_module = importlib.import_module(mod_name)
        else:
            # Already imported once: pick up on-disk edits.
            importlib.reload(self.grammar_module)

        def is_grammar(cls):
            # A grammar is a class defined in the grammar module itself
            # (not re-exported) that exposes a build_table attribute.
            if not inspect.isclass(cls):
                return False
            assert self.grammar_module is not None
            if cls.__module__ != self.grammar_module.__name__:
                return False
            if getattr(cls, "build_table", None):
                return True
            return False

        if self.grammar_name is None:
            # Auto-detect: exactly one grammar class must exist.
            classes = inspect.getmembers(self.grammar_module, is_grammar)
            if len(classes) == 0:
                raise Exception(f"No grammars found in {self.grammar_file_name}")
            if len(classes) > 1:
                raise Exception(
                    f"{len(classes)} grammars found in {self.grammar_file_name}: {', '.join(c[0] for c in classes)}"
                )
            grammar_func = classes[0][1]
        else:
            cls = getattr(self.grammar_module, self.grammar_name)
            if cls is None:
                raise Exception(f"Cannot find {self.grammar_name} in {self.grammar_file_name}")
            if not is_grammar(cls):
                raise Exception(
                    f"{self.grammar_name} in {self.grammar_file_name} does not seem to be a grammar"
                )
            grammar_func = cls

        self.table = grammar_func().build_table(start=self.start_rule, generator=self.generator)
        self.last_grammar_time = st.st_mtime
        assert self.table is not None
        return self.table

    def update(self):
        """Reload grammar and source, re-parse, and redraw the terminal."""
        start_time = time.time()
        try:
            table = self.load_grammar()
            with open(self.source_path, "r", encoding="utf-8") as f:
                self.source = f.read()
            self.tokens = self.lexer_func(self.source)
            lex_time = time.time()
            # print(f"{tokens.lines}")
            # tokens.dump(end=5)
            (tree, errors) = parse(table, self.tokens, trace=None)
            parse_time = time.time()
            self.tree = tree
            self.errors = errors
            parse_elapsed = parse_time - lex_time
        except Exception as e:
            # Any failure (grammar build, file I/O, lexing) is reported
            # in place of the tree rather than crashing the loop.
            self.tree = None
            self.errors = [f"Error loading grammar: {e}"]
            parse_elapsed = time.time() - start_time
            table = None

        sys.stdout.buffer.write(CLEAR)
        rows, cols = termios.tcgetwinsize(sys.stdout.fileno())
        if table is not None:
            states = table.states
            average_entries = sum(len(row) for row in states) / len(states)
            max_entries = max(len(row) for row in states)
            print(
                f"{len(states)} states - {average_entries:.3} average, {max_entries} max - {parse_elapsed:.3}s \r"
            )
        else:
            print("No table\r\n")

        # Terminal is in raw mode, hence the explicit \r and the clamping of
        # output to the window's rows/cols.
        if self.tree is not None:
            lines = []
            self.format_node(lines, self.tree)
            for line in lines[: rows - 2]:
                print(line[:cols] + "\r")
        else:
            for error in self.errors[: rows - 2]:
                print(error[:cols] + "\r")

        sys.stdout.flush()
        sys.stdout.buffer.flush()

    def format_node(self, lines, node: Tree | str, indent=0):
        """Print out an indented concrete syntax tree, from parse()."""
        match node:
            case Tree(name, children):
                # Transparent nodes have name None; show a placeholder.
                lines.append((" " * indent) + (name or "???"))
                for child in children:
                    self.format_node(lines, child, indent + 2)
            case _:
                # Leaf: a token string.
                lines.append((" " * indent) + str(node))
if __name__ == "__main__":
    # Optional single CLI argument: the source file to parse and display.
    path = sys.argv[1] if len(sys.argv) == 2 else None

    stdin_fd = sys.stdin.fileno()
    saved_tty = termios.tcgetattr(stdin_fd)
    try:
        # Raw mode + alternate screen for the interactive display; both are
        # unconditionally undone below so the terminal is left usable.
        tty.setraw(stdin_fd)
        enter_alt_screen()
        harness = Harness(
            lexer_func=grammar.FineTokens,
            start_rule="file",
            source_path=path,
        )
        harness.run()
    finally:
        leave_alt_screen()
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_tty)
# print(parser_faster.format_table(gen, table))
# print()
# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])