# lrparsers/harness.py

import bisect
from dataclasses import dataclass
import enum
import select
import sys
import termios
import tty
import typing

import grammar
import parser

# from parser import Token, Grammar, rule, seq


def trace_state(stack, input, input_index, action):
    """Print one row of parser trace output.

    The row shows the state numbers currently on the stack, a window of up
    to four input symbols starting at ``input_index``, and the action that
    was looked up, each rendered with ``repr`` and left-aligned in a
    fixed-width column.
    """
    state_numbers = [entry[0] for entry in stack]
    lookahead = input[input_index : input_index + 4]
    row = f"{state_numbers!r: <20}  {lookahead!r: <50}  {action!r: <5}"
    print(row)


@dataclass
class Tree:
    name: str | None
    children: typing.Tuple["Tree | str", ...]


def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
    """Parse the input with the generated parsing table and return the
    concrete syntax tree.

    The parsing table can be generated by GenerateLR0.gen_table() or by any
    of the other generators below. The parsing mechanism never changes, only
    the table generation mechanism.

    input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
    one on for you.

    This is not a *great* parser, it's really just a demo for what you can
    do with the table.
    """
    input: list[str] = [t.value for (t, _, _) in tokens.tokens]

    assert "$" not in input
    input = input + ["$"]
    input_index = 0

    # Our stack is a stack of tuples, where the first entry is the state number
    # and the second entry is the 'value' that was generated when the state was
    # pushed.
    stack: list[typing.Tuple[int, str | Tree | None]] = [(0, None)]
    while True:
        current_state = stack[-1][0]
        current_token = input[input_index]

        action = table.states[current_state].get(current_token, parser.Error())
        if trace:
            trace(stack, input, input_index, action)

        match action:
            case parser.Accept():
                result = stack[-1][1]
                assert isinstance(result, Tree)
                return (result, [])

            case parser.Reduce(name=name, count=size, transparent=transparent):
                children: list[str | Tree] = []
                for _, c in stack[-size:]:
                    if c is None:
                        continue
                    elif isinstance(c, Tree) and c.name is None:
                        children.extend(c.children)
                    else:
                        children.append(c)

                value = Tree(name=name if not transparent else None, children=tuple(children))
                stack = stack[:-size]

                goto = table.states[stack[-1][0]].get(name, parser.Error())
                assert isinstance(goto, parser.Goto)
                stack.append((goto.state, value))

            case parser.Shift(state):
                stack.append((state, current_token))
                input_index += 1

            case parser.Error():
                if input_index >= len(tokens.tokens):
                    message = "Unexpected end of file"
                    start = tokens.tokens[-1][1]
                else:
                    message = f"Syntax error: unexpected symbol {current_token}"
                    (_, start, _) = tokens.tokens[input_index]

                line_index = bisect.bisect_left(tokens.lines, start)
                if line_index == 0:
                    col_start = 0
                else:
                    col_start = tokens.lines[line_index - 1] + 1
                column_index = start - col_start
                line_index += 1

                error = f"{line_index}:{column_index}: {message}"
                return (None, [error])

            case _:
                raise ValueError(f"Unknown action type: {action}")


# https://en.wikipedia.org/wiki/ANSI_escape_code
# https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797


class CharColor(enum.IntEnum):
    CHAR_COLOR_DEFAULT = 0
    CHAR_COLOR_BLACK = 30
    CHAR_COLOR_RED = enum.auto()
    CHAR_COLOR_GREEN = enum.auto()
    CHAR_COLOR_YELLOW = enum.auto()
    CHAR_COLOR_BLUE = enum.auto()
    CHAR_COLOR_MAGENTA = enum.auto()
    CHAR_COLOR_CYAN = enum.auto()
    CHAR_COLOR_WHITE = enum.auto()  # Really light gray
    CHAR_COLOR_BRIGHT_BLACK = 90  # Really dark gray
    CHAR_COLOR_BRIGHT_RED = enum.auto()
    CHAR_COLOR_BRIGHT_GREEN = enum.auto()
    CHAR_COLOR_BRIGHT_YELLOW = enum.auto()
    CHAR_COLOR_BRIGHT_BLUE = enum.auto()
    CHAR_COLOR_BRIGHT_MAGENTA = enum.auto()
    CHAR_COLOR_BRIGHT_CYAN = enum.auto()
    CHAR_COLOR_BRIGHT_WHITE = enum.auto()


def ESC(x: bytes) -> bytes:
    """Return ``x`` prefixed with the escape byte (0x1B)."""
    escape = b"\x1b"
    return escape + x


def CSI(x: bytes) -> bytes:
    """Return ``x`` as a control sequence: the escape byte, ``[``, then ``x``."""
    payload = b"[" + x
    return ESC(payload)


# Written at the start of every redraw: the `H` control sequence (cursor to
# the home position) followed by `0m` (reset text attributes). Note that no
# erase sequence is included, so existing screen contents are overwritten
# rather than wiped.
CLEAR = b"".join((CSI(b"H"), CSI(b"0m")))


def enter_alt_screen():
    """Switch the terminal to the alternate screen buffer (``CSI ?1049h``).

    The sequence is flushed immediately so the switch happens now rather
    than whenever stdout is next flushed.
    """
    # Push out any pending text first so it lands on the main screen, in
    # order, before the raw escape sequence goes to the byte stream.
    sys.stdout.flush()
    sys.stdout.buffer.write(CSI(b"?1049h"))
    sys.stdout.buffer.flush()


def leave_alt_screen():
    """Switch the terminal back to the main screen buffer (``CSI ?1049l``).

    The sequence is flushed immediately. Without the flush the terminal
    would stay on the alternate screen until stdout is flushed at exit, so
    anything written to stderr in between (such as a traceback) would be
    drawn on the alternate screen and then disappear.
    """
    # Flush pending text first so it is emitted before the screen switch.
    sys.stdout.flush()
    sys.stdout.buffer.write(CSI(b"?1049l"))
    sys.stdout.buffer.flush()


class Harness:
    source: str | None
    table: parser.ParseTable | None
    tree: Tree | None

    def __init__(self, lexer_func, grammar_func, start_rule, source_path):
        # self.generator = parser.GenerateLR1
        self.generator = parser.GenerateLALR
        self.lexer_func = lexer_func
        self.grammar_func = grammar_func
        self.start_rule = start_rule
        self.source_path = source_path

        self.source = None
        self.table = None
        self.tokens = None
        self.tree = None
        self.errors = None

    def run(self):
        while True:
            i, _, _ = select.select([sys.stdin], [], [], 1)
            if i:
                k = sys.stdin.read(1)
                print(f"Key {k}\r")
                return

            self.update()

    def update(self):
        if self.table is None:
            self.table = self.grammar_func().build_table(
                start=self.start_rule, generator=self.generator
            )
        assert self.table is not None

        if self.tokens is None:
            with open(self.source_path, "r", encoding="utf-8") as f:
                self.source = f.read()
            self.tokens = self.lexer_func(self.source)

        # print(f"{tokens.lines}")
        # tokens.dump(end=5)
        if self.tree is None and self.errors is None:
            (tree, errors) = parse(self.table, self.tokens, trace=None)
            self.tree = tree
            self.errors = errors

        sys.stdout.buffer.write(CLEAR)
        rows, cols = termios.tcgetwinsize(sys.stdout.fileno())

        states = self.table.states
        average_entries = sum(len(row) for row in states) / len(states)
        max_entries = max(len(row) for row in states)
        print(f"{len(states)} states - {average_entries} average, {max_entries} max\r")

        if self.tree is not None:
            lines = []
            self.format_node(lines, self.tree)
            for line in lines[: rows - 2]:
                print(line[:cols] + "\r")

        sys.stdout.flush()
        sys.stdout.buffer.flush()

    def format_node(self, lines, node: Tree | str, indent=0):
        """Print out an indented concrete syntax tree, from parse()."""
        match node:
            case Tree(name, children):
                lines.append((" " * indent) + (name or "???"))
                for child in children:
                    self.format_node(lines, child, indent + 2)
            case _:
                lines.append((" " * indent) + str(node))


if __name__ == "__main__":
    # Validate the command line before touching the terminal: without a
    # path the harness would otherwise fail inside open() while the
    # terminal is already in raw mode on the alternate screen.
    if len(sys.argv) != 2:
        print(f"usage: {sys.argv[0]} SOURCE_FILE", file=sys.stderr)
        sys.exit(2)
    source_path = sys.argv[1]

    fd = sys.stdin.fileno()
    old_settings = termios.tcgetattr(fd)
    try:
        # Raw mode so a single key press is delivered without waiting for
        # Enter.
        tty.setraw(fd)
        enter_alt_screen()

        h = Harness(
            lexer_func=grammar.FineTokens,
            grammar_func=grammar.FineGrammar,
            start_rule="file",
            source_path=source_path,
        )
        h.run()

    finally:
        # Always restore the screen and the saved terminal settings, even
        # when the harness raised.
        leave_alt_screen()
        termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)

    # print(parser_faster.format_table(gen, table))
    # print()
    # tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])