258 lines
7.8 KiB
Python
258 lines
7.8 KiB
Python
import bisect
|
|
from dataclasses import dataclass
|
|
import enum
|
|
import select
|
|
import sys
|
|
import termios
|
|
import tty
|
|
import typing
|
|
|
|
import grammar
|
|
import parser
|
|
|
|
# from parser import Token, Grammar, rule, seq
|
|
|
|
|
|
def trace_state(stack, input, input_index, action):
    """Debug trace hook for parse(): print one aligned line per parser step.

    Shows the state numbers currently on the stack, the next few input
    symbols, and the action about to be taken.
    """
    state_numbers = [entry[0] for entry in stack]
    lookahead = input[input_index : input_index + 4]
    line = "{stack: <20} {input: <50} {action: <5}".format(
        stack=repr(state_numbers),
        input=repr(lookahead),
        action=repr(action),
    )
    print(line)
|
|
|
|
|
|
@dataclass
class Tree:
    """A node in the concrete syntax tree produced by parse()."""

    # Name of the grammar rule that produced this node. None for nodes
    # built from "transparent" rules; parse() splices the children of a
    # None-named node directly into its parent.
    name: str | None
    # Children in source order: nested Trees or raw token strings.
    children: typing.Tuple["Tree | str", ...]
|
|
|
|
|
|
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
|
|
"""Parse the input with the generated parsing table and return the
|
|
concrete syntax tree.
|
|
|
|
The parsing table can be generated by GenerateLR0.gen_table() or by any
|
|
of the other generators below. The parsing mechanism never changes, only
|
|
the table generation mechanism.
|
|
|
|
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
|
|
one on for you.
|
|
|
|
This is not a *great* parser, it's really just a demo for what you can
|
|
do with the table.
|
|
"""
|
|
input: list[str] = [t.value for (t, _, _) in tokens.tokens]
|
|
|
|
assert "$" not in input
|
|
input = input + ["$"]
|
|
input_index = 0
|
|
|
|
# Our stack is a stack of tuples, where the first entry is the state number
|
|
# and the second entry is the 'value' that was generated when the state was
|
|
# pushed.
|
|
stack: list[typing.Tuple[int, str | Tree | None]] = [(0, None)]
|
|
while True:
|
|
current_state = stack[-1][0]
|
|
current_token = input[input_index]
|
|
|
|
action = table.states[current_state].get(current_token, parser.Error())
|
|
if trace:
|
|
trace(stack, input, input_index, action)
|
|
|
|
match action:
|
|
case parser.Accept():
|
|
result = stack[-1][1]
|
|
assert isinstance(result, Tree)
|
|
return (result, [])
|
|
|
|
case parser.Reduce(name=name, count=size, transparent=transparent):
|
|
children: list[str | Tree] = []
|
|
for _, c in stack[-size:]:
|
|
if c is None:
|
|
continue
|
|
elif isinstance(c, Tree) and c.name is None:
|
|
children.extend(c.children)
|
|
else:
|
|
children.append(c)
|
|
|
|
value = Tree(name=name if not transparent else None, children=tuple(children))
|
|
stack = stack[:-size]
|
|
|
|
goto = table.states[stack[-1][0]].get(name, parser.Error())
|
|
assert isinstance(goto, parser.Goto)
|
|
stack.append((goto.state, value))
|
|
|
|
case parser.Shift(state):
|
|
stack.append((state, current_token))
|
|
input_index += 1
|
|
|
|
case parser.Error():
|
|
if input_index >= len(tokens.tokens):
|
|
message = "Unexpected end of file"
|
|
start = tokens.tokens[-1][1]
|
|
else:
|
|
message = f"Syntax error: unexpected symbol {current_token}"
|
|
(_, start, _) = tokens.tokens[input_index]
|
|
|
|
line_index = bisect.bisect_left(tokens.lines, start)
|
|
if line_index == 0:
|
|
col_start = 0
|
|
else:
|
|
col_start = tokens.lines[line_index - 1] + 1
|
|
column_index = start - col_start
|
|
line_index += 1
|
|
|
|
error = f"{line_index}:{column_index}: {message}"
|
|
return (None, [error])
|
|
|
|
case _:
|
|
raise ValueError(f"Unknown action type: {action}")
|
|
|
|
|
|
# https://en.wikipedia.org/wiki/ANSI_escape_code
|
|
# https://gist.github.com/fnky/458719343aabd01cfb17a3a4f7296797
|
|
|
|
|
|
class CharColor(enum.IntEnum):
    """ANSI SGR foreground color codes.

    Values are the literal numbers used in CSI "<n>m" sequences: 0 resets,
    30-37 are the standard colors, 90-97 the bright variants.
    """

    CHAR_COLOR_DEFAULT = 0
    CHAR_COLOR_BLACK = 30
    CHAR_COLOR_RED = 31
    CHAR_COLOR_GREEN = 32
    CHAR_COLOR_YELLOW = 33
    CHAR_COLOR_BLUE = 34
    CHAR_COLOR_MAGENTA = 35
    CHAR_COLOR_CYAN = 36
    CHAR_COLOR_WHITE = 37  # Really light gray
    CHAR_COLOR_BRIGHT_BLACK = 90  # Really dark gray
    CHAR_COLOR_BRIGHT_RED = 91
    CHAR_COLOR_BRIGHT_GREEN = 92
    CHAR_COLOR_BRIGHT_YELLOW = 93
    CHAR_COLOR_BRIGHT_BLUE = 94
    CHAR_COLOR_BRIGHT_MAGENTA = 95
    CHAR_COLOR_BRIGHT_CYAN = 96
    CHAR_COLOR_BRIGHT_WHITE = 97
|
|
|
|
|
|
def ESC(x: bytes) -> bytes:
    """Prefix *x* with the ASCII escape byte (0x1B)."""
    return b"\x1b" + x
|
|
|
|
|
|
def CSI(x: bytes) -> bytes:
    """Build a Control Sequence Introducer sequence: ESC '[' followed by *x*."""
    return b"\x1b[" + x
|
|
|
|
|
|
CLEAR = CSI(b"H") + CSI(b"0m")
|
|
|
|
|
|
def enter_alt_screen():
    """Switch the terminal to the alternate screen buffer (DEC mode 1049)."""
    sys.stdout.buffer.write(b"\x1b[?1049h")
|
|
|
|
|
|
def leave_alt_screen():
    """Switch the terminal back to the main screen buffer (DEC mode 1049)."""
    sys.stdout.buffer.write(b"\x1b[?1049l")
|
|
|
|
|
|
class Harness:
    """Interactive terminal harness for the demo parser.

    Lazily builds the parse table, lexes and parses the source file, and
    repaints the resulting concrete syntax tree about once a second until
    any key is pressed. Assumes the terminal is already in raw mode (see
    the __main__ block), hence the explicit "\\r" in prints.
    """

    source: str | None
    table: parser.ParseTable | None
    tree: Tree | None

    def __init__(self, lexer_func, grammar_func, start_rule, source_path):
        # lexer_func(source) is expected to return a token stream with
        # .tokens and .lines (as consumed by parse()); grammar_func() an
        # object with build_table(start=..., generator=...) — TODO confirm
        # against the grammar module.
        # self.generator = parser.GenerateLR1
        self.generator = parser.GenerateLALR
        self.lexer_func = lexer_func
        self.grammar_func = grammar_func
        self.start_rule = start_rule
        self.source_path = source_path

        # Lazily-computed state; update() fills each of these in on demand.
        self.source = None
        self.table = None
        self.tokens = None
        self.tree = None
        self.errors = None

    def run(self):
        """Main loop: redraw roughly once a second; exit on any keypress."""
        while True:
            # Wait up to 1 second for input on stdin.
            i, _, _ = select.select([sys.stdin], [], [], 1)
            if i:
                k = sys.stdin.read(1)
                print(f"Key {k}\r")
                return

            self.update()

    def update(self):
        """(Re)build table/tokens/tree as needed and repaint the screen."""
        if self.table is None:
            self.table = self.grammar_func().build_table(
                start=self.start_rule, generator=self.generator
            )
        assert self.table is not None

        if self.tokens is None:
            with open(self.source_path, "r", encoding="utf-8") as f:
                self.source = f.read()
            self.tokens = self.lexer_func(self.source)

        # print(f"{tokens.lines}")
        # tokens.dump(end=5)
        # Parse only once; errors being non-None means a previous attempt ran.
        if self.tree is None and self.errors is None:
            (tree, errors) = parse(self.table, self.tokens, trace=None)
            self.tree = tree
            self.errors = errors

        sys.stdout.buffer.write(CLEAR)
        rows, cols = termios.tcgetwinsize(sys.stdout.fileno())

        # Status line: number of states and table density.
        states = self.table.states
        average_entries = sum(len(row) for row in states) / len(states)
        max_entries = max(len(row) for row in states)
        print(f"{len(states)} states - {average_entries} average, {max_entries} max\r")

        if self.tree is not None:
            lines = []
            self.format_node(lines, self.tree)
            # Clip to the window: rows-2 tree lines, each truncated to width.
            for line in lines[: rows - 2]:
                print(line[:cols] + "\r")

        sys.stdout.flush()
        sys.stdout.buffer.flush()

    def format_node(self, lines, node: Tree | str, indent=0):
        """Print out an indented concrete syntax tree, from parse().

        Appends one string per node/token to `lines`, indenting children
        two spaces deeper than their parent.
        """
        match node:
            case Tree(name, children):
                # Transparent nodes have name None; show a placeholder.
                lines.append((" " * indent) + (name or "???"))
                for child in children:
                    self.format_node(lines, child, indent + 2)
            case _:
                # Leaf: a raw token string.
                lines.append((" " * indent) + str(node))
|
|
|
|
|
|
if __name__ == "__main__":
    # Usage: <script> <source-file>; with no argument source_path stays None
    # (Harness.update() will then fail to open it — presumably the file
    # argument is expected in practice).
    source_path = None
    if len(sys.argv) == 2:
        source_path = sys.argv[1]

    # Put the terminal into raw mode for single-key input, and always
    # restore the saved settings (and leave the alternate screen) on the
    # way out, even if the harness raises.
    fd = sys.stdin.fileno()
    old_settings = termios.tcgetattr(fd)
    try:
        tty.setraw(fd)
        enter_alt_screen()

        h = Harness(
            lexer_func=grammar.FineTokens,
            grammar_func=grammar.FineGrammar,
            start_rule="file",
            source_path=source_path,
        )
        h.run()

    finally:
        leave_alt_screen()
        termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)

    # print(parser_faster.format_table(gen, table))
    # print()
    # tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])
|