Start making parsing thread-based

This commit is contained in:
John Doty 2024-06-06 08:05:40 -07:00
parent bd70315935
commit b60b38d78e

View file

@ -1,5 +1,6 @@
import argparse
import bisect
import enum
import importlib
import inspect
import enum
@ -25,9 +26,10 @@ import parser
###############################################################################
def trace_state(stack, input, input_index, action):
def trace_state(id, stack, input, input_index, action):
print(
"{stack: <20} {input: <50} {action: <5}".format(
"{id: <04}: {stack: <20} {input: <50} {action: <5}".format(
id=id,
stack=repr([s[0] for s in stack]),
input=repr(input[input_index : input_index + 4]),
action=repr(action),
@ -50,44 +52,55 @@ class Tree:
children: typing.Tuple["Tree | TokenValue", ...]
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
"""Parse the input with the generated parsing table and return the
concrete syntax tree.
# Step outcome for a successful parse: ParserThread.step() returns this when
# the table action is parser.Accept, carrying the Tree found on top of the
# stack. parse() unwraps it and returns (tree, []) to its caller.
@dataclass
class AcceptResult:
result: Tree
The parsing table can be generated by GenerateLR0.gen_table() or by any
of the other generators below. The parsing mechanism never changes, only
the table generation mechanism.
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
one on for you.
# Step outcome with no payload: ParserThread.step() returns this right after
# a parser.Shift action has pushed the current token onto the thread's stack.
@dataclass
class ContinueResult:
pass
This is not a *great* parser, it's really just a demo for what you can
do with the table.
"""
input_tokens = tokens.tokens()
input: list[str] = [t.value for (t, _, _) in input_tokens]
assert "$" not in input
input = input + ["$"]
input_index = 0
# Step outcome with no payload: ParserThread.step() returns this when the
# table action for the current state/token is parser.Error. parse() reacts by
# building an error message and returning (None, [error]).
@dataclass
class ErrorResult:
pass
# Our stack is a stack of tuples, where the first entry is the state number
# and the second entry is the 'value' that was generated when the state was
# pushed.
stack: list[typing.Tuple[int, TokenValue | Tree | None]] = [(0, None)]
# Union of every outcome ParserThread.step() can return; parse() matches on
# these three cases.
StepResult = AcceptResult | ContinueResult | ErrorResult
class ParserThread:
# Our stack is a stack of tuples, where the first entry is the state
# number and the second entry is the 'value' that was generated when the
# state was pushed.
stack: list[typing.Tuple[int, TokenValue | Tree | None]]
# Record the thread's identity, trace hook, and parse stack.
#   id    -- passed as the first argument to the trace callback.
#   trace -- optional callback; step() only calls it when it is truthy.
#   stack -- list of (state, value) tuples. It is stored by reference, not
#            copied, and step() mutates it in place.
def __init__(self, id, trace, stack):
self.id = id
self.trace = trace
self.stack = stack
def step(
self,
table: parser.ParseTable,
current_token: str,
input_index: int,
input_tokens: list[typing.Tuple],
) -> StepResult:
stack = self.stack
while True:
current_state = stack[-1][0]
current_token = input[input_index]
action = table.actions[current_state].get(current_token, parser.Error())
if trace:
trace(stack, input, input_index, action)
if self.trace:
self.trace(self.id, stack, input, input_index, action)
match action:
case parser.Accept():
result = stack[-1][1]
assert isinstance(result, Tree)
return (result, [])
return AcceptResult(result)
case parser.Reduce(name=name, count=size, transparent=transparent):
children: list[TokenValue | Tree] = []
@ -105,19 +118,53 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
end=children[-1].end,
children=tuple(children),
)
stack = stack[:-size]
del stack[-size:]
goto = table.gotos[stack[-1][0]].get(name)
assert goto is not None
stack.append((goto, value))
continue
case parser.Shift(state):
(kind, start, length) = input_tokens[input_index]
tval = TokenValue(kind=kind.value, start=start, end=start + length)
stack.append((state, tval))
input_index += 1
return ContinueResult()
case parser.Error():
return ErrorResult()
case _:
raise ValueError(f"Unknown action type: {action}")
# Placeholder: takes no arguments and currently does nothing.
def parser_thread():
pass
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
input_tokens = tokens.tokens()
input: list[str] = [t.value for (t, _, _) in input_tokens]
assert "$" not in input
input = input + ["$"]
input_index = 0
threads = [ParserThread(0, trace, [(0, None)])]
while True:
assert len(threads) > 0
current_token = input[input_index]
for thread in threads:
sr = thread.step(table, current_token, input_index, input_tokens)
match sr:
case AcceptResult(value):
return (value, [])
case ContinueResult():
break
case ErrorResult():
if input_index >= len(input_tokens):
message = "Unexpected end of file"
start = input_tokens[-1][1]
@ -135,9 +182,11 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
error = f"{line_index}:{column_index}: {message}"
return (None, [error])
case _:
raise ValueError(f"Unknown action type: {action}")
typing.assert_never(sr)
# All threads have accepted or errored or consumed input.
input_index += 1
###############################################################################