Start making parsing thread-based

This commit is contained in:
John Doty 2024-06-06 08:05:40 -07:00
parent bd70315935
commit b60b38d78e

View file

@ -1,5 +1,6 @@
import argparse
import bisect
import enum
import importlib
import inspect
import enum
@ -25,9 +26,10 @@ import parser
###############################################################################
def trace_state(stack, input, input_index, action):
def trace_state(id, stack, input, input_index, action):
print(
"{stack: <20} {input: <50} {action: <5}".format(
"{id: <04}: {stack: <20} {input: <50} {action: <5}".format(
id=id,
stack=repr([s[0] for s in stack]),
input=repr(input[input_index : input_index + 4]),
action=repr(action),
@ -50,20 +52,97 @@ class Tree:
children: typing.Tuple["Tree | TokenValue", ...]
@dataclass
class AcceptResult:
    """Step outcome produced when the parse table's action is Accept."""

    # The value found on top of the parse stack at the moment of acceptance.
    result: Tree
@dataclass
class ContinueResult:
    """Step outcome produced after a token has been shifted onto the stack.

    Carries no data.
    """
@dataclass
class ErrorResult:
    """Step outcome produced when the parse table has no action for the token.

    Carries no data.
    """
# The closed set of outcomes a single parser step can report: the parse was
# accepted, a token was shifted, or an error was hit.
StepResult = AcceptResult | ContinueResult | ErrorResult
class ParserThread:
# Our stack is a stack of tuples, where the first entry is the state
# number and the second entry is the 'value' that was generated when the
# state was pushed.
stack: list[typing.Tuple[int, TokenValue | Tree | None]]
def __init__(self, id, trace, stack):
self.id = id
self.trace = trace
self.stack = stack
def step(
self,
table: parser.ParseTable,
current_token: str,
input_index: int,
input_tokens: list[typing.Tuple],
) -> StepResult:
stack = self.stack
while True:
current_state = stack[-1][0]
action = table.actions[current_state].get(current_token, parser.Error())
if self.trace:
self.trace(self.id, stack, input, input_index, action)
match action:
case parser.Accept():
result = stack[-1][1]
assert isinstance(result, Tree)
return AcceptResult(result)
case parser.Reduce(name=name, count=size, transparent=transparent):
children: list[TokenValue | Tree] = []
for _, c in stack[-size:]:
if c is None:
continue
elif isinstance(c, Tree) and c.name is None:
children.extend(c.children)
else:
children.append(c)
value = Tree(
name=name if not transparent else None,
start=children[0].start,
end=children[-1].end,
children=tuple(children),
)
del stack[-size:]
goto = table.gotos[stack[-1][0]].get(name)
assert goto is not None
stack.append((goto, value))
continue
case parser.Shift(state):
(kind, start, length) = input_tokens[input_index]
tval = TokenValue(kind=kind.value, start=start, end=start + length)
stack.append((state, tval))
return ContinueResult()
case parser.Error():
return ErrorResult()
case _:
raise ValueError(f"Unknown action type: {action}")
def parser_thread():
    """Placeholder: takes no arguments and currently does nothing."""
    return None
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
"""Parse the input with the generated parsing table and return the
concrete syntax tree.
The parsing table can be generated by GenerateLR0.gen_table() or by any
of the other generators below. The parsing mechanism never changes, only
the table generation mechanism.
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
one on for you.
This is not a *great* parser, it's really just a demo for what you can
do with the table.
"""
input_tokens = tokens.tokens()
input: list[str] = [t.value for (t, _, _) in input_tokens]
@ -71,73 +150,43 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
input = input + ["$"]
input_index = 0
# Our stack is a stack of tuples, where the first entry is the state number
# and the second entry is the 'value' that was generated when the state was
# pushed.
stack: list[typing.Tuple[int, TokenValue | Tree | None]] = [(0, None)]
threads = [ParserThread(0, trace, [(0, None)])]
while True:
current_state = stack[-1][0]
assert len(threads) > 0
current_token = input[input_index]
for thread in threads:
sr = thread.step(table, current_token, input_index, input_tokens)
match sr:
case AcceptResult(value):
return (value, [])
action = table.actions[current_state].get(current_token, parser.Error())
if trace:
trace(stack, input, input_index, action)
case ContinueResult():
break
match action:
case parser.Accept():
result = stack[-1][1]
assert isinstance(result, Tree)
return (result, [])
case parser.Reduce(name=name, count=size, transparent=transparent):
children: list[TokenValue | Tree] = []
for _, c in stack[-size:]:
if c is None:
continue
elif isinstance(c, Tree) and c.name is None:
children.extend(c.children)
case ErrorResult():
if input_index >= len(input_tokens):
message = "Unexpected end of file"
start = input_tokens[-1][1]
else:
children.append(c)
message = f"Syntax error: unexpected symbol {current_token}"
(_, start, _) = input_tokens[input_index]
value = Tree(
name=name if not transparent else None,
start=children[0].start,
end=children[-1].end,
children=tuple(children),
)
stack = stack[:-size]
line_index = bisect.bisect_left(tokens.lines, start)
if line_index == 0:
col_start = 0
else:
col_start = tokens.lines[line_index - 1] + 1
column_index = start - col_start
line_index += 1
goto = table.gotos[stack[-1][0]].get(name)
assert goto is not None
stack.append((goto, value))
error = f"{line_index}:{column_index}: {message}"
return (None, [error])
case _:
typing.assert_never(sr)
case parser.Shift(state):
(kind, start, length) = input_tokens[input_index]
tval = TokenValue(kind=kind.value, start=start, end=start + length)
stack.append((state, tval))
input_index += 1
case parser.Error():
if input_index >= len(input_tokens):
message = "Unexpected end of file"
start = input_tokens[-1][1]
else:
message = f"Syntax error: unexpected symbol {current_token}"
(_, start, _) = input_tokens[input_index]
line_index = bisect.bisect_left(tokens.lines, start)
if line_index == 0:
col_start = 0
else:
col_start = tokens.lines[line_index - 1] + 1
column_index = start - col_start
line_index += 1
error = f"{line_index}:{column_index}: {message}"
return (None, [error])
case _:
raise ValueError(f"Unknown action type: {action}")
# All threads have accepted or errored or consumed input.
input_index += 1
###############################################################################