Start making parsing thread-based
This commit is contained in:
parent
bd70315935
commit
b60b38d78e
1 changed files with 123 additions and 74 deletions
197
harness.py
197
harness.py
|
|
@ -1,5 +1,6 @@
|
|||
import argparse
|
||||
import bisect
|
||||
import enum
|
||||
import importlib
|
||||
import inspect
|
||||
import enum
|
||||
|
|
@ -25,9 +26,10 @@ import parser
|
|||
###############################################################################
|
||||
|
||||
|
||||
def trace_state(stack, input, input_index, action):
|
||||
def trace_state(id, stack, input, input_index, action):
|
||||
print(
|
||||
"{stack: <20} {input: <50} {action: <5}".format(
|
||||
"{id: <04}: {stack: <20} {input: <50} {action: <5}".format(
|
||||
id=id,
|
||||
stack=repr([s[0] for s in stack]),
|
||||
input=repr(input[input_index : input_index + 4]),
|
||||
action=repr(action),
|
||||
|
|
@ -50,20 +52,97 @@ class Tree:
|
|||
children: typing.Tuple["Tree | TokenValue", ...]
|
||||
|
||||
|
||||
@dataclass
class AcceptResult:
    """Outcome of a parser step that hit an accept action.

    `result` holds the tree taken from the top of the parse stack.
    """

    result: Tree
|
||||
|
||||
|
||||
@dataclass
class ContinueResult:
    """Outcome of a parser step that shifted a token; carries no data."""
|
||||
|
||||
|
||||
@dataclass
class ErrorResult:
    """Outcome of a parser step that hit an error action; carries no data."""
|
||||
|
||||
|
||||
# Union of the three outcomes a single ParserThread.step() call can return:
# accepted with a tree, shifted a token, or hit an error action.
StepResult = AcceptResult | ContinueResult | ErrorResult
|
||||
|
||||
|
||||
class ParserThread:
|
||||
# Our stack is a stack of tuples, where the first entry is the state
|
||||
# number and the second entry is the 'value' that was generated when the
|
||||
# state was pushed.
|
||||
stack: list[typing.Tuple[int, TokenValue | Tree | None]]
|
||||
|
||||
def __init__(self, id, trace, stack):
|
||||
self.id = id
|
||||
self.trace = trace
|
||||
self.stack = stack
|
||||
|
||||
def step(
|
||||
self,
|
||||
table: parser.ParseTable,
|
||||
current_token: str,
|
||||
input_index: int,
|
||||
input_tokens: list[typing.Tuple],
|
||||
) -> StepResult:
|
||||
stack = self.stack
|
||||
while True:
|
||||
current_state = stack[-1][0]
|
||||
|
||||
action = table.actions[current_state].get(current_token, parser.Error())
|
||||
if self.trace:
|
||||
self.trace(self.id, stack, input, input_index, action)
|
||||
|
||||
match action:
|
||||
case parser.Accept():
|
||||
result = stack[-1][1]
|
||||
assert isinstance(result, Tree)
|
||||
return AcceptResult(result)
|
||||
|
||||
case parser.Reduce(name=name, count=size, transparent=transparent):
|
||||
children: list[TokenValue | Tree] = []
|
||||
for _, c in stack[-size:]:
|
||||
if c is None:
|
||||
continue
|
||||
elif isinstance(c, Tree) and c.name is None:
|
||||
children.extend(c.children)
|
||||
else:
|
||||
children.append(c)
|
||||
|
||||
value = Tree(
|
||||
name=name if not transparent else None,
|
||||
start=children[0].start,
|
||||
end=children[-1].end,
|
||||
children=tuple(children),
|
||||
)
|
||||
del stack[-size:]
|
||||
|
||||
goto = table.gotos[stack[-1][0]].get(name)
|
||||
assert goto is not None
|
||||
stack.append((goto, value))
|
||||
continue
|
||||
|
||||
case parser.Shift(state):
|
||||
(kind, start, length) = input_tokens[input_index]
|
||||
tval = TokenValue(kind=kind.value, start=start, end=start + length)
|
||||
stack.append((state, tval))
|
||||
return ContinueResult()
|
||||
|
||||
case parser.Error():
|
||||
return ErrorResult()
|
||||
|
||||
case _:
|
||||
raise ValueError(f"Unknown action type: {action}")
|
||||
|
||||
|
||||
def parser_thread():
    """Placeholder with no behavior yet; always returns None."""
    return None
|
||||
|
||||
|
||||
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
|
||||
"""Parse the input with the generated parsing table and return the
|
||||
concrete syntax tree.
|
||||
|
||||
The parsing table can be generated by GenerateLR0.gen_table() or by any
|
||||
of the other generators below. The parsing mechanism never changes, only
|
||||
the table generation mechanism.
|
||||
|
||||
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
|
||||
one on for you.
|
||||
|
||||
This is not a *great* parser, it's really just a demo for what you can
|
||||
do with the table.
|
||||
"""
|
||||
input_tokens = tokens.tokens()
|
||||
input: list[str] = [t.value for (t, _, _) in input_tokens]
|
||||
|
||||
|
|
@ -71,73 +150,43 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
|
|||
input = input + ["$"]
|
||||
input_index = 0
|
||||
|
||||
# Our stack is a stack of tuples, where the first entry is the state number
|
||||
# and the second entry is the 'value' that was generated when the state was
|
||||
# pushed.
|
||||
stack: list[typing.Tuple[int, TokenValue | Tree | None]] = [(0, None)]
|
||||
threads = [ParserThread(0, trace, [(0, None)])]
|
||||
|
||||
while True:
|
||||
current_state = stack[-1][0]
|
||||
assert len(threads) > 0
|
||||
current_token = input[input_index]
|
||||
for thread in threads:
|
||||
sr = thread.step(table, current_token, input_index, input_tokens)
|
||||
match sr:
|
||||
case AcceptResult(value):
|
||||
return (value, [])
|
||||
|
||||
action = table.actions[current_state].get(current_token, parser.Error())
|
||||
if trace:
|
||||
trace(stack, input, input_index, action)
|
||||
case ContinueResult():
|
||||
break
|
||||
|
||||
match action:
|
||||
case parser.Accept():
|
||||
result = stack[-1][1]
|
||||
assert isinstance(result, Tree)
|
||||
return (result, [])
|
||||
|
||||
case parser.Reduce(name=name, count=size, transparent=transparent):
|
||||
children: list[TokenValue | Tree] = []
|
||||
for _, c in stack[-size:]:
|
||||
if c is None:
|
||||
continue
|
||||
elif isinstance(c, Tree) and c.name is None:
|
||||
children.extend(c.children)
|
||||
case ErrorResult():
|
||||
if input_index >= len(input_tokens):
|
||||
message = "Unexpected end of file"
|
||||
start = input_tokens[-1][1]
|
||||
else:
|
||||
children.append(c)
|
||||
message = f"Syntax error: unexpected symbol {current_token}"
|
||||
(_, start, _) = input_tokens[input_index]
|
||||
|
||||
value = Tree(
|
||||
name=name if not transparent else None,
|
||||
start=children[0].start,
|
||||
end=children[-1].end,
|
||||
children=tuple(children),
|
||||
)
|
||||
stack = stack[:-size]
|
||||
line_index = bisect.bisect_left(tokens.lines, start)
|
||||
if line_index == 0:
|
||||
col_start = 0
|
||||
else:
|
||||
col_start = tokens.lines[line_index - 1] + 1
|
||||
column_index = start - col_start
|
||||
line_index += 1
|
||||
|
||||
goto = table.gotos[stack[-1][0]].get(name)
|
||||
assert goto is not None
|
||||
stack.append((goto, value))
|
||||
error = f"{line_index}:{column_index}: {message}"
|
||||
return (None, [error])
|
||||
case _:
|
||||
typing.assert_never(sr)
|
||||
|
||||
case parser.Shift(state):
|
||||
(kind, start, length) = input_tokens[input_index]
|
||||
tval = TokenValue(kind=kind.value, start=start, end=start + length)
|
||||
stack.append((state, tval))
|
||||
input_index += 1
|
||||
|
||||
case parser.Error():
|
||||
if input_index >= len(input_tokens):
|
||||
message = "Unexpected end of file"
|
||||
start = input_tokens[-1][1]
|
||||
else:
|
||||
message = f"Syntax error: unexpected symbol {current_token}"
|
||||
(_, start, _) = input_tokens[input_index]
|
||||
|
||||
line_index = bisect.bisect_left(tokens.lines, start)
|
||||
if line_index == 0:
|
||||
col_start = 0
|
||||
else:
|
||||
col_start = tokens.lines[line_index - 1] + 1
|
||||
column_index = start - col_start
|
||||
line_index += 1
|
||||
|
||||
error = f"{line_index}:{column_index}: {message}"
|
||||
return (None, [error])
|
||||
|
||||
case _:
|
||||
raise ValueError(f"Unknown action type: {action}")
|
||||
# All threads have accepted or errored or consumed input.
|
||||
input_index += 1
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue