Start making parsing thread-based
This commit is contained in:
parent
bd70315935
commit
b60b38d78e
1 changed files with 123 additions and 74 deletions
197
harness.py
197
harness.py
|
|
@ -1,5 +1,6 @@
|
||||||
import argparse
|
import argparse
|
||||||
import bisect
|
import bisect
|
||||||
|
import enum
|
||||||
import importlib
|
import importlib
|
||||||
import inspect
|
import inspect
|
||||||
import enum
|
import enum
|
||||||
|
|
@ -25,9 +26,10 @@ import parser
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
def trace_state(stack, input, input_index, action):
|
def trace_state(id, stack, input, input_index, action):
|
||||||
print(
|
print(
|
||||||
"{stack: <20} {input: <50} {action: <5}".format(
|
"{id: <04}: {stack: <20} {input: <50} {action: <5}".format(
|
||||||
|
id=id,
|
||||||
stack=repr([s[0] for s in stack]),
|
stack=repr([s[0] for s in stack]),
|
||||||
input=repr(input[input_index : input_index + 4]),
|
input=repr(input[input_index : input_index + 4]),
|
||||||
action=repr(action),
|
action=repr(action),
|
||||||
|
|
@ -50,20 +52,97 @@ class Tree:
|
||||||
children: typing.Tuple["Tree | TokenValue", ...]
|
children: typing.Tuple["Tree | TokenValue", ...]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AcceptResult:
    """Step outcome: the parse finished and produced a tree."""

    # The tree taken from the top of the stack when the accept action fired.
    result: Tree
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ContinueResult:
    """Step outcome: a token was shifted and the parse can keep going."""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ErrorResult:
    """Step outcome: the table had no action for the current token."""
|
||||||
|
|
||||||
|
|
||||||
|
# Everything ParserThread.step can return: accepted with a tree, shifted a
# token and wants more input, or hit a parse error.
StepResult = AcceptResult | ContinueResult | ErrorResult
|
||||||
|
|
||||||
|
|
||||||
|
class ParserThread:
|
||||||
|
# Our stack is a stack of tuples, where the first entry is the state
|
||||||
|
# number and the second entry is the 'value' that was generated when the
|
||||||
|
# state was pushed.
|
||||||
|
stack: list[typing.Tuple[int, TokenValue | Tree | None]]
|
||||||
|
|
||||||
|
def __init__(self, id, trace, stack):
|
||||||
|
self.id = id
|
||||||
|
self.trace = trace
|
||||||
|
self.stack = stack
|
||||||
|
|
||||||
|
def step(
|
||||||
|
self,
|
||||||
|
table: parser.ParseTable,
|
||||||
|
current_token: str,
|
||||||
|
input_index: int,
|
||||||
|
input_tokens: list[typing.Tuple],
|
||||||
|
) -> StepResult:
|
||||||
|
stack = self.stack
|
||||||
|
while True:
|
||||||
|
current_state = stack[-1][0]
|
||||||
|
|
||||||
|
action = table.actions[current_state].get(current_token, parser.Error())
|
||||||
|
if self.trace:
|
||||||
|
self.trace(self.id, stack, input, input_index, action)
|
||||||
|
|
||||||
|
match action:
|
||||||
|
case parser.Accept():
|
||||||
|
result = stack[-1][1]
|
||||||
|
assert isinstance(result, Tree)
|
||||||
|
return AcceptResult(result)
|
||||||
|
|
||||||
|
case parser.Reduce(name=name, count=size, transparent=transparent):
|
||||||
|
children: list[TokenValue | Tree] = []
|
||||||
|
for _, c in stack[-size:]:
|
||||||
|
if c is None:
|
||||||
|
continue
|
||||||
|
elif isinstance(c, Tree) and c.name is None:
|
||||||
|
children.extend(c.children)
|
||||||
|
else:
|
||||||
|
children.append(c)
|
||||||
|
|
||||||
|
value = Tree(
|
||||||
|
name=name if not transparent else None,
|
||||||
|
start=children[0].start,
|
||||||
|
end=children[-1].end,
|
||||||
|
children=tuple(children),
|
||||||
|
)
|
||||||
|
del stack[-size:]
|
||||||
|
|
||||||
|
goto = table.gotos[stack[-1][0]].get(name)
|
||||||
|
assert goto is not None
|
||||||
|
stack.append((goto, value))
|
||||||
|
continue
|
||||||
|
|
||||||
|
case parser.Shift(state):
|
||||||
|
(kind, start, length) = input_tokens[input_index]
|
||||||
|
tval = TokenValue(kind=kind.value, start=start, end=start + length)
|
||||||
|
stack.append((state, tval))
|
||||||
|
return ContinueResult()
|
||||||
|
|
||||||
|
case parser.Error():
|
||||||
|
return ErrorResult()
|
||||||
|
|
||||||
|
case _:
|
||||||
|
raise ValueError(f"Unknown action type: {action}")
|
||||||
|
|
||||||
|
|
||||||
|
def parser_thread():
    """Placeholder with no behavior yet; always returns None."""
    return None
|
||||||
|
|
||||||
|
|
||||||
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
|
def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
|
||||||
"""Parse the input with the generated parsing table and return the
|
|
||||||
concrete syntax tree.
|
|
||||||
|
|
||||||
The parsing table can be generated by GenerateLR0.gen_table() or by any
|
|
||||||
of the other generators below. The parsing mechanism never changes, only
|
|
||||||
the table generation mechanism.
|
|
||||||
|
|
||||||
input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
|
|
||||||
one on for you.
|
|
||||||
|
|
||||||
This is not a *great* parser, it's really just a demo for what you can
|
|
||||||
do with the table.
|
|
||||||
"""
|
|
||||||
input_tokens = tokens.tokens()
|
input_tokens = tokens.tokens()
|
||||||
input: list[str] = [t.value for (t, _, _) in input_tokens]
|
input: list[str] = [t.value for (t, _, _) in input_tokens]
|
||||||
|
|
||||||
|
|
@ -71,73 +150,43 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
|
||||||
input = input + ["$"]
|
input = input + ["$"]
|
||||||
input_index = 0
|
input_index = 0
|
||||||
|
|
||||||
# Our stack is a stack of tuples, where the first entry is the state number
|
threads = [ParserThread(0, trace, [(0, None)])]
|
||||||
# and the second entry is the 'value' that was generated when the state was
|
|
||||||
# pushed.
|
|
||||||
stack: list[typing.Tuple[int, TokenValue | Tree | None]] = [(0, None)]
|
|
||||||
while True:
|
while True:
|
||||||
current_state = stack[-1][0]
|
assert len(threads) > 0
|
||||||
current_token = input[input_index]
|
current_token = input[input_index]
|
||||||
|
for thread in threads:
|
||||||
|
sr = thread.step(table, current_token, input_index, input_tokens)
|
||||||
|
match sr:
|
||||||
|
case AcceptResult(value):
|
||||||
|
return (value, [])
|
||||||
|
|
||||||
action = table.actions[current_state].get(current_token, parser.Error())
|
case ContinueResult():
|
||||||
if trace:
|
break
|
||||||
trace(stack, input, input_index, action)
|
|
||||||
|
|
||||||
match action:
|
case ErrorResult():
|
||||||
case parser.Accept():
|
if input_index >= len(input_tokens):
|
||||||
result = stack[-1][1]
|
message = "Unexpected end of file"
|
||||||
assert isinstance(result, Tree)
|
start = input_tokens[-1][1]
|
||||||
return (result, [])
|
|
||||||
|
|
||||||
case parser.Reduce(name=name, count=size, transparent=transparent):
|
|
||||||
children: list[TokenValue | Tree] = []
|
|
||||||
for _, c in stack[-size:]:
|
|
||||||
if c is None:
|
|
||||||
continue
|
|
||||||
elif isinstance(c, Tree) and c.name is None:
|
|
||||||
children.extend(c.children)
|
|
||||||
else:
|
else:
|
||||||
children.append(c)
|
message = f"Syntax error: unexpected symbol {current_token}"
|
||||||
|
(_, start, _) = input_tokens[input_index]
|
||||||
|
|
||||||
value = Tree(
|
line_index = bisect.bisect_left(tokens.lines, start)
|
||||||
name=name if not transparent else None,
|
if line_index == 0:
|
||||||
start=children[0].start,
|
col_start = 0
|
||||||
end=children[-1].end,
|
else:
|
||||||
children=tuple(children),
|
col_start = tokens.lines[line_index - 1] + 1
|
||||||
)
|
column_index = start - col_start
|
||||||
stack = stack[:-size]
|
line_index += 1
|
||||||
|
|
||||||
goto = table.gotos[stack[-1][0]].get(name)
|
error = f"{line_index}:{column_index}: {message}"
|
||||||
assert goto is not None
|
return (None, [error])
|
||||||
stack.append((goto, value))
|
case _:
|
||||||
|
typing.assert_never(sr)
|
||||||
|
|
||||||
case parser.Shift(state):
|
# All threads have accepted or errored or consumed input.
|
||||||
(kind, start, length) = input_tokens[input_index]
|
input_index += 1
|
||||||
tval = TokenValue(kind=kind.value, start=start, end=start + length)
|
|
||||||
stack.append((state, tval))
|
|
||||||
input_index += 1
|
|
||||||
|
|
||||||
case parser.Error():
|
|
||||||
if input_index >= len(input_tokens):
|
|
||||||
message = "Unexpected end of file"
|
|
||||||
start = input_tokens[-1][1]
|
|
||||||
else:
|
|
||||||
message = f"Syntax error: unexpected symbol {current_token}"
|
|
||||||
(_, start, _) = input_tokens[input_index]
|
|
||||||
|
|
||||||
line_index = bisect.bisect_left(tokens.lines, start)
|
|
||||||
if line_index == 0:
|
|
||||||
col_start = 0
|
|
||||||
else:
|
|
||||||
col_start = tokens.lines[line_index - 1] + 1
|
|
||||||
column_index = start - col_start
|
|
||||||
line_index += 1
|
|
||||||
|
|
||||||
error = f"{line_index}:{column_index}: {message}"
|
|
||||||
return (None, [error])
|
|
||||||
|
|
||||||
case _:
|
|
||||||
raise ValueError(f"Unknown action type: {action}")
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue