Even more threaded parsing

This commit is contained in:
John Doty 2024-06-06 17:02:07 -07:00
parent f904f7eea3
commit 22029edbb2

View file

@ -53,21 +53,24 @@ class Tree:
@dataclass @dataclass
class AcceptResult: class ParseError:
result: Tree message: str
start: int
end: int
@dataclass
class StopResult:
result: Tree | None
errors: list[ParseError]
@dataclass @dataclass
class ContinueResult: class ContinueResult:
pass threads: list["ParserThread"]
@dataclass StepResult = StopResult | ContinueResult
class ErrorResult:
pass
StepResult = AcceptResult | ContinueResult | ErrorResult
class ParserThread: class ParserThread:
@ -76,12 +79,14 @@ class ParserThread:
# state was pushed. # state was pushed.
table: parser.ParseTable table: parser.ParseTable
stack: list[typing.Tuple[int, TokenValue | Tree | None]] stack: list[typing.Tuple[int, TokenValue | Tree | None]]
errors: list[ParseError]
def __init__(self, id, trace, table, stack): def __init__(self, id, trace, table, stack):
self.id = id self.id = id
self.trace = trace self.trace = trace
self.table = table self.table = table
self.stack = stack self.stack = stack
self.errors = []
def step(self, current_token: TokenValue) -> StepResult: def step(self, current_token: TokenValue) -> StepResult:
stack = self.stack stack = self.stack
@ -98,7 +103,7 @@ class ParserThread:
case parser.Accept(): case parser.Accept():
result = stack[-1][1] result = stack[-1][1]
assert isinstance(result, Tree) assert isinstance(result, Tree)
return AcceptResult(result) return StopResult(result, self.errors)
case parser.Reduce(name=name, count=size, transparent=transparent): case parser.Reduce(name=name, count=size, transparent=transparent):
children: list[TokenValue | Tree] = [] children: list[TokenValue | Tree] = []
@ -125,10 +130,21 @@ class ParserThread:
case parser.Shift(state): case parser.Shift(state):
stack.append((state, current_token)) stack.append((state, current_token))
return ContinueResult() return ContinueResult([self])
case parser.Error(): case parser.Error():
return ErrorResult() if current_token.kind == "$":
message = "Unexpected end of file"
else:
message = f"Syntax error: unexpected symbol {current_token.kind}"
self.errors.append(
ParseError(
message=message, start=current_token.start, end=current_token.end
)
)
# TODO: Error Recovery Here
return StopResult(None, self.errors)
case _: case _:
raise ValueError(f"Unknown action type: {action}") raise ValueError(f"Unknown action type: {action}")
@ -148,43 +164,47 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
threads = [ threads = [
ParserThread(0, trace, table, [(0, None)]), ParserThread(0, trace, table, [(0, None)]),
] ]
results: list[typing.Tuple[ParserThread, Tree | None, list[ParseError]]] = []
while True: while len(threads) > 0:
assert len(threads) > 0
current_token = input[input_index] current_token = input[input_index]
next_threads: list[ParserThread] = []
for thread in threads: for thread in threads:
sr = thread.step(current_token) sr = thread.step(current_token)
match sr: match sr:
case AcceptResult(value): case StopResult(value, errors):
return (value, []) results.append((thread, value, errors))
case ContinueResult():
break break
case ErrorResult(): case ContinueResult(threads):
if input_index >= len(input_tokens): next_threads.extend(threads)
message = "Unexpected end of file" break
start = input_tokens[-1][1]
else:
message = f"Syntax error: unexpected symbol {current_token.kind}"
start = current_token.start
line_index = bisect.bisect_left(tokens.lines, start)
if line_index == 0:
col_start = 0
else:
col_start = tokens.lines[line_index - 1] + 1
column_index = start - col_start
line_index += 1
error = f"{line_index}:{column_index}: {message}"
return (None, [error])
case _: case _:
typing.assert_never(sr) typing.assert_never(sr)
# All threads have accepted or errored or consumed input. # All threads have accepted or errored or consumed input.
threads = next_threads
input_index += 1 input_index += 1
# TODO: Score results and whatnot, pick the best one.
assert len(results) > 0
_, result, errors = results[0]
error_strings = []
for parse_error in errors:
line_index = bisect.bisect_left(tokens.lines, parse_error.start)
if line_index == 0:
col_start = 0
else:
col_start = tokens.lines[line_index - 1] + 1
column_index = parse_error.start - col_start
line_index += 1
error_strings.append(f"{line_index}:{column_index}: {parse_error.message}")
return (result, error_strings)
############################################################################### ###############################################################################
# Screen Stuff # Screen Stuff