From b60b38d78e7ee0b2d58642044fa414d78e02ecd1 Mon Sep 17 00:00:00 2001 From: John Doty Date: Thu, 6 Jun 2024 08:05:40 -0700 Subject: [PATCH] Start making parsing thread-based --- harness.py | 197 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 123 insertions(+), 74 deletions(-) diff --git a/harness.py b/harness.py index 8e87501..4de9244 100644 --- a/harness.py +++ b/harness.py @@ -1,5 +1,6 @@ import argparse import bisect +import enum import importlib import inspect import enum @@ -25,9 +26,10 @@ import parser ############################################################################### -def trace_state(stack, input, input_index, action): +def trace_state(id, stack, input, input_index, action): print( - "{stack: <20} {input: <50} {action: <5}".format( + "{id: <04}: {stack: <20} {input: <50} {action: <5}".format( + id=id, stack=repr([s[0] for s in stack]), input=repr(input[input_index : input_index + 4]), action=repr(action), @@ -50,20 +52,97 @@ class Tree: children: typing.Tuple["Tree | TokenValue", ...] +@dataclass +class AcceptResult: + result: Tree + + +@dataclass +class ContinueResult: + pass + + +@dataclass +class ErrorResult: + pass + + +StepResult = AcceptResult | ContinueResult | ErrorResult + + +class ParserThread: + # Our stack is a stack of tuples, where the first entry is the state + # number and the second entry is the 'value' that was generated when the + # state was pushed. + stack: list[typing.Tuple[int, TokenValue | Tree | None]] + + def __init__(self, id, trace, stack): + self.id = id + self.trace = trace + self.stack = stack + + def step( + self, + table: parser.ParseTable, + current_token: str, + input_index: int, + input_tokens: list[typing.Tuple], + ) -> StepResult: + stack = self.stack + while True: + current_state = stack[-1][0] + + action = table.actions[current_state].get(current_token, parser.Error()) + if self.trace: + self.trace(self.id, stack, input, input_index, action) + + match action: + case parser.Accept(): + result = stack[-1][1] + assert isinstance(result, Tree) + return AcceptResult(result) + + case parser.Reduce(name=name, count=size, transparent=transparent): + children: list[TokenValue | Tree] = [] + for _, c in stack[-size:]: + if c is None: + continue + elif isinstance(c, Tree) and c.name is None: + children.extend(c.children) + else: + children.append(c) + + value = Tree( + name=name if not transparent else None, + start=children[0].start, + end=children[-1].end, + children=tuple(children), + ) + del stack[-size:] + + goto = table.gotos[stack[-1][0]].get(name) + assert goto is not None + stack.append((goto, value)) + continue + + case parser.Shift(state): + (kind, start, length) = input_tokens[input_index] + tval = TokenValue(kind=kind.value, start=start, end=start + length) + stack.append((state, tval)) + return ContinueResult() + + case parser.Error(): + return ErrorResult() + + case _: + raise ValueError(f"Unknown action type: {action}") + + +def parser_thread(): + pass + + def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]: - """Parse the input with the generated parsing table and return the - concrete syntax tree. - - The parsing table can be generated by GenerateLR0.gen_table() or by any - of the other generators below. The parsing mechanism never changes, only - the table generation mechanism. - - input is a list of tokens. Don't stick an end-of-stream marker, I'll stick - one on for you. - - This is not a *great* parser, it's really just a demo for what you can - do with the table. - """ input_tokens = tokens.tokens() input: list[str] = [t.value for (t, _, _) in input_tokens] @@ -71,73 +150,43 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N input = input + ["$"] input_index = 0 - # Our stack is a stack of tuples, where the first entry is the state number - # and the second entry is the 'value' that was generated when the state was - # pushed. - stack: list[typing.Tuple[int, TokenValue | Tree | None]] = [(0, None)] + threads = [ParserThread(0, trace, [(0, None)])] + while True: - current_state = stack[-1][0] + assert len(threads) > 0 current_token = input[input_index] + for thread in threads: + sr = thread.step(table, current_token, input_index, input_tokens) + match sr: + case AcceptResult(value): + return (value, []) - action = table.actions[current_state].get(current_token, parser.Error()) - if trace: - trace(stack, input, input_index, action) + case ContinueResult(): + break - match action: - case parser.Accept(): - result = stack[-1][1] - assert isinstance(result, Tree) - return (result, []) - - case parser.Reduce(name=name, count=size, transparent=transparent): - children: list[TokenValue | Tree] = [] - for _, c in stack[-size:]: - if c is None: - continue - elif isinstance(c, Tree) and c.name is None: - children.extend(c.children) + case ErrorResult(): + if input_index >= len(input_tokens): + message = "Unexpected end of file" + start = input_tokens[-1][1] else: - children.append(c) + message = f"Syntax error: unexpected symbol {current_token}" + (_, start, _) = input_tokens[input_index] - value = Tree( - name=name if not transparent else None, - start=children[0].start, - end=children[-1].end, - children=tuple(children), - ) - stack = stack[:-size] + line_index = bisect.bisect_left(tokens.lines, start) + if line_index == 0: + col_start = 0 + else: + col_start = tokens.lines[line_index - 1] + 1 + column_index = start - col_start + line_index += 1 - goto = table.gotos[stack[-1][0]].get(name) - assert goto is not None - stack.append((goto, value)) + error = f"{line_index}:{column_index}: {message}" + return (None, [error]) + case _: + typing.assert_never(sr) - case parser.Shift(state): - (kind, start, length) = input_tokens[input_index] - tval = TokenValue(kind=kind.value, start=start, end=start + length) - stack.append((state, tval)) - input_index += 1 - - case parser.Error(): - if input_index >= len(input_tokens): - message = "Unexpected end of file" - start = input_tokens[-1][1] - else: - message = f"Syntax error: unexpected symbol {current_token}" - (_, start, _) = input_tokens[input_index] - - line_index = bisect.bisect_left(tokens.lines, start) - if line_index == 0: - col_start = 0 - else: - col_start = tokens.lines[line_index - 1] + 1 - column_index = start - col_start - line_index += 1 - - error = f"{line_index}:{column_index}: {message}" - return (None, [error]) - - case _: - raise ValueError(f"Unknown action type: {action}") + # All threads have accepted or errored or consumed input. + input_index += 1 ###############################################################################