Start making parsing thread-based

2024-06-06 08:05:40 -07:00 · 2024-06-06 08:05:40 -07:00 · b60b38d78e
commit b60b38d78e
parent bd70315935
1 changed files with 123 additions and 74 deletions
--- a/harness.py
+++ b/harness.py
@ -1,5 +1,6 @@
 import argparse
 import bisect
+import enum
 import importlib
 import inspect
 import enum
@ -25,9 +26,10 @@ import parser
 ###############################################################################


-def trace_state(stack, input, input_index, action):
+def trace_state(id, stack, input, input_index, action):
    print(
-        "{stack: <20}  {input: <50}  {action: <5}".format(
+        "{id: <04}: {stack: <20}  {input: <50}  {action: <5}".format(
+            id=id,
            stack=repr([s[0] for s in stack]),
            input=repr(input[input_index : input_index + 4]),
            action=repr(action),
@ -50,20 +52,97 @@ class Tree:
    children: typing.Tuple["Tree | TokenValue", ...]


+@dataclass
+class AcceptResult:
+    """Step outcome for an accept action: the parse is complete."""
+
+    # The tree taken from the top of the thread's stack when the accept
+    # action was reached.
+    result: Tree
+
+
+@dataclass
+class ContinueResult:
+    """Step outcome for a shift: the current token was pushed onto the stack."""
+
+    pass
+
+
+@dataclass
+class ErrorResult:
+    """Step outcome when the table has no action for the current token."""
+
+    pass
+
+
+# Every outcome ParserThread.step can return: accepted, shifted the current
+# token, or hit a syntax error.
+StepResult = AcceptResult | ContinueResult | ErrorResult
+
+
+class ParserThread:
+    """A single parse in progress: a state stack driven by a parse table.
+
+    The thread owns only its stack; the table and the token stream are
+    passed in on every call to `step`.
+    """
+
+    # Our stack is a stack of tuples, where the first entry is the state
+    # number and the second entry is the 'value' that was generated when the
+    # state was pushed.
+    stack: list[typing.Tuple[int, TokenValue | Tree | None]]
+
+    def __init__(self, id, trace, stack):
+        # `id` is only used to label trace output. `trace` is an optional
+        # callback invoked as trace(id, stack, input, input_index, action);
+        # a falsy value disables tracing.
+        self.id = id
+        self.trace = trace
+        self.stack = stack
+
+    def step(
+        self,
+        table: parser.ParseTable,
+        current_token: str,
+        input_index: int,
+        input_tokens: list[typing.Tuple],
+    ) -> StepResult:
+        """Apply table actions for `current_token` until it is consumed.
+
+        Reductions are applied in a loop without consuming input; the method
+        returns at the first shift, accept, or error.
+
+        Args:
+            table: Provides `actions` (indexed by state, mapping a token to
+                an action) and `gotos` (indexed by state, mapping a
+                production name to the next state).
+            current_token: The terminal to look up in the action table.
+            input_index: Position of the current token in `input_tokens`.
+            input_tokens: `(kind, start, length)` tuples for the input.
+
+        Returns:
+            AcceptResult holding the tree on top of the stack on accept,
+            ContinueResult after a shift, or ErrorResult when the table has
+            no action for `current_token` in the current state.
+
+        Raises:
+            ValueError: If the table yields an action of an unknown type.
+        """
+        stack = self.stack
+
+        # Built lazily, and only when tracing: the list of token names the
+        # trace callback slices by `input_index`.
+        trace_input: list[str] | None = None
+
+        while True:
+            current_state = stack[-1][0]
+
+            action = table.actions[current_state].get(current_token, parser.Error())
+            if self.trace:
+                if trace_input is None:
+                    # The callback needs the token names followed by the "$"
+                    # end marker. No local `input` exists in this scope, so
+                    # the bare name would resolve to the `input` builtin,
+                    # which cannot be sliced.
+                    trace_input = [kind.value for (kind, _, _) in input_tokens] + ["$"]
+                self.trace(self.id, stack, trace_input, input_index, action)
+
+            match action:
+                case parser.Accept():
+                    result = stack[-1][1]
+                    assert isinstance(result, Tree)
+                    return AcceptResult(result)
+
+                case parser.Reduce(name=name, count=size, transparent=transparent):
+                    # NOTE(review): with size == 0 the slice stack[-0:] is the
+                    # whole stack, and an empty `children` list has no
+                    # start/end. Confirm the tables never produce a
+                    # zero-count reduction.
+                    children: list[TokenValue | Tree] = []
+                    for _, c in stack[-size:]:
+                        if c is None:
+                            continue
+                        elif isinstance(c, Tree) and c.name is None:
+                            # Unnamed (transparent) trees are spliced into
+                            # their parent instead of nesting.
+                            children.extend(c.children)
+                        else:
+                            children.append(c)
+
+                    value = Tree(
+                        name=name if not transparent else None,
+                        start=children[0].start,
+                        end=children[-1].end,
+                        children=tuple(children),
+                    )
+                    del stack[-size:]
+
+                    goto = table.gotos[stack[-1][0]].get(name)
+                    assert goto is not None
+                    stack.append((goto, value))
+                    # A reduce does not consume the token; look it up again
+                    # from the new state.
+                    continue
+
+                case parser.Shift(state):
+                    (kind, start, length) = input_tokens[input_index]
+                    tval = TokenValue(kind=kind.value, start=start, end=start + length)
+                    stack.append((state, tval))
+                    return ContinueResult()
+
+                case parser.Error():
+                    return ErrorResult()
+
+                case _:
+                    raise ValueError(f"Unknown action type: {action}")
+
+
+def parser_thread():
+    # Placeholder: takes no arguments and does nothing yet.
+    pass
+
+
 def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
-    """Parse the input with the generated parsing table and return the
-    concrete syntax tree.
-
-    The parsing table can be generated by GenerateLR0.gen_table() or by any
-    of the other generators below. The parsing mechanism never changes, only
-    the table generation mechanism.
-
-    input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
-    one on for you.
-
-    This is not a *great* parser, it's really just a demo for what you can
-    do with the table.
-    """
    input_tokens = tokens.tokens()
    input: list[str] = [t.value for (t, _, _) in input_tokens]

@ -71,73 +150,43 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
    input = input + ["$"]
    input_index = 0

-    # Our stack is a stack of tuples, where the first entry is the state number
-    # and the second entry is the 'value' that was generated when the state was
-    # pushed.
-    stack: list[typing.Tuple[int, TokenValue | Tree | None]] = [(0, None)]
+    threads = [ParserThread(0, trace, [(0, None)])]
+
    while True:
-        current_state = stack[-1][0]
+        assert len(threads) > 0
        current_token = input[input_index]
+        for thread in threads:
+            sr = thread.step(table, current_token, input_index, input_tokens)
+            match sr:
+                case AcceptResult(value):
+                    return (value, [])

-        action = table.actions[current_state].get(current_token, parser.Error())
-        if trace:
-            trace(stack, input, input_index, action)
+                case ContinueResult():
+                    break

-        match action:
-            case parser.Accept():
-                result = stack[-1][1]
-                assert isinstance(result, Tree)
-                return (result, [])
-
-            case parser.Reduce(name=name, count=size, transparent=transparent):
-                children: list[TokenValue | Tree] = []
-                for _, c in stack[-size:]:
-                    if c is None:
-                        continue
-                    elif isinstance(c, Tree) and c.name is None:
-                        children.extend(c.children)
+                case ErrorResult():
+                    if input_index >= len(input_tokens):
+                        message = "Unexpected end of file"
+                        start = input_tokens[-1][1]
                    else:
-                        children.append(c)
+                        message = f"Syntax error: unexpected symbol {current_token}"
+                        (_, start, _) = input_tokens[input_index]

-                value = Tree(
-                    name=name if not transparent else None,
-                    start=children[0].start,
-                    end=children[-1].end,
-                    children=tuple(children),
-                )
-                stack = stack[:-size]
+                    line_index = bisect.bisect_left(tokens.lines, start)
+                    if line_index == 0:
+                        col_start = 0
+                    else:
+                        col_start = tokens.lines[line_index - 1] + 1
+                    column_index = start - col_start
+                    line_index += 1

-                goto = table.gotos[stack[-1][0]].get(name)
-                assert goto is not None
-                stack.append((goto, value))
+                    error = f"{line_index}:{column_index}: {message}"
+                    return (None, [error])
+                case _:
+                    typing.assert_never(sr)

-            case parser.Shift(state):
-                (kind, start, length) = input_tokens[input_index]
-                tval = TokenValue(kind=kind.value, start=start, end=start + length)
-                stack.append((state, tval))
-                input_index += 1
-
-            case parser.Error():
-                if input_index >= len(input_tokens):
-                    message = "Unexpected end of file"
-                    start = input_tokens[-1][1]
-                else:
-                    message = f"Syntax error: unexpected symbol {current_token}"
-                    (_, start, _) = input_tokens[input_index]
-
-                line_index = bisect.bisect_left(tokens.lines, start)
-                if line_index == 0:
-                    col_start = 0
-                else:
-                    col_start = tokens.lines[line_index - 1] + 1
-                column_index = start - col_start
-                line_index += 1
-
-                error = f"{line_index}:{column_index}: {message}"
-                return (None, [error])
-
-            case _:
-                raise ValueError(f"Unknown action type: {action}")
+        # All threads have accepted or errored or consumed input.
+        input_index += 1


 ###############################################################################