Start making parsing thread-based

2024-06-06 08:05:40 -07:00 · 2024-06-06 08:05:40 -07:00 · b60b38d78e
commit b60b38d78e
parent bd70315935
1 changed files with 123 additions and 74 deletions
--- a/harness.py
+++ b/harness.py
@ -1,5 +1,6 @@
 import argparse
 import bisect
 import enum
 import importlib
 import inspect
 import enum
@ -25,9 +26,10 @@ import parser
 ###############################################################################
-def trace_state(stack, input, input_index, action):
+def trace_state(id, stack, input, input_index, action):
    print(
-        "{stack: <20}  {input: <50}  {action: <5}".format(
+        "{id: <04}: {stack: <20}  {input: <50}  {action: <5}".format(
            id=id,
            stack=repr([s[0] for s in stack]),
            input=repr(input[input_index : input_index + 4]),
            action=repr(action),
@ -50,20 +52,97 @@ class Tree:
    children: typing.Tuple["Tree | TokenValue", ...]
# Step outcome returned by ParserThread.step when the table's action for the
# current state and token is parser.Accept.
@dataclass
 class AcceptResult:
    # The Tree found on top of the thread's stack at the moment of acceptance.
    result: Tree
# Step outcome returned by ParserThread.step after a parser.Shift action has
# pushed the current token onto the thread's stack. Carries no data.
@dataclass
 class ContinueResult:
    pass
# Step outcome returned by ParserThread.step when the table's action for the
# current state and token is parser.Error (also the default when the table has
# no entry for the token). Carries no data.
@dataclass
 class ErrorResult:
    pass
 # Union of every outcome ParserThread.step can return.
 StepResult = AcceptResult | ContinueResult | ErrorResult
 class ParserThread:
    # Our stack is a stack of tuples, where the first entry is the state
    # number and the second entry is the 'value' that was generated when the
    # state was pushed.
    stack: list[typing.Tuple[int, TokenValue | Tree | None]]
    def __init__(self, id, trace, stack):
        self.id = id
        self.trace = trace
        self.stack = stack
    def step(
        self,
        table: parser.ParseTable,
        current_token: str,
        input_index: int,
        input_tokens: list[typing.Tuple],
    ) -> StepResult:
        stack = self.stack
        while True:
            current_state = stack[-1][0]
            action = table.actions[current_state].get(current_token, parser.Error())
            if self.trace:
                self.trace(self.id, stack, input, input_index, action)
            match action:
                case parser.Accept():
                    result = stack[-1][1]
                    assert isinstance(result, Tree)
                    return AcceptResult(result)
                case parser.Reduce(name=name, count=size, transparent=transparent):
                    children: list[TokenValue | Tree] = []
                    for _, c in stack[-size:]:
                        if c is None:
                            continue
                        elif isinstance(c, Tree) and c.name is None:
                            children.extend(c.children)
                        else:
                            children.append(c)
                    value = Tree(
                        name=name if not transparent else None,
                        start=children[0].start,
                        end=children[-1].end,
                        children=tuple(children),
                    )
                    del stack[-size:]
                    goto = table.gotos[stack[-1][0]].get(name)
                    assert goto is not None
                    stack.append((goto, value))
                    continue
                case parser.Shift(state):
                    (kind, start, length) = input_tokens[input_index]
                    tval = TokenValue(kind=kind.value, start=start, end=start + length)
                    stack.append((state, tval))
                    return ContinueResult()
                case parser.Error():
                    return ErrorResult()
                case _:
                    raise ValueError(f"Unknown action type: {action}")
 def parser_thread():
    """Placeholder with no behavior; takes no arguments and returns None."""
    return None
 def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | None, list[str]]:
    """Parse the input with the generated parsing table and return the
    concrete syntax tree.
    The parsing table can be generated by GenerateLR0.gen_table() or by any
    of the other generators below. The parsing mechanism never changes, only
    the table generation mechanism.
    input is a list of tokens. Don't stick an end-of-stream marker, I'll stick
    one on for you.
    This is not a *great* parser, it's really just a demo for what you can
    do with the table.
    """
    input_tokens = tokens.tokens()
    input: list[str] = [t.value for (t, _, _) in input_tokens]
@ -71,73 +150,43 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
    input = input + ["$"]
    input_index = 0
-    # Our stack is a stack of tuples, where the first entry is the state number
+    threads = [ParserThread(0, trace, [(0, None)])]
-    # and the second entry is the 'value' that was generated when the state was
+
    # pushed.
    stack: list[typing.Tuple[int, TokenValue | Tree | None]] = [(0, None)]
    while True:
-        current_state = stack[-1][0]
+        assert len(threads) > 0
        current_token = input[input_index]
        for thread in threads:
            sr = thread.step(table, current_token, input_index, input_tokens)
            match sr:
                case AcceptResult(value):
                    return (value, [])
-        action = table.actions[current_state].get(current_token, parser.Error())
+                case ContinueResult():
-        if trace:
+                    break
            trace(stack, input, input_index, action)
-        match action:
+                case ErrorResult():
-            case parser.Accept():
+                    if input_index >= len(input_tokens):
-                result = stack[-1][1]
+                        message = "Unexpected end of file"
-                assert isinstance(result, Tree)
+                        start = input_tokens[-1][1]
                return (result, [])
            case parser.Reduce(name=name, count=size, transparent=transparent):
                children: list[TokenValue | Tree] = []
                for _, c in stack[-size:]:
                    if c is None:
                        continue
                    elif isinstance(c, Tree) and c.name is None:
                        children.extend(c.children)
                    else:
-                        children.append(c)
+                        message = f"Syntax error: unexpected symbol {current_token}"
                        (_, start, _) = input_tokens[input_index]
-                value = Tree(
+                    line_index = bisect.bisect_left(tokens.lines, start)
-                    name=name if not transparent else None,
+                    if line_index == 0:
-                    start=children[0].start,
+                        col_start = 0
-                    end=children[-1].end,
+                    else:
-                    children=tuple(children),
+                        col_start = tokens.lines[line_index - 1] + 1
-                )
+                    column_index = start - col_start
-                stack = stack[:-size]
+                    line_index += 1
-                goto = table.gotos[stack[-1][0]].get(name)
+                    error = f"{line_index}:{column_index}: {message}"
-                assert goto is not None
+                    return (None, [error])
-                stack.append((goto, value))
+                case _:
                    typing.assert_never(sr)
-            case parser.Shift(state):
+        # All threads have accepted or errored or consumed input.
-                (kind, start, length) = input_tokens[input_index]
+        input_index += 1
                tval = TokenValue(kind=kind.value, start=start, end=start + length)
                stack.append((state, tval))
                input_index += 1
            case parser.Error():
                if input_index >= len(input_tokens):
                    message = "Unexpected end of file"
                    start = input_tokens[-1][1]
                else:
                    message = f"Syntax error: unexpected symbol {current_token}"
                    (_, start, _) = input_tokens[input_index]
                line_index = bisect.bisect_left(tokens.lines, start)
                if line_index == 0:
                    col_start = 0
                else:
                    col_start = tokens.lines[line_index - 1] + 1
                column_index = start - col_start
                line_index += 1
                error = f"{line_index}:{column_index}: {message}"
                return (None, [error])
            case _:
                raise ValueError(f"Unknown action type: {action}")
 ###############################################################################