Remove the old hand-lexer

The machine lexer is working now
This commit is contained in:
John Doty 2024-08-27 16:49:03 -07:00
parent d62076f3c4
commit cd62b65789

View file

@ -413,186 +413,6 @@ class FineGrammar(Grammar):
RSQUARE = Terminal("]", kind=TerminalKind.Punctuation.SquareBracket.Close)
# -----------------------------------------------------------------------------
# DORKY LEXER
# -----------------------------------------------------------------------------
import bisect
# Longest-match patterns for numeric and identifier tokens.
# Raw strings avoid double-escaping the regex metacharacters (same
# patterns as before, just written idiomatically).
NUMBER_RE = re.compile(r"[0-9]+(\.[0-9]*([eE][-+]?[0-9]+)?)?")
IDENTIFIER_RE = re.compile(r"[_A-Za-z][_A-Za-z0-9]*")
# Reserved words mapped to their terminals.  Every keyword's terminal
# attribute on FineGrammar is the upper-cased keyword, except "_",
# which maps to UNDERSCORE.
_KEYWORD_NAMES = (
    "and", "as", "class", "else", "export", "false", "for", "fun",
    "if", "import", "in", "is", "let", "match", "new", "or",
    "return", "self", "true", "while",
)
KEYWORD_TABLE = {"_": FineGrammar.UNDERSCORE}
KEYWORD_TABLE.update(
    (name, getattr(FineGrammar, name.upper())) for name in _KEYWORD_NAMES
)
def tokenize(src: str):
    """Yield (terminal, start_offset, length) triples for the tokens of src.

    Whitespace is skipped and ``//`` line comments are discarded entirely.
    Raises Exception on an unterminated string constant or any character
    that starts no known token.
    """
    pos = 0
    while pos < len(src):
        ch = src[pos]
        if ch.isspace():
            pos += 1
            continue
        token = None
        if ch == "-":
            if src[pos : pos + 2] == "->":
                token = (FineGrammar.ARROW, pos, 2)
            else:
                token = (FineGrammar.MINUS, pos, 1)
        elif ch == "|":
            token = (FineGrammar.BAR, pos, 1)
        elif ch == ":":
            token = (FineGrammar.COLON, pos, 1)
        elif ch == "{":
            token = (FineGrammar.LCURLY, pos, 1)
        elif ch == "}":
            token = (FineGrammar.RCURLY, pos, 1)
        elif ch == ";":
            token = (FineGrammar.SEMICOLON, pos, 1)
        elif ch == "=":
            if src[pos : pos + 2] == "==":
                token = (FineGrammar.EQUALEQUAL, pos, 2)
            else:
                token = (FineGrammar.EQUAL, pos, 1)
        elif ch == "(":
            token = (FineGrammar.LPAREN, pos, 1)
        elif ch == ")":
            token = (FineGrammar.RPAREN, pos, 1)
        elif ch == ",":
            token = (FineGrammar.COMMA, pos, 1)
        elif ch == "!":
            if src[pos : pos + 2] == "!=":
                token = (FineGrammar.BANGEQUAL, pos, 2)
            else:
                token = (FineGrammar.BANG, pos, 1)
        elif ch == "<":
            if src[pos : pos + 2] == "<=":
                token = (FineGrammar.LESSEQUAL, pos, 2)
            else:
                token = (FineGrammar.LESS, pos, 1)
        elif ch == ">":
            if src[pos : pos + 2] == ">=":
                token = (FineGrammar.GREATEREQUAL, pos, 2)
            else:
                token = (FineGrammar.GREATER, pos, 1)
        elif ch == "+":
            token = (FineGrammar.PLUS, pos, 1)
        elif ch == "*":
            token = (FineGrammar.STAR, pos, 1)
        elif ch == "/":
            if src[pos : pos + 2] == "//":
                # Line comment: consume to (but not past) the newline,
                # which the whitespace skip above will then absorb.
                while pos < len(src) and src[pos] != "\n":
                    pos = pos + 1
                continue
            token = (FineGrammar.SLASH, pos, 1)
        elif ch == ".":
            token = (FineGrammar.DOT, pos, 1)
        elif ch == "[":
            token = (FineGrammar.LSQUARE, pos, 1)
        elif ch == "]":
            token = (FineGrammar.RSQUARE, pos, 1)
        elif ch == '"' or ch == "'":
            # Scan for the matching quote, skipping backslash escapes.
            end = pos + 1
            while end < len(src) and src[end] != ch:
                if src[end] == "\\":
                    end += 1  # skip the escaped character
                end += 1
            # BUG FIX: a trailing backslash escape can leave `end` one
            # PAST len(src), so the old `end == len(src)` check missed
            # some unterminated strings; use >= instead.
            if end >= len(src):
                raise Exception(f"Unterminated string constant at {pos}")
            end += 1  # include the closing quote
            token = (FineGrammar.STRING, pos, end - pos)
        else:
            number_match = NUMBER_RE.match(src, pos)
            if number_match:
                token = (FineGrammar.NUMBER, pos, number_match.end() - pos)
            else:
                id_match = IDENTIFIER_RE.match(src, pos)
                if id_match:
                    fragment = src[pos : id_match.end()]
                    keyword = KEYWORD_TABLE.get(fragment)
                    # `is not None` rather than truthiness: don't depend
                    # on Terminal defining __bool__ sensibly.
                    if keyword is not None:
                        token = (keyword, pos, len(fragment))
                    else:
                        token = (FineGrammar.IDENTIFIER, pos, len(fragment))
        if token is None:
            raise Exception("Token error")
        yield token
        pos += token[2]
class FineTokens:
    """Tokenized source text plus line-offset bookkeeping for diagnostics."""

    def __init__(self, src: str):
        self.src = src
        # (terminal, start offset, length) for every token in src.
        # Use the builtin generic `tuple` for consistency with the
        # builtin `list[...]` already used here (file targets 3.9+).
        self._tokens: list[tuple[Terminal, int, int]] = list(tokenize(src))
        # Offsets of every "\n", in ascending order; used by dump() to
        # translate a byte offset into a (line, column) pair.
        self._lines = [m.start() for m in re.finditer("\n", src)]

    def tokens(self):
        """Return the list of (terminal, start, length) token triples."""
        return self._tokens

    def lines(self):
        """Return the ascending offsets of the newline characters in src."""
        return self._lines

    def dump(self, *, start=None, end=None):
        """Print tokens[start:end], one per line, with (line, column) info.

        start/end index into the token list (not the source text) and
        default to the full range.
        """
        if start is None:
            start = 0
        if end is None:
            end = len(self._tokens)
        for token in self._tokens[start:end]:
            # FIX: use a fresh name for the token offset — the original
            # unpacked into `start`, shadowing the slice parameter above.
            (kind, tok_start, length) = token
            # Number of newlines strictly before the token == line index.
            line_index = bisect.bisect_left(self._lines, tok_start)
            if line_index == 0:
                col_start = 0
            else:
                col_start = self._lines[line_index - 1] + 1
            column_index = tok_start - col_start
            value = self.src[tok_start : tok_start + length]
            print(f"{tok_start:04} {kind.value:12} {value} ({line_index}, {column_index})")
if __name__ == "__main__":
from parser.parser import compile_lexer, dump_lexer_table