Correct NFA construction

There was a bug in the way I was converting regular expressions to NFAs. I'm still not entirely sure what was going wrong, but I revisited the construction and made it follow the literature more closely, which fixed the problem.
2024-08-24 09:24:29 -07:00 · 2024-08-24 09:24:29 -07:00 · 0c952e4905
commit 0c952e4905
parent 30f7798719
2 changed files with 91 additions and 41 deletions
--- a/tests/test_lexer.py
+++ b/tests/test_lexer.py
@ -381,3 +381,37 @@ def test_lexer_compile():
        (LexTest.BLANKS, 5, 1),
        (LexTest.IDENTIFIER, 6, 3),
    ]
+
+
+def test_lexer_numbers():  # Compile a one-terminal grammar and check that a decimal literal lexes as a single NUMBER token.
+    class LexTest(Grammar):  # minimal grammar: the start rule is just the NUMBER terminal
+        @rule
+        def number(self):
+            return self.NUMBER
+
+        start = number
+
+        NUMBER = Terminal(  # integer part, then an optional fraction group (see nesting note below)
+            Re.seq(
+                Re.set(("0", "9")).plus(),  # presumably one or more digits; the tuple looks like a character range -- confirm against Re.set
+                Re.seq(  # fraction group; made optional by the .question() that closes it
+                    Re.literal("."),
+                    Re.set(("0", "9")).plus(),
+                    Re.seq(  # exponent group is nested inside the fraction group, so it can only follow ".digits"
+                        Re.set("e", "E"),  # plain string arguments here, unlike the tuple form above -- presumably individual characters
+                        Re.set("+", "-").question(),  # presumably an optional sign -- confirm .question() semantics
+                        Re.set(("0", "9")).plus(),
+                    ).question(),
+                ).question(),
+            )
+        )
+
+    lexer = compile_lexer(LexTest())
+    dump_lexer_table(lexer)  # return value is unused by the assertions; presumably a debugging aid
+
+    number_string = "1234.12"  # has a fraction but no exponent, so the exponent branch is not exercised by this input
+
+    tokens = list(generic_tokenize(number_string, lexer))
+    assert tokens == [
+        (LexTest.NUMBER, 0, len(number_string)),  # looks like (terminal, start offset, length) -- confirm; the whole input must be one token
+    ]