Correct NFA construction

There was a bug in the way that I was converting regular expressions
to NFAs. I'm still not entirely sure what was going on, but I
revisited the construction and made it follow the literature more
closely, which fixed the problem.
This commit is contained in:
John Doty 2024-08-24 09:24:29 -07:00
parent 30f7798719
commit 0c952e4905
2 changed files with 91 additions and 41 deletions

View file

@ -381,3 +381,37 @@ def test_lexer_compile():
(LexTest.BLANKS, 5, 1),
(LexTest.IDENTIFIER, 6, 3),
]
def test_lexer_numbers():
    """Lex a decimal literal and expect exactly one NUMBER token.

    The NUMBER terminal is a run of digits, optionally followed by a "."
    and more digits, which in turn may be followed by an exponent
    ("e"/"E", an optional sign, and digits). The input "1234.12" must
    come back as a single token covering the whole string.
    """

    def digits():
        # One or more characters from ("0", "9") -- presumably an inclusive
        # range. A new expression is built on every call so that no
        # expression object is shared between positions in the pattern.
        return Re.set(("0", "9")).plus()

    # Pieces are created left to right, in the order they occur in the
    # pattern, and then assembled from the innermost group outwards.
    whole_part = digits()
    point = Re.literal(".")
    fraction_digits = digits()
    exponent_marker = Re.set("e", "E")
    optional_sign = Re.set("+", "-").question()
    exponent_digits = digits()
    optional_exponent = Re.seq(
        exponent_marker,
        optional_sign,
        exponent_digits,
    ).question()
    optional_fraction = Re.seq(
        point,
        fraction_digits,
        optional_exponent,
    ).question()
    number_pattern = Re.seq(whole_part, optional_fraction)

    class LexTest(Grammar):
        @rule
        def number(self):
            return self.NUMBER

        start = number

        NUMBER = Terminal(number_pattern)

    table = compile_lexer(LexTest())
    # Emit the compiled table before tokenizing (presumably a debugging aid).
    dump_lexer_table(table)

    text = "1234.12"
    produced = list(generic_tokenize(text, table))

    # Each entry looks like (terminal, start, length) -- TODO confirm the
    # meaning of the two integers against generic_tokenize.
    expected = [(LexTest.NUMBER, 0, len(text))]
    assert produced == expected