Correct NFA construction
There was a bug in the way I was converting regular expressions to NFAs. I'm still not entirely sure what was going wrong, but I revisited the construction, made it follow the literature more closely, and that fixed the problem.
This commit is contained in:
parent
30f7798719
commit
0c952e4905
2 changed files with 91 additions and 41 deletions
|
|
@ -381,3 +381,37 @@ def test_lexer_compile():
|
|||
(LexTest.BLANKS, 5, 1),
|
||||
(LexTest.IDENTIFIER, 6, 3),
|
||||
]
|
||||
|
||||
|
||||
def test_lexer_numbers():
    """Lex a fractional numeric literal as a single NUMBER token.

    Builds a grammar whose only terminal matches numbers of the shape
    digits [ "." digits [ ("e"|"E") ("+"|"-")? digits ] ], compiles the
    lexer, and checks that "1234.12" tokenizes to exactly one NUMBER
    spanning the whole input.
    """
    # Name the regex pieces bottom-up; each is used exactly once, so the
    # resulting Re object graph matches a single nested construction.
    # (Built in the enclosing function scope rather than the class body
    # so no extra attributes are added to the Grammar class.)
    exponent = Re.seq(
        Re.set("e", "E"),
        Re.set("+", "-").question(),
        Re.set(("0", "9")).plus(),
    ).question()
    fraction = Re.seq(
        Re.literal("."),
        Re.set(("0", "9")).plus(),
        exponent,
    ).question()

    class LexTest(Grammar):
        @rule
        def number(self):
            return self.NUMBER

        start = number

        # Integer part followed by the optional fractional/exponent tail.
        NUMBER = Terminal(Re.seq(Re.set(("0", "9")).plus(), fraction))

    lexer = compile_lexer(LexTest())
    dump_lexer_table(lexer)

    source = "1234.12"
    tokens = list(generic_tokenize(source, lexer))
    # One NUMBER token covering the entire input string.
    assert tokens == [(LexTest.NUMBER, 0, len(source))]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue