Augment number pattern, tests
More robust testing. Error messages would be nice, but this commit does not add them.
This commit is contained in:
parent
0c952e4905
commit
f29ec5072f
2 changed files with 23 additions and 17 deletions
|
|
@@ -441,7 +441,7 @@ def generic_tokenize(
|
||||||
last_accept = None
|
last_accept = None
|
||||||
last_accept_pos = 0
|
last_accept_pos = 0
|
||||||
|
|
||||||
print(f"LEXING: {src} ({len(src)})")
|
# print(f"LEXING: {src} ({len(src)})")
|
||||||
|
|
||||||
while pos < len(src):
|
while pos < len(src):
|
||||||
while state is not None:
|
while state is not None:
|
||||||
|
|
@@ -450,37 +450,39 @@ def generic_tokenize(
|
||||||
last_accept = accept
|
last_accept = accept
|
||||||
last_accept_pos = pos
|
last_accept_pos = pos
|
||||||
|
|
||||||
print(f" @ {pos} state: {state} ({accept})")
|
# print(f" @ {pos} state: {state} ({accept})")
|
||||||
if pos >= len(src):
|
if pos >= len(src):
|
||||||
break
|
break
|
||||||
|
|
||||||
char = ord(src[pos])
|
char = ord(src[pos])
|
||||||
print(f" -> char: {char} ({repr(src[pos])})")
|
# print(f" -> char: {char} ({repr(src[pos])})")
|
||||||
|
|
||||||
# Find the index of the span where the upper value is the tightest
|
# Find the index of the span where the upper value is the tightest
|
||||||
# bound on the character.
|
# bound on the character.
|
||||||
state = None
|
state = None
|
||||||
index = bisect.bisect_right(edges, char, key=lambda x: x[0].upper)
|
index = bisect.bisect_right(edges, char, key=lambda x: x[0].upper)
|
||||||
print(f" -> {index}")
|
# print(f" -> {index}")
|
||||||
if index < len(edges):
|
if index < len(edges):
|
||||||
span, target = edges[index]
|
span, target = edges[index]
|
||||||
print(f" -> {span}, {target}")
|
# print(f" -> {span}, {target}")
|
||||||
if char >= span.lower:
|
if char >= span.lower:
|
||||||
print(f" -> target: {target}")
|
# print(f" -> target: {target}")
|
||||||
state = target
|
state = target
|
||||||
pos += 1
|
pos += 1
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print(f" Nope (outside range)")
|
# print(f" Nope (outside range)")
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
print(f" Nope (at end)")
|
# print(f" Nope (at end)")
|
||||||
|
pass
|
||||||
|
|
||||||
if last_accept is None:
|
if last_accept is None:
|
||||||
raise Exception(f"Token error at {pos}")
|
raise Exception(f"Token error at {pos}")
|
||||||
|
|
||||||
yield (last_accept, start, last_accept_pos - start)
|
yield (last_accept, start, last_accept_pos - start)
|
||||||
|
|
||||||
print(f" Yield: {last_accept}, reset to {last_accept_pos}")
|
# print(f" Yield: {last_accept}, reset to {last_accept_pos}")
|
||||||
last_accept = None
|
last_accept = None
|
||||||
pos = last_accept_pos
|
pos = last_accept_pos
|
||||||
start = pos
|
start = pos
|
||||||
|
|
|
||||||
|
|
@@ -1,7 +1,8 @@
|
||||||
import collections
|
import collections
|
||||||
|
import math
|
||||||
|
|
||||||
from hypothesis import assume, example, given
|
from hypothesis import assume, example, given
|
||||||
from hypothesis.strategies import integers, lists
|
from hypothesis.strategies import integers, lists, floats
|
||||||
|
|
||||||
|
|
||||||
from parser import (
|
from parser import (
|
||||||
|
|
@@ -383,7 +384,10 @@ def test_lexer_compile():
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_lexer_numbers():
|
@given(floats().map(abs))
|
||||||
|
def test_lexer_numbers(n: float):
|
||||||
|
assume(math.isfinite(n))
|
||||||
|
|
||||||
class LexTest(Grammar):
|
class LexTest(Grammar):
|
||||||
@rule
|
@rule
|
||||||
def number(self):
|
def number(self):
|
||||||
|
|
@@ -397,11 +401,11 @@ def test_lexer_numbers():
|
||||||
Re.seq(
|
Re.seq(
|
||||||
Re.literal("."),
|
Re.literal("."),
|
||||||
Re.set(("0", "9")).plus(),
|
Re.set(("0", "9")).plus(),
|
||||||
Re.seq(
|
).question(),
|
||||||
Re.set("e", "E"),
|
Re.seq(
|
||||||
Re.set("+", "-").question(),
|
Re.set("e", "E"),
|
||||||
Re.set(("0", "9")).plus(),
|
Re.set("+", "-").question(),
|
||||||
).question(),
|
Re.set(("0", "9")).plus(),
|
||||||
).question(),
|
).question(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@@ -409,7 +413,7 @@ def test_lexer_numbers():
|
||||||
lexer = compile_lexer(LexTest())
|
lexer = compile_lexer(LexTest())
|
||||||
dump_lexer_table(lexer)
|
dump_lexer_table(lexer)
|
||||||
|
|
||||||
number_string = "1234.12"
|
number_string = str(n)
|
||||||
|
|
||||||
tokens = list(generic_tokenize(number_string, lexer))
|
tokens = list(generic_tokenize(number_string, lexer))
|
||||||
assert tokens == [
|
assert tokens == [
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue