Compare commits

..

No commits in common. "b2e7d15fb886753dbc993788a4eedff004bc33ac" and "5f19b1e73ec42ba353ef4a4cf185ca19b01b2915" have entirely different histories.

3 changed files with 169 additions and 181 deletions

View file

@ -8,22 +8,6 @@ from dataclasses import dataclass
from . import parser from . import parser
def offset_to_line_column(lines: list[int], pos: int) -> tuple[int, int]:
    """Translate an absolute text offset into a (line, column) pair.

    *lines* holds the offsets of every line-break character in the text,
    in ascending order.  Line numbers are 1-based and columns are
    0-based, following common editor conventions, so that reported
    error locations are intelligible.
    """
    # The number of line breaks strictly before `pos` is the 0-based
    # line index of the position.
    breaks_before = bisect.bisect_left(lines, pos)
    # Columns count from the character just after the previous break
    # (or from offset 0 when `pos` sits on the first line).
    line_start = 0 if breaks_before == 0 else lines[breaks_before - 1] + 1
    return (breaks_before + 1, pos - line_start)
@dataclass @dataclass
class TokenValue: class TokenValue:
kind: str kind: str
@ -613,16 +597,21 @@ class Parser:
if errors: if errors:
lines = tokens.lines() lines = tokens.lines()
for parse_error in errors: for parse_error in errors:
line_index, column_index = offset_to_line_column(lines, parse_error.start) line_index = bisect.bisect_left(lines, parse_error.start)
if line_index == 0:
col_start = 0
else:
col_start = lines[line_index - 1] + 1
column_index = parse_error.start - col_start
line_index += 1
error_strings.append(f"{line_index}:{column_index}: {parse_error.message}") error_strings.append(f"{line_index}:{column_index}: {parse_error.message}")
return (result, error_strings) return (result, error_strings)
def generic_tokenize( def generic_tokenize(
src: str, src: str, table: parser.LexerTable
table: parser.LexerTable,
lines: list[int],
) -> typing.Iterable[tuple[parser.Terminal, int, int]]: ) -> typing.Iterable[tuple[parser.Terminal, int, int]]:
pos = 0 pos = 0
state = 0 state = 0
@ -658,8 +647,7 @@ def generic_tokenize(
pass pass
if last_accept is None: if last_accept is None:
line_index, column_index = offset_to_line_column(lines, pos) raise Exception(f"Token error at {pos}")
raise Exception(f"{line_index}:{column_index}: Unexpected character '{src[pos]}'")
yield (last_accept, start, last_accept_pos - start) yield (last_accept, start, last_accept_pos - start)
@ -673,10 +661,10 @@ class GenericTokenStream:
def __init__(self, src: str, lexer: parser.LexerTable): def __init__(self, src: str, lexer: parser.LexerTable):
self.src = src self.src = src
self.lexer = lexer self.lexer = lexer
self._lines = [m.start() for m in re.finditer("\n", src)]
self._tokens: list[typing.Tuple[parser.Terminal, int, int]] = list( self._tokens: list[typing.Tuple[parser.Terminal, int, int]] = list(
generic_tokenize(src, lexer, self._lines) generic_tokenize(src, lexer)
) )
self._lines = [m.start() for m in re.finditer("\n", src)]
def tokens(self): def tokens(self):
return self._tokens return self._tokens