Record trivia in tokens
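This should make our formatting better, I think: each non-trivia token now carries the trivia that precedes and follows it, so the document can be recovered as written.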
This commit is contained in:
parent
8a80bcad64
commit
b3b2102864
3 changed files with 90 additions and 12 deletions
|
|
@ -13,6 +13,8 @@ class TokenValue:
|
||||||
kind: str
|
kind: str
|
||||||
start: int
|
start: int
|
||||||
end: int
|
end: int
|
||||||
|
pre_trivia: list["TokenValue"]
|
||||||
|
post_trivia: list["TokenValue"]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -313,17 +315,50 @@ class Parser:
|
||||||
self.table = table
|
self.table = table
|
||||||
|
|
||||||
def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
|
def parse(self, tokens: TokenStream) -> typing.Tuple[Tree | None, list[str]]:
|
||||||
# TODO: If this were a for reals for reals parser we would keep the trivia
|
|
||||||
# accessible in the tree.
|
|
||||||
input_tokens = tokens.tokens()
|
input_tokens = tokens.tokens()
|
||||||
input: list[TokenValue] = [
|
|
||||||
TokenValue(kind=kind.name, start=start, end=start + length)
|
# Filter the input tokens, to generate a list of non-trivia tokens.
|
||||||
for (kind, start, length) in input_tokens
|
# In addition, track the trivia tokens we find along the way, and put
|
||||||
if kind.name is not None and kind.name not in self.table.trivia
|
# them into a list attached to each non-trivia token, so we can
|
||||||
]
|
# actually recover the document *as written*.
|
||||||
|
input: list[TokenValue] = []
|
||||||
|
trivia: list[TokenValue] = []
|
||||||
|
for kind, start, length in input_tokens:
|
||||||
|
assert kind.name is not None
|
||||||
|
if kind.name in self.table.trivia:
|
||||||
|
trivia.append(
|
||||||
|
TokenValue(
|
||||||
|
kind=kind.name,
|
||||||
|
start=start,
|
||||||
|
end=start + length,
|
||||||
|
pre_trivia=[],
|
||||||
|
post_trivia=[],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
prev_trivia = trivia
|
||||||
|
trivia = []
|
||||||
|
|
||||||
|
input.append(
|
||||||
|
TokenValue(
|
||||||
|
kind=kind.name,
|
||||||
|
start=start,
|
||||||
|
end=start + length,
|
||||||
|
pre_trivia=prev_trivia,
|
||||||
|
post_trivia=trivia,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
eof = 0 if len(input) == 0 else input[-1].end
|
eof = 0 if len(input) == 0 else input[-1].end
|
||||||
input = input + [TokenValue(kind="$", start=eof, end=eof)]
|
input = input + [
|
||||||
|
TokenValue(
|
||||||
|
kind="$",
|
||||||
|
start=eof,
|
||||||
|
end=eof,
|
||||||
|
pre_trivia=trivia,
|
||||||
|
post_trivia=[],
|
||||||
|
)
|
||||||
|
]
|
||||||
input_index = 0
|
input_index = 0
|
||||||
|
|
||||||
# Our stack is a stack of tuples, where the first entry is the state
|
# Our stack is a stack of tuples, where the first entry is the state
|
||||||
|
|
@ -428,7 +463,14 @@ class Parser:
|
||||||
assert repair.value is not None
|
assert repair.value is not None
|
||||||
pos = input[cursor].end
|
pos = input[cursor].end
|
||||||
input.insert(
|
input.insert(
|
||||||
cursor, TokenValue(kind=repair.value, start=pos, end=pos)
|
cursor,
|
||||||
|
TokenValue(
|
||||||
|
kind=repair.value,
|
||||||
|
start=pos,
|
||||||
|
end=pos,
|
||||||
|
pre_trivia=[],
|
||||||
|
post_trivia=[],
|
||||||
|
),
|
||||||
)
|
)
|
||||||
cursor += 1
|
cursor += 1
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -255,7 +255,16 @@ class Matcher:
|
||||||
table = self.table
|
table = self.table
|
||||||
|
|
||||||
input = [(child_to_name(i), i) for i in items] + [
|
input = [(child_to_name(i), i) for i in items] + [
|
||||||
("$", runtime.TokenValue(kind="$", start=0, end=0))
|
(
|
||||||
|
"$",
|
||||||
|
runtime.TokenValue(
|
||||||
|
kind="$",
|
||||||
|
start=0,
|
||||||
|
end=0,
|
||||||
|
pre_trivia=[],
|
||||||
|
post_trivia=[],
|
||||||
|
),
|
||||||
|
)
|
||||||
]
|
]
|
||||||
input_index = 0
|
input_index = 0
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@ class Tokens:
|
||||||
|
|
||||||
def _tree(treeform) -> runtime.Tree | runtime.TokenValue:
|
def _tree(treeform) -> runtime.Tree | runtime.TokenValue:
|
||||||
if isinstance(treeform, str):
|
if isinstance(treeform, str):
|
||||||
return runtime.TokenValue(treeform, 0, 0)
|
return runtime.TokenValue(treeform, 0, 0, [], [])
|
||||||
else:
|
else:
|
||||||
assert isinstance(treeform, tuple)
|
assert isinstance(treeform, tuple)
|
||||||
name = treeform[0]
|
name = treeform[0]
|
||||||
|
|
@ -277,7 +277,34 @@ def test_grammar_ignore_trivia():
|
||||||
)
|
)
|
||||||
|
|
||||||
assert errors == []
|
assert errors == []
|
||||||
assert tree == _tree(("sentence", ("sentence", "WORD"), "WORD"))
|
assert tree == runtime.Tree(
|
||||||
|
"sentence",
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
(
|
||||||
|
runtime.Tree(
|
||||||
|
"sentence",
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
(
|
||||||
|
runtime.TokenValue(
|
||||||
|
"WORD",
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
[],
|
||||||
|
[runtime.TokenValue("BLANK", 0, 0, [], [])],
|
||||||
|
),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
runtime.TokenValue(
|
||||||
|
"WORD",
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
[runtime.TokenValue("BLANK", 0, 0, [], [])],
|
||||||
|
[runtime.TokenValue("BLANK", 0, 0, [], [])],
|
||||||
|
),
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_grammar_unknown_trivia():
|
def test_grammar_unknown_trivia():
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue