Compare commits
4 commits
7c1d9b5f2b
...
bde22a5c99
| Author | SHA1 | Date | |
|---|---|---|---|
| bde22a5c99 | |||
| 8d58c64040 | |||
| 0fc04cf11e | |||
| 797ec8cd76 |
3 changed files with 390 additions and 71 deletions
201
grammar.py
201
grammar.py
|
|
@ -1,4 +1,6 @@
|
||||||
# This is an example grammar.
|
# This is an example grammar.
|
||||||
|
import re
|
||||||
|
|
||||||
from parser import Assoc, Grammar, Nothing, Token, rule, seq
|
from parser import Assoc, Grammar, Nothing, Token, rule, seq
|
||||||
|
|
||||||
ARROW = Token("Arrow")
|
ARROW = Token("Arrow")
|
||||||
|
|
@ -119,7 +121,7 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def alternate_type(self):
|
def alternate_type(self):
|
||||||
return seq(self.type_expression, BAR, self.type_identifier)
|
return seq(self.type_expression, OR, self.type_identifier)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def type_identifier(self):
|
def type_identifier(self):
|
||||||
|
|
@ -170,6 +172,7 @@ class FineGrammar(Grammar):
|
||||||
def block(self):
|
def block(self):
|
||||||
return (
|
return (
|
||||||
seq(LCURLY, RCURLY)
|
seq(LCURLY, RCURLY)
|
||||||
|
| seq(LCURLY, self.expression, RCURLY)
|
||||||
| seq(LCURLY, self.statement_list, RCURLY)
|
| seq(LCURLY, self.statement_list, RCURLY)
|
||||||
| seq(LCURLY, self.statement_list, self.expression, RCURLY)
|
| seq(LCURLY, self.statement_list, self.expression, RCURLY)
|
||||||
)
|
)
|
||||||
|
|
@ -196,7 +199,7 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def return_statement(self):
|
def return_statement(self):
|
||||||
return seq(RETURN, self.expression, SEMICOLON)
|
return seq(RETURN, self.expression, SEMICOLON) | seq(RETURN, SEMICOLON)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def for_statement(self):
|
def for_statement(self):
|
||||||
|
|
@ -254,6 +257,7 @@ class FineGrammar(Grammar):
|
||||||
| seq(self.relation_expression, LESSEQUAL, self.additive_expression)
|
| seq(self.relation_expression, LESSEQUAL, self.additive_expression)
|
||||||
| seq(self.relation_expression, GREATER, self.additive_expression)
|
| seq(self.relation_expression, GREATER, self.additive_expression)
|
||||||
| seq(self.relation_expression, GREATEREQUAL, self.additive_expression)
|
| seq(self.relation_expression, GREATEREQUAL, self.additive_expression)
|
||||||
|
| self.additive_expression
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
|
|
@ -288,6 +292,7 @@ class FineGrammar(Grammar):
|
||||||
| self.list_constructor_expression
|
| self.list_constructor_expression
|
||||||
| self.object_constructor_expression
|
| self.object_constructor_expression
|
||||||
| self.match_expression
|
| self.match_expression
|
||||||
|
| seq(self.primary_expression, LPAREN, RPAREN)
|
||||||
| seq(self.primary_expression, LPAREN, self.expression_list, RPAREN)
|
| seq(self.primary_expression, LPAREN, self.expression_list, RPAREN)
|
||||||
| seq(self.primary_expression, DOT, IDENTIFIER)
|
| seq(self.primary_expression, DOT, IDENTIFIER)
|
||||||
| seq(LPAREN, self.expression, RPAREN)
|
| seq(LPAREN, self.expression, RPAREN)
|
||||||
|
|
@ -315,7 +320,7 @@ class FineGrammar(Grammar):
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def match_expression(self):
|
def match_expression(self):
|
||||||
return seq(MATCH, self.match_body)
|
return seq(MATCH, self.expression, self.match_body)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def match_body(self):
|
def match_body(self):
|
||||||
|
|
@ -375,15 +380,187 @@ class FineGrammar(Grammar):
|
||||||
return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
|
return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
|
||||||
|
|
||||||
|
|
||||||
grammar = FineGrammar()
|
# -----------------------------------------------------------------------------
|
||||||
table = grammar.build_table(start="file")
|
# DORKY LEXER
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
NUMBER_RE = re.compile("[0-9]+(\\.[0-9]*([eE][-+]?[0-9]+)?)?")
|
||||||
|
IDENTIFIER_RE = re.compile("[_A-Za-z][_A-Za-z0-9]*")
|
||||||
|
KEYWORD_TABLE = {
|
||||||
|
"_": UNDERSCORE,
|
||||||
|
"and": AND,
|
||||||
|
"as": AS,
|
||||||
|
"class": CLASS,
|
||||||
|
"else": ELSE,
|
||||||
|
"export": EXPORT,
|
||||||
|
"false": FALSE,
|
||||||
|
"for": FOR,
|
||||||
|
"fun": FUN,
|
||||||
|
"if": IF,
|
||||||
|
"import": IMPORT,
|
||||||
|
"in": IN,
|
||||||
|
"is": IS,
|
||||||
|
"let": LET,
|
||||||
|
"match": MATCH,
|
||||||
|
"new": NEW,
|
||||||
|
"or": OR,
|
||||||
|
"return": RETURN,
|
||||||
|
"self": SELF,
|
||||||
|
"true": TRUE,
|
||||||
|
"while": WHILE,
|
||||||
|
}
|
||||||
|
|
||||||
print(f"{len(table)} states")
|
|
||||||
|
|
||||||
average_entries = sum(len(row) for row in table) / len(table)
|
def tokenize(src: str):
    """Lazily split `src` into tokens.

    Yields ``(kind, start, length)`` triples, where `kind` is one of the
    module-level token constants, `start` is the offset of the token in
    `src`, and `length` is the number of characters it covers. Whitespace
    and ``//`` line comments are skipped and produce no token.

    Raises:
        Exception: if a string literal is not closed before the end of the
            input, or if a character does not start any known token.
    """
    pos = 0
    while pos < len(src):
        ch = src[pos]
        if ch.isspace():
            pos += 1
            continue

        token = None
        if ch == "-":
            if src[pos : pos + 2] == "->":
                token = (ARROW, pos, 2)
            else:
                token = (MINUS, pos, 1)

        elif ch == "|":
            token = (BAR, pos, 1)

        elif ch == ":":
            token = (COLON, pos, 1)

        elif ch == "{":
            token = (LCURLY, pos, 1)

        elif ch == "}":
            token = (RCURLY, pos, 1)

        elif ch == ";":
            token = (SEMICOLON, pos, 1)

        elif ch == "=":
            if src[pos : pos + 2] == "==":
                token = (EQUALEQUAL, pos, 2)
            else:
                token = (EQUAL, pos, 1)

        elif ch == "(":
            token = (LPAREN, pos, 1)

        elif ch == ")":
            token = (RPAREN, pos, 1)

        elif ch == ",":
            token = (COMMA, pos, 1)

        elif ch == "!":
            if src[pos : pos + 2] == "!=":
                token = (BANGEQUAL, pos, 2)
            else:
                token = (BANG, pos, 1)

        elif ch == "<":
            if src[pos : pos + 2] == "<=":
                token = (LESSEQUAL, pos, 2)
            else:
                token = (LESS, pos, 1)

        elif ch == ">":
            if src[pos : pos + 2] == ">=":
                token = (GREATEREQUAL, pos, 2)
            else:
                token = (GREATER, pos, 1)

        elif ch == "+":
            token = (PLUS, pos, 1)

        elif ch == "*":
            token = (STAR, pos, 1)

        elif ch == "/":
            if src[pos : pos + 2] == "//":
                # Line comment: drop everything up to (not including) the
                # newline; the whitespace branch above eats the newline.
                while pos < len(src) and src[pos] != "\n":
                    pos = pos + 1
                continue

            token = (SLASH, pos, 1)

        elif ch == ".":
            token = (DOT, pos, 1)

        elif ch == "[":
            token = (LSQUARE, pos, 1)

        elif ch == "]":
            token = (RSQUARE, pos, 1)

        elif ch == '"' or ch == "'":
            # Scan to the matching quote. A backslash skips the character
            # that follows it, so an escaped quote does not end the string.
            end = pos + 1
            while end < len(src) and src[end] != ch:
                if src[end] == "\\":
                    end += 1
                end += 1
            # `>=`, not `==`: a backslash as the very last character pushes
            # `end` one past len(src), which must still count as unterminated.
            if end >= len(src):
                raise Exception(f"Unterminated string constant at {pos}")
            end += 1
            token = (STRING, pos, end - pos)

        else:
            number_match = NUMBER_RE.match(src, pos)
            if number_match:
                token = (NUMBER, pos, number_match.end() - pos)
            else:
                id_match = IDENTIFIER_RE.match(src, pos)
                if id_match:
                    fragment = src[pos : id_match.end()]
                    # Reserved words share the identifier syntax; the table
                    # decides which token kind the fragment really is.
                    keyword = KEYWORD_TABLE.get(fragment)
                    if keyword:
                        token = (keyword, pos, len(fragment))
                    else:
                        token = (IDENTIFIER, pos, len(fragment))

        if token is None:
            raise Exception("Token error")
        yield token
        pos += token[2]
|
||||||
|
|
||||||
|
|
||||||
|
import bisect
|
||||||
|
|
||||||
|
|
||||||
|
class FineTokens:
    """The token stream for one source text, plus newline offsets.

    Attributes are set in ``__init__``:
      src    -- the original source text
      tokens -- list of (kind, start, length) triples from `tokenize`
      lines  -- ascending offsets of every newline character in `src`
    """

    def __init__(self, src: str):
        self.src = src
        self.tokens = [*tokenize(src)]
        self.lines = [offset for offset, char in enumerate(src) if char == "\n"]

    def dump(self, *, start=None, end=None):
        """Print the tokens in the index range [start, end), one per line.

        Each line shows the token offset, its kind, its text, and a
        zero-based (line, column) pair. Both bounds default to the ends of
        the token list.
        """
        first = 0 if start is None else start
        last = len(self.tokens) if end is None else end

        for kind, start, length in self.tokens[first:last]:
            # The number of newlines before the token is its line index.
            line_index = bisect.bisect_left(self.lines, start)
            col_start = self.lines[line_index - 1] + 1 if line_index else 0
            column_index = start - col_start
            print(
                f"{start:04} {kind.value:12} {self.src[start:start+length]} ({line_index}, {column_index})"
            )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Build the parse table for the `expression` rule and report its size.
    grammar = FineGrammar()
    table = grammar.build_table(start="expression")

    print(f"{len(table)} states")

    row_sizes = [len(row) for row in table]
    average_entries = sum(row_sizes) / len(table)
    max_entries = max(row_sizes)
    print(f"{average_entries} average, {max_entries} max")
|
||||||
|
|
|
||||||
130
harness.py
Normal file
130
harness.py
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
import bisect
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import grammar
|
||||||
|
import parser
|
||||||
|
|
||||||
|
# from parser import Token, Grammar, rule, seq
|
||||||
|
|
||||||
|
|
||||||
|
def trace_state(stack, input, input_index, action):
    """Print a one-line snapshot of the parser, for use as a `trace` hook.

    The line has three left-aligned columns: the state numbers currently on
    the stack, the next (at most four) input symbols starting at
    `input_index`, and the action that is about to be performed.
    """
    state_numbers = []
    for entry in stack:
        state_numbers.append(entry[0])
    upcoming = input[input_index : input_index + 4]

    template = "{stack: <20} {input: <50} {action: <5}"
    line = template.format(
        stack=repr(state_numbers),
        input=repr(upcoming),
        action=repr(action),
    )
    print(line)
|
||||||
|
|
||||||
|
|
||||||
|
def parse(table, tokens, trace=None):
    """Parse a token stream with a generated table and return the syntax tree.

    The parsing mechanism is the same whichever generator produced the
    table; only the table differs. This is not a *great* parser, it's really
    just a demo for what you can do with the table.

    Args:
        table: Sequence indexed by state number. Each row maps a symbol name
            to an action tuple: ``("shift", state)``,
            ``("reduce", name, size)``, ``("goto", state)`` or
            ``("accept",)``. A missing entry is a syntax error.
        tokens: Object with a `tokens` sequence of ``(kind, start, length)``
            triples (only ``kind.value`` and `start` are read) and a `lines`
            list of ascending newline offsets used for error positions.
            Don't add an end-of-stream marker; "$" is appended here.
        trace: Optional callable invoked as
            ``trace(stack, input, input_index, action)`` before each action.

    Returns:
        A ``(tree, errors)`` pair. On success `tree` is a nested
        ``(name, children)`` tuple and `errors` is empty. On a syntax error
        `tree` is None and `errors` holds one "line:column: ..." message
        (line is 1-based, column is 0-based).

    Raises:
        ValueError: if the syntax error is at the end of the input.
    """
    symbols = [t.value for (t, _, _) in tokens.tokens]

    assert "$" not in symbols
    symbols = symbols + ["$"]
    input_index = 0

    # Our stack is a stack of tuples, where the first entry is the state number
    # and the second entry is the 'value' that was generated when the state was
    # pushed.
    stack: list[typing.Tuple[int, typing.Any]] = [(0, None)]
    while True:
        current_state = stack[-1][0]
        current_token = symbols[input_index]

        action = table[current_state].get(current_token, ("error",))
        if trace:
            trace(stack, symbols, input_index, action)

        if action[0] == "accept":
            return (stack[-1][1], [])

        elif action[0] == "reduce":
            name = action[1]
            size = action[2]

            # Slice at an explicit index rather than with `-size`: for an
            # empty production size is 0, and `stack[-0:]` would select the
            # whole stack while `stack[:-0]` would empty it.
            split = len(stack) - size
            value = (name, tuple(s[1] for s in stack[split:]))
            stack = stack[:split]

            goto = table[stack[-1][0]].get(name, ("error",))
            assert goto[0] == "goto"  # Corrupt table?
            stack.append((goto[1], value))

        elif action[0] == "shift":
            stack.append((action[1], (current_token, ())))
            input_index += 1

        elif action[0] == "error":
            if input_index >= len(tokens.tokens):
                # The offending symbol is the synthetic "$": there is no
                # real token to take a position from.
                raise ValueError("Unexpected end of file")
            else:
                (_, start, _) = tokens.tokens[input_index]
                # The number of newlines before the token is its 0-based line.
                line_index = bisect.bisect_left(tokens.lines, start)
                if line_index == 0:
                    col_start = 0
                else:
                    col_start = tokens.lines[line_index - 1] + 1
                column_index = start - col_start
                line_index += 1

                return (
                    None,
                    [
                        f"{line_index}:{column_index}: Syntax error: unexpected symbol {current_token}"
                    ],
                )
|
||||||
|
|
||||||
|
|
||||||
|
def harness(lexer_func, grammar_func, start_rule, source_path):
    """Build a parse table, print its size, and optionally parse a file.

    Args:
        lexer_func: Called with the source text; its result is handed to
            `parse` as the token stream.
        grammar_func: Called with no arguments; the result must provide
            ``build_table(start=..., generator=...)``.
        start_rule: Name of the start rule passed to ``build_table``.
        source_path: Path of a UTF-8 file to parse. If falsy, only the table
            statistics are printed.
    """
    # Swap in parser.GenerateLR1 here to build with the LR1 generator instead.
    generator = parser.GenerateLALR
    table = grammar_func().build_table(start=start_rule, generator=generator)
    print(f"{len(table)} states")

    row_sizes = [len(row) for row in table]
    average_entries = sum(row_sizes) / len(table)
    max_entries = max(row_sizes)
    print(f"{average_entries} average, {max_entries} max")

    if not source_path:
        return

    with open(source_path, "r", encoding="utf-8") as f:
        src = f.read()

    tokens = lexer_func(src)
    _, errors = parse(table, tokens)
    if errors:
        print(f"{len(errors)} errors:")
        for error in errors:
            print(f" {error}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import sys

    # Exactly one command-line argument is taken as the file to parse; with
    # any other argument count only the table statistics are printed.
    source_path = sys.argv[1] if len(sys.argv) == 2 else None

    harness(
        lexer_func=grammar.FineTokens,
        grammar_func=grammar.FineGrammar,
        start_rule="file",
        source_path=source_path,
    )
|
||||||
|
|
||||||
|
# print(parser_faster.format_table(gen, table))
|
||||||
|
# print()
|
||||||
|
# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])
|
||||||
130
parser.py
130
parser.py
|
|
@ -257,6 +257,14 @@ class Configuration:
|
||||||
lookahead=(),
|
lookahead=(),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def replace_lookahead(self, lookahead: typing.Tuple[int, ...]):
    """Return a new Configuration with this one's name, symbols and
    position, but with `lookahead` as its lookahead. `self` is not modified.
    """
    core = {
        "name": self.name,
        "symbols": self.symbols,
        "position": self.position,
    }
    return Configuration(lookahead=lookahead, **core)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def rest(self):
|
def rest(self):
|
||||||
return self.symbols[(self.position + 1) :]
|
return self.symbols[(self.position + 1) :]
|
||||||
|
|
@ -1382,57 +1390,67 @@ class GenerateLALR(GenerateLR1):
|
||||||
use a bunch of improvement, probably.)
|
use a bunch of improvement, probably.)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def merge_sets(self, config_set_a, config_set_b):
|
def gen_sets(self, config_set: typing.Tuple[Configuration, ...]) -> ConfigurationSetInfo:
|
||||||
"""Merge the two config sets, by keeping the item cores but merging
|
|
||||||
the lookahead sets for each item.
|
|
||||||
"""
|
|
||||||
assert len(config_set_a) == len(config_set_b)
|
|
||||||
merged = []
|
|
||||||
for index, a in enumerate(config_set_a):
|
|
||||||
b = config_set_b[index]
|
|
||||||
assert a.clear_lookahead() == b.clear_lookahead()
|
|
||||||
|
|
||||||
new_lookahead = a.lookahead + b.lookahead
|
|
||||||
new_lookahead = tuple(sorted(set(new_lookahead)))
|
|
||||||
merged.append(a.clear_lookahead())
|
|
||||||
|
|
||||||
return tuple(merged)
|
|
||||||
|
|
||||||
def sets_equal(self, a, b):
|
|
||||||
a_no_la = tuple(s.clear_lookahead() for s in a)
|
|
||||||
b_no_la = tuple(s.clear_lookahead() for s in b)
|
|
||||||
return a_no_la == b_no_la
|
|
||||||
|
|
||||||
def gen_sets(self, config_set) -> ConfigurationSetInfo:
|
|
||||||
"""Recursively generate all configuration sets starting from the
|
"""Recursively generate all configuration sets starting from the
|
||||||
provided set, and merge them with the provided set 'F'.
|
provided set.
|
||||||
|
|
||||||
The difference between this method and the one in GenerateLR0, where
|
The difference between this method and the one in GenerateLR0, where
|
||||||
this comes from, is in the part that stops recursion. In LALR we
|
this comes from, is that we're going to be keeping track of states
|
||||||
compare for set equality *ignoring lookahead*. If we find a match,
|
that we found that are equivalent in lookahead.
|
||||||
then instead of returning F unchanged, we merge the two equal sets
|
|
||||||
and replace the set in F, returning the modified set.
|
|
||||||
"""
|
"""
|
||||||
|
#
|
||||||
|
# First, do the actual walk. Don't merge yet: just keep track of all
|
||||||
|
# the config sets that need to be merged.
|
||||||
|
#
|
||||||
F = {}
|
F = {}
|
||||||
|
seen = set()
|
||||||
successors = []
|
successors = []
|
||||||
pending = [config_set]
|
pending = [config_set]
|
||||||
while len(pending) > 0:
|
while len(pending) > 0:
|
||||||
config_set = pending.pop()
|
config_set = pending.pop()
|
||||||
|
if config_set in seen:
|
||||||
|
continue
|
||||||
|
seen.add(config_set)
|
||||||
|
|
||||||
config_set_no_la = tuple(s.clear_lookahead() for s in config_set)
|
config_set_no_la = tuple(s.clear_lookahead() for s in config_set)
|
||||||
|
|
||||||
existing = F.get(config_set_no_la)
|
existing = F.get(config_set_no_la)
|
||||||
if existing is not None:
|
if existing is not None:
|
||||||
F[config_set_no_la] = self.merge_sets(config_set, existing)
|
existing.append(config_set)
|
||||||
else:
|
else:
|
||||||
F[config_set_no_la] = config_set
|
F[config_set_no_la] = [config_set]
|
||||||
for symbol, successor in self.gen_all_successors(config_set):
|
|
||||||
successor_no_la = tuple(s.clear_lookahead() for s in successor)
|
for symbol, successor in self.gen_all_successors(config_set):
|
||||||
successors.append((config_set_no_la, symbol, successor_no_la))
|
successor_no_la = tuple(s.clear_lookahead() for s in successor)
|
||||||
pending.append(successor)
|
successors.append((config_set_no_la, symbol, successor_no_la))
|
||||||
|
pending.append(successor)
|
||||||
|
|
||||||
|
# Now we gathered the sets, merge them all.
|
||||||
|
final_sets = {}
|
||||||
|
for key, config_sets in F.items():
|
||||||
|
new_config_set = []
|
||||||
|
config_groupings = [[] for _ in range(len(config_sets[0]))]
|
||||||
|
for config_set in config_sets:
|
||||||
|
for i, config in enumerate(config_set):
|
||||||
|
config_groupings[i].append(config)
|
||||||
|
|
||||||
|
for config_group in config_groupings:
|
||||||
|
new_lookahead = [l for config in config_group for l in config.lookahead]
|
||||||
|
new_lookahead = tuple(sorted(set(new_lookahead)))
|
||||||
|
new_config_set.append(
|
||||||
|
Configuration(
|
||||||
|
name=config_group[0].name,
|
||||||
|
symbols=config_group[0].symbols,
|
||||||
|
position=config_group[0].position,
|
||||||
|
lookahead=new_lookahead,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
final_sets[key] = tuple(new_config_set)
|
||||||
|
|
||||||
# Register all the actually merged, final config sets.
|
# Register all the actually merged, final config sets.
|
||||||
result = ConfigurationSetInfo()
|
result = ConfigurationSetInfo()
|
||||||
for config_set in F.values():
|
for config_set in final_sets.values():
|
||||||
result.register_config_set(config_set)
|
result.register_config_set(config_set)
|
||||||
|
|
||||||
# Now record all the successors that we found. Of course, the actual
|
# Now record all the successors that we found. Of course, the actual
|
||||||
|
|
@ -1443,10 +1461,10 @@ class GenerateLALR(GenerateLR1):
|
||||||
# so we can find the final sets, then look them up in the registered
|
# so we can find the final sets, then look them up in the registered
|
||||||
# sets, and actually register the successor.
|
# sets, and actually register the successor.
|
||||||
for config_set_no_la, symbol, successor_no_la in successors:
|
for config_set_no_la, symbol, successor_no_la in successors:
|
||||||
actual_config_set = F[config_set_no_la]
|
actual_config_set = final_sets[config_set_no_la]
|
||||||
from_index = result.config_set_key[actual_config_set]
|
from_index = result.config_set_key[actual_config_set]
|
||||||
|
|
||||||
actual_successor = F[successor_no_la]
|
actual_successor = final_sets[successor_no_la]
|
||||||
to_index = result.config_set_key[actual_successor]
|
to_index = result.config_set_key[actual_successor]
|
||||||
|
|
||||||
result.add_successor(from_index, symbol, to_index)
|
result.add_successor(from_index, symbol, to_index)
|
||||||
|
|
@ -1499,7 +1517,7 @@ class Token(Rule):
|
||||||
def __init__(self, value):
|
def __init__(self, value):
|
||||||
self.value = sys.intern(value)
|
self.value = sys.intern(value)
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str], None, None]:
|
def flatten(self) -> typing.Generator[list["str | Token"], None, None]:
|
||||||
# We are just ourselves when flattened.
|
# We are just ourselves when flattened.
|
||||||
yield [self]
|
yield [self]
|
||||||
|
|
||||||
|
|
@ -1546,7 +1564,7 @@ class AlternativeRule(Rule):
|
||||||
self.left = left
|
self.left = left
|
||||||
self.right = right
|
self.right = right
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str], None, None]:
|
def flatten(self) -> typing.Generator[list[str | Token], None, None]:
|
||||||
# All the things from the left of the alternative, then all the things
|
# All the things from the left of the alternative, then all the things
|
||||||
# from the right, never intermingled.
|
# from the right, never intermingled.
|
||||||
yield from self.left.flatten()
|
yield from self.left.flatten()
|
||||||
|
|
@ -1562,7 +1580,7 @@ class SequenceRule(Rule):
|
||||||
self.first = first
|
self.first = first
|
||||||
self.second = second
|
self.second = second
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str], None, None]:
|
def flatten(self) -> typing.Generator[list[str | Token], None, None]:
|
||||||
# All the things in the prefix....
|
# All the things in the prefix....
|
||||||
for first in self.first.flatten():
|
for first in self.first.flatten():
|
||||||
# ...potentially followed by all the things in the suffix.
|
# ...potentially followed by all the things in the suffix.
|
||||||
|
|
@ -1575,7 +1593,7 @@ class NothingRule(Rule):
|
||||||
these, you're probably better off just using the singleton `Nothing`.
|
these, you're probably better off just using the singleton `Nothing`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str], None, None]:
|
def flatten(self) -> typing.Generator[list[str | Token], None, None]:
|
||||||
# It's quiet in here.
|
# It's quiet in here.
|
||||||
yield []
|
yield []
|
||||||
|
|
||||||
|
|
@ -1583,7 +1601,7 @@ class NothingRule(Rule):
|
||||||
Nothing = NothingRule()
|
Nothing = NothingRule()
|
||||||
|
|
||||||
|
|
||||||
def seq(*args: list[Rule]) -> Rule:
|
def seq(*args: Rule) -> Rule:
|
||||||
"""A rule that matches a sequence of rules.
|
"""A rule that matches a sequence of rules.
|
||||||
|
|
||||||
(A helper function that combines its arguments into nested sequences.)
|
(A helper function that combines its arguments into nested sequences.)
|
||||||
|
|
@ -1594,17 +1612,15 @@ def seq(*args: list[Rule]) -> Rule:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@typing.overload
|
# @typing.overload
|
||||||
def rule(name: None | str = None) -> typing.Callable[[typing.Callable], Rule]: ...
|
# def rule(f: None | str = None) -> typing.Callable[[typing.Callable], Rule]: ...
|
||||||
|
|
||||||
|
|
||||||
@typing.overload
|
# @typing.overload
|
||||||
def rule(fn: typing.Callable) -> Rule: ...
|
# def rule(f: typing.Callable) -> Rule: ...
|
||||||
|
|
||||||
|
|
||||||
def rule(
|
def rule(f: typing.Callable) -> Rule:
|
||||||
name_or_fn: None | str | typing.Callable = None,
|
|
||||||
) -> Rule | typing.Callable[[typing.Callable], Rule]:
|
|
||||||
"""The decorator that marks a method in a Grammar object as a nonterminal
|
"""The decorator that marks a method in a Grammar object as a nonterminal
|
||||||
rule.
|
rule.
|
||||||
|
|
||||||
|
|
@ -1612,16 +1628,11 @@ def rule(
|
||||||
If called with one argument, that argument is a name that overrides the name
|
If called with one argument, that argument is a name that overrides the name
|
||||||
of the nonterminal, which defaults to the name of the function.
|
of the nonterminal, which defaults to the name of the function.
|
||||||
"""
|
"""
|
||||||
|
name = f.__name__
|
||||||
|
return NonTerminal(f, name)
|
||||||
|
|
||||||
def _rule(callable):
|
|
||||||
return NonTerminal(callable, name)
|
|
||||||
|
|
||||||
if callable(name_or_fn):
|
PrecedenceList = list[typing.Tuple[Assoc, list[Rule]]]
|
||||||
name = name_or_fn.__name__
|
|
||||||
return _rule(name_or_fn)
|
|
||||||
else:
|
|
||||||
name = name_or_fn
|
|
||||||
return _rule
|
|
||||||
|
|
||||||
|
|
||||||
class Grammar:
|
class Grammar:
|
||||||
|
|
@ -1650,12 +1661,13 @@ class Grammar:
|
||||||
Not very exciting, perhaps, but it's something.
|
Not very exciting, perhaps, but it's something.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, precedence: list[typing.Tuple[Assoc, list[Token | NonTerminal]]] = None):
|
def __init__(self, precedence: PrecedenceList | None = None):
|
||||||
if precedence is None:
|
if precedence is None:
|
||||||
precedence = getattr(self, "precedence", [])
|
precedence = getattr(self, "precedence", [])
|
||||||
|
assert precedence is not None
|
||||||
|
|
||||||
precedence_table = {}
|
precedence_table = {}
|
||||||
for precedence, (associativity, symbols) in enumerate(precedence):
|
for prec, (associativity, symbols) in enumerate(precedence):
|
||||||
for symbol in symbols:
|
for symbol in symbols:
|
||||||
if isinstance(symbol, Token):
|
if isinstance(symbol, Token):
|
||||||
key = symbol.value
|
key = symbol.value
|
||||||
|
|
@ -1664,7 +1676,7 @@ class Grammar:
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"{symbol} must be either a Token or a NonTerminal")
|
raise ValueError(f"{symbol} must be either a Token or a NonTerminal")
|
||||||
|
|
||||||
precedence_table[key] = (associativity, precedence + 1)
|
precedence_table[key] = (associativity, prec + 1)
|
||||||
|
|
||||||
self._precedence = precedence_table
|
self._precedence = precedence_table
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue