Compare commits
3 commits
f1507a36f1
...
cabc091ee4
| Author | SHA1 | Date | |
|---|---|---|---|
| | cabc091ee4 | | |
| | e57a827e5c | | |
| | d6131ad53e | | |
3 changed files with 48 additions and 11 deletions
16
grammar.py
16
grammar.py
|
|
@ -1,7 +1,8 @@
|
||||||
# This is an example grammar.
|
# This is an example grammar.
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from parser import Assoc, GenerateLALR, GenerateLR1, Grammar, Nothing, Terminal, rule, seq, Rule
|
import parser
|
||||||
|
from parser import Assoc, Grammar, Nothing, Terminal, rule, seq, Rule
|
||||||
|
|
||||||
ARROW = Terminal("Arrow")
|
ARROW = Terminal("Arrow")
|
||||||
AS = Terminal("As")
|
AS = Terminal("As")
|
||||||
|
|
@ -54,7 +55,7 @@ RSQUARE = Terminal("RightBracket")
|
||||||
|
|
||||||
|
|
||||||
class FineGrammar(Grammar):
|
class FineGrammar(Grammar):
|
||||||
generator = GenerateLALR
|
# generator = parser.GenerateLR1
|
||||||
start = "File"
|
start = "File"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
@ -254,7 +255,7 @@ class FineGrammar(Grammar):
|
||||||
@rule
|
@rule
|
||||||
def primary_expression(self) -> Rule:
|
def primary_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
IDENTIFIER
|
self.identifier_expression
|
||||||
| SELF
|
| SELF
|
||||||
| NUMBER
|
| NUMBER
|
||||||
| STRING
|
| STRING
|
||||||
|
|
@ -273,6 +274,10 @@ class FineGrammar(Grammar):
|
||||||
| seq(LPAREN, self.expression, RPAREN)
|
| seq(LPAREN, self.expression, RPAREN)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@rule("IdentifierExpression")
|
||||||
|
def identifier_expression(self):
|
||||||
|
return IDENTIFIER
|
||||||
|
|
||||||
@rule("ConditionalExpression")
|
@rule("ConditionalExpression")
|
||||||
def conditional_expression(self) -> Rule:
|
def conditional_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
|
|
@ -318,7 +323,6 @@ class FineGrammar(Grammar):
|
||||||
return (
|
return (
|
||||||
seq(self.variable_binding, self._pattern_core, self.pattern_predicate)
|
seq(self.variable_binding, self._pattern_core, self.pattern_predicate)
|
||||||
| seq(self.variable_binding, self._pattern_core)
|
| seq(self.variable_binding, self._pattern_core)
|
||||||
| seq(self._pattern_core, self.pattern_predicate)
|
|
||||||
| self._pattern_core
|
| self._pattern_core
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -534,3 +538,7 @@ class FineTokens:
|
||||||
column_index = start - col_start
|
column_index = start - col_start
|
||||||
value = self.src[start : start + length]
|
value = self.src[start : start + length]
|
||||||
print(f"{start:04} {kind.value:12} {value} ({line_index}, {column_index})")
|
print(f"{start:04} {kind.value:12} {value} ({line_index}, {column_index})")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
FineGrammar().build_table()
|
||||||
|
|
|
||||||
|
|
@ -390,7 +390,7 @@ class Harness:
|
||||||
for line in lines[: rows - 3]:
|
for line in lines[: rows - 3]:
|
||||||
print(line[:cols] + "\r")
|
print(line[:cols] + "\r")
|
||||||
else:
|
else:
|
||||||
wrapper = textwrap.TextWrapper(width=cols)
|
wrapper = textwrap.TextWrapper(width=cols, drop_whitespace=False)
|
||||||
lines = [line for error in self.errors for line in wrapper.wrap(error)]
|
lines = [line for error in self.errors for line in wrapper.wrap(error)]
|
||||||
for line in lines[: rows - 3]:
|
for line in lines[: rows - 3]:
|
||||||
print(line + "\r")
|
print(line + "\r")
|
||||||
|
|
|
||||||
41
parser.py
41
parser.py
|
|
@ -135,6 +135,7 @@ import dataclasses
|
||||||
import enum
|
import enum
|
||||||
import functools
|
import functools
|
||||||
import inspect
|
import inspect
|
||||||
|
import json
|
||||||
import sys
|
import sys
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
|
|
@ -282,7 +283,11 @@ class Configuration:
|
||||||
)
|
)
|
||||||
|
|
||||||
def format(self, alphabet: list[str]) -> str:
|
def format(self, alphabet: list[str]) -> str:
|
||||||
la = ", " + str(tuple(alphabet[i] for i in self.lookahead)) if self.lookahead != () else ""
|
if self.lookahead != ():
|
||||||
|
la = " {" + ",".join(alphabet[i] for i in self.lookahead) + "}"
|
||||||
|
else:
|
||||||
|
la = ""
|
||||||
|
|
||||||
return "{name} -> {bits}{lookahead}".format(
|
return "{name} -> {bits}{lookahead}".format(
|
||||||
name=alphabet[self.name],
|
name=alphabet[self.name],
|
||||||
bits=" ".join(
|
bits=" ".join(
|
||||||
|
|
@ -297,7 +302,7 @@ class Configuration:
|
||||||
|
|
||||||
|
|
||||||
# ConfigSet = typing.Tuple[Configuration, ...]
|
# ConfigSet = typing.Tuple[Configuration, ...]
|
||||||
class ConfigSet(frozenset):
|
class ConfigSet(frozenset[Configuration]):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -354,6 +359,21 @@ class ConfigurationSetInfo:
|
||||||
"""
|
"""
|
||||||
self.successors[c_id][symbol] = successor
|
self.successors[c_id][symbol] = successor
|
||||||
|
|
||||||
|
def dump_state(self, alphabet: list[str]) -> str:
|
||||||
|
return json.dumps(
|
||||||
|
{
|
||||||
|
str(set_index): {
|
||||||
|
"configs": [c.format(alphabet) for c in config_set],
|
||||||
|
"successors": {
|
||||||
|
alphabet[k]: str(v) for k, v in self.successors[set_index].items()
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for set_index, config_set in enumerate(self.sets)
|
||||||
|
},
|
||||||
|
indent=4,
|
||||||
|
sort_keys=True,
|
||||||
|
)
|
||||||
|
|
||||||
def find_path_to_set(self, target_set: ConfigSet) -> list[int]:
|
def find_path_to_set(self, target_set: ConfigSet) -> list[int]:
|
||||||
"""Trace the path of grammar symbols from the first set (which is always
|
"""Trace the path of grammar symbols from the first set (which is always
|
||||||
set 0) to the target set. This is useful in conflict reporting,
|
set 0) to the target set. This is useful in conflict reporting,
|
||||||
|
|
@ -369,6 +389,8 @@ class ConfigurationSetInfo:
|
||||||
visited = set()
|
visited = set()
|
||||||
|
|
||||||
queue: collections.deque = collections.deque()
|
queue: collections.deque = collections.deque()
|
||||||
|
# NOTE: Set 0 is always the first set, the one that contains the
|
||||||
|
# start symbol.
|
||||||
queue.appendleft((0, []))
|
queue.appendleft((0, []))
|
||||||
while len(queue) > 0:
|
while len(queue) > 0:
|
||||||
set_index, path = queue.pop()
|
set_index, path = queue.pop()
|
||||||
|
|
@ -452,7 +474,7 @@ class AmbiguityError(Exception):
|
||||||
self.ambiguities = ambiguities
|
self.ambiguities = ambiguities
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "The grammar is ambiguous:\n\n" + "\n\n".join(
|
return f"{len(self.ambiguities)} ambiguities:\n\n" + "\n\n".join(
|
||||||
str(ambiguity) for ambiguity in self.ambiguities
|
str(ambiguity) for ambiguity in self.ambiguities
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -505,7 +527,7 @@ class ErrorCollection:
|
||||||
alphabet: list[str],
|
alphabet: list[str],
|
||||||
all_sets: ConfigurationSetInfo,
|
all_sets: ConfigurationSetInfo,
|
||||||
) -> AmbiguityError | None:
|
) -> AmbiguityError | None:
|
||||||
"""Format all the errors into a string, or return None if there are no
|
"""Format all the errors into an error, or return None if there are no
|
||||||
errors.
|
errors.
|
||||||
|
|
||||||
We need the alphabet to turn all these integers into something human
|
We need the alphabet to turn all these integers into something human
|
||||||
|
|
@ -515,6 +537,9 @@ class ErrorCollection:
|
||||||
if len(self.errors) == 0:
|
if len(self.errors) == 0:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
# with open("ambiguity.json", mode="w", encoding="utf-8") as aj:
|
||||||
|
# aj.write(all_sets.dump_state(alphabet))
|
||||||
|
|
||||||
errors = []
|
errors = []
|
||||||
for config_set, set_errors in self.errors.items():
|
for config_set, set_errors in self.errors.items():
|
||||||
path = all_sets.find_path_to_set(config_set)
|
path = all_sets.find_path_to_set(config_set)
|
||||||
|
|
@ -1518,7 +1543,7 @@ class GenerateLR1(GenerateSLR1):
|
||||||
lookahead_tuple = tuple(sorted(lookahead))
|
lookahead_tuple = tuple(sorted(lookahead))
|
||||||
next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple))
|
next.append(Configuration.from_rule(config_next, rule, lookahead=lookahead_tuple))
|
||||||
|
|
||||||
return tuple(sorted(next))
|
return tuple(next)
|
||||||
|
|
||||||
def gen_all_sets(self):
|
def gen_all_sets(self):
|
||||||
"""Generate all of the configuration sets for the grammar.
|
"""Generate all of the configuration sets for the grammar.
|
||||||
|
|
@ -1951,6 +1976,10 @@ class Grammar:
|
||||||
new_clause = []
|
new_clause = []
|
||||||
for symbol in clause:
|
for symbol in clause:
|
||||||
if isinstance(symbol, Terminal):
|
if isinstance(symbol, Terminal):
|
||||||
|
if symbol.value in temp_grammar:
|
||||||
|
raise ValueError(
|
||||||
|
f"'{symbol.value}' is the name of both a Terminal and a NonTerminal rule. This will cause problems."
|
||||||
|
)
|
||||||
new_clause.append(symbol.value)
|
new_clause.append(symbol.value)
|
||||||
else:
|
else:
|
||||||
new_clause.append(symbol)
|
new_clause.append(symbol)
|
||||||
|
|
@ -1959,7 +1988,7 @@ class Grammar:
|
||||||
|
|
||||||
return grammar, transparents
|
return grammar, transparents
|
||||||
|
|
||||||
def build_table(self, start: str | None, generator=None):
|
def build_table(self, start: str | None = None, generator=None):
|
||||||
"""Construct a parse table for this grammar, starting at the named
|
"""Construct a parse table for this grammar, starting at the named
|
||||||
nonterminal rule.
|
nonterminal rule.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue