faster: Snapshot a big grammar I'm playing with
This commit is contained in:
parent
d0be3ea267
commit
7147557e2b
1 changed files with 418 additions and 0 deletions
418
grammar.py
Normal file
418
grammar.py
Normal file
|
|
@ -0,0 +1,418 @@
|
|||
import parser_faster
|
||||
import sys
|
||||
import typing
|
||||
|
||||
from parser_faster import Assoc
|
||||
|
||||
class Token:
|
||||
value: str
|
||||
|
||||
def __init__(self, value):
|
||||
self.value = sys.intern(value)
|
||||
|
||||
Symbol = Token | str
|
||||
|
||||
def desugar(
    grammar: dict[str, list[list[Symbol]]],
    precedence: list[typing.Tuple[Assoc, list[Symbol]]],
) -> typing.Tuple[
    list[typing.Tuple[str, list[str]]],
    dict[str, typing.Tuple[Assoc, int]],
]:
    """Flatten a Token-based grammar description into plain strings.

    Args:
        grammar: Maps each nonterminal name to its list of productions. Every
            production is a list of symbols; a ``Token`` denotes a terminal
            and a plain string denotes a reference to a nonterminal.
        precedence: Precedence levels in list order. Each entry pairs an
            associativity with the symbols that share that level.

    Returns:
        A pair ``(rules, precedence_table)``. ``rules`` is a flat list of
        ``(nonterminal, [symbol names])`` tuples, one per production, in
        grammar order. ``precedence_table`` maps each symbol name to
        ``(associativity, level)`` where ``level`` is the 1-based index of
        its entry in ``precedence``.

    Raises:
        AssertionError: If a production is not a list.
        Exception: If a production references a nonterminal that has no
            definition, or if a name is used both as a token and as a rule.
    """
    nonterminals: set[str] = set(grammar)
    nonterminal_refs: set[str] = set()
    terminals: set[str] = set()

    result: list[typing.Tuple[str, list[str]]] = []
    for name, rules in grammar.items():
        for rule in rules:
            assert isinstance(rule, list), f"production of {name!r} must be a list"
            result_rule: list[str] = []
            for symbol in rule:
                if isinstance(symbol, Token):
                    result_rule.append(symbol.value)
                    terminals.add(symbol.value)
                else:
                    result_rule.append(symbol)
                    nonterminal_refs.add(symbol)

            result.append((name, result_rule))

    # Sets iterate in arbitrary order; sort so the error text is reproducible.
    unknown_rules = nonterminal_refs - nonterminals
    if unknown_rules:
        undefined = "\n ".join(sorted(unknown_rules))
        raise Exception(f"The following rules are not defined:\n {undefined}")

    overlap_rules = nonterminals & terminals
    if overlap_rules:
        overlap = "\n ".join(sorted(overlap_rules))
        raise Exception(f"The following symbols are both tokens and rules:\n {overlap}")

    # Levels are numbered from 1. If a symbol is listed more than once, the
    # later (higher-numbered) entry wins.
    result_precedence = {
        (symbol.value if isinstance(symbol, Token) else symbol): (associativity, level)
        for level, (associativity, symbols) in enumerate(precedence, start=1)
        for symbol in symbols
    }

    return result, result_precedence
|
||||
|
||||
def _yacc_symbol(symbol):
    """Return ``(text, is_token)``: how one grammar symbol is spelled for yacc."""
    if not isinstance(symbol, str):
        # Token-like object: its ``value`` attribute carries the terminal name.
        return symbol.value.upper(), True
    if symbol.startswith("token:"):
        return symbol[len("token:"):].upper(), True
    return symbol, False


def dump_yacc(grammar):
    """Print ``grammar`` to stdout in yacc syntax.

    Args:
        grammar: Maps each rule name to its list of productions; every
            production is a sequence of symbols. A terminal is either a
            string starting with ``"token:"`` or a non-string object with a
            ``value`` attribute (such as ``Token``); any other string is
            emitted unchanged as a rule reference.

    The output consists of one ``%token`` line per distinct terminal (sorted,
    upper-cased), a ``%%`` separator, every rule with its alternatives
    aligned under ``|``, and a closing ``%%``.
    """
    tokens = set()
    for rules in grammar.values():
        for rule in rules:
            for symbol in rule:
                text, is_token = _yacc_symbol(symbol)
                if is_token:
                    tokens.add(text)

    for token in sorted(tokens):
        print(f"%token {token}")

    print()
    print("%%")

    for name, rules in grammar.items():
        print(f"{name} : ", end="")
        # Alternatives after the first are indented to line up under the name.
        continuation = f"{' ' * len(name)} | "
        for index, rule in enumerate(rules):
            if index != 0:
                print(continuation, end="")
            print(" ".join(_yacc_symbol(symbol)[0] for symbol in rule))
        print()

    print("%%")
|
||||
|
||||
|
||||
# Terminal symbols used by the grammar below. The grouping is by apparent
# role and is purely for readability; the string is the token's name.

# Keywords
AS = Token("As")
CLASS = Token("Class")
ELSE = Token("Else")
EXPORT = Token("Export")
FOR = Token("For")
FUN = Token("Fun")
IF = Token("If")
IMPORT = Token("Import")
IN = Token("In")
LET = Token("Let")
MATCH = Token("Match")
NEW = Token("New")
RETURN = Token("Return")
# NOTE(review): spelled "Selff" (double f) -- presumably this mirrors the
# lexer's name for the token; confirm it is not a typo before changing it.
SELF = Token("Selff")
WHILE = Token("While")

# Names and literals
IDENTIFIER = Token("Identifier")
NUMBER = Token("Number")
STRING = Token("String")
TRUE = Token("True")
FALSE = Token("False")

# Punctuation
ARROW = Token("Arrow")
BAR = Token("Bar")
COLON = Token("Colon")
COMMA = Token("Comma")
DOT = Token("Dot")
LCURLY = Token("LeftBrace")
RCURLY = Token("RightBrace")
LPAREN = Token("LeftParen")
RPAREN = Token("RightParen")
SEMICOLON = Token("Semicolon")
UNDERSCORE = Token("Underscore")

# Operators
EQUAL = Token("Equal")
OR = Token("Or")
IS = Token("Is")
AND = Token("And")
EQUALEQUAL = Token("EqualEqual")
BANGEQUAL = Token("BangEqual")
LESS = Token("Less")
LESSEQUAL = Token("LessEqual")
GREATER = Token("Greater")
GREATEREQUAL = Token("GreaterEqual")
PLUS = Token("Plus")
MINUS = Token("Minus")
STAR = Token("Star")
SLASH = Token("Slash")
BANG = Token("Bang")
|
||||
|
||||
# fmt: off
|
||||
# Operator precedence table: one (associativity, symbols) entry per level.
# desugar() numbers the levels 1, 2, 3, ... in list order, so entries further
# down get larger numbers (presumably "binds tighter" -- confirm against
# parser_faster).
precedence = [
    (Assoc.RIGHT, [EQUAL]),
    (Assoc.LEFT,  [OR]),
    (Assoc.LEFT,  [IS]),
    (Assoc.LEFT,  [AND]),
    (Assoc.LEFT,  [EQUALEQUAL, BANGEQUAL]),
    (Assoc.LEFT,  [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
    (Assoc.LEFT,  [PLUS, MINUS]),
    (Assoc.LEFT,  [STAR, SLASH]),
    (Assoc.LEFT,  [LPAREN]),
    (Assoc.LEFT,  [DOT]),

    # When it is ambiguous whether an IF is a statement or an expression,
    # prefer the statement.
    (Assoc.NONE,  ["IfStatement"]),
]
|
||||
|
||||
# The grammar: each key is a nonterminal and each value lists its productions.
# Inside a production, Token constants are terminals and plain strings refer
# to other nonterminals. Commented-out entries are rules not yet enabled.
grammar = {
    "File": [["FileStatementList"]],
    "FileStatementList": [
        ["FileStatement"],
        ["FileStatement", "FileStatementList"],
    ],
    "FileStatement": [
        ["ImportStatement"],
        ["ClassDeclaration"],
        ["ExportStatement"],
        ["Statement"],
    ],

    "ImportStatement": [[IMPORT, STRING, AS, IDENTIFIER, SEMICOLON]],

    # Classes
    "ClassDeclaration": [[CLASS, IDENTIFIER, "ClassBody"]],
    "ClassBody": [
        [LCURLY, RCURLY],
        [LCURLY, "ClassMembers", RCURLY],
    ],
    "ClassMembers": [
        ["ClassMember"],
        ["ClassMembers", "ClassMember"],
    ],
    "ClassMember": [
        ["FieldDeclaration"],
        ["FunctionDeclaration"],
    ],
    "FieldDeclaration": [[IDENTIFIER, COLON, "TypeExpression", SEMICOLON]],

    # Types
    "TypeExpression": [
        ["AlternateType"],
        ["TypeIdentifier"],
    ],
    "AlternateType": [["TypeExpression", BAR, "TypeIdentifier"]],
    "TypeIdentifier": [[IDENTIFIER]],

    "ExportStatement": [
        [EXPORT, "ClassDeclaration"],
        [EXPORT, "FunctionDeclaration"],
        # [EXPORT, "LetStatement"],
        [EXPORT, "ExportList", SEMICOLON],
    ],
    "ExportList": [
        [],
        [IDENTIFIER],
        [IDENTIFIER, COMMA, "ExportList"],
    ],

    # Functions
    "FunctionDeclaration": [
        [FUN, IDENTIFIER, "FunctionParameters", "Block"],
        [FUN, IDENTIFIER, "FunctionParameters", ARROW, "TypeExpression", "Block"],
    ],
    "FunctionParameters": [
        [LPAREN, RPAREN],
        [LPAREN, "FirstParameter", RPAREN],
        [LPAREN, "FirstParameter", COMMA, "ParameterList", RPAREN],
    ],
    "FirstParameter": [
        [SELF],
        ["Parameter"],
    ],
    "ParameterList": [
        [],
        ["Parameter"],
        ["Parameter", COMMA, "ParameterList"],
    ],
    "Parameter": [[IDENTIFIER, COLON, "TypeExpression"]],

    # Block
    "Block": [
        [LCURLY, RCURLY],
        [LCURLY, "StatementList", RCURLY],
        [LCURLY, "StatementList", "Expression", RCURLY],
    ],
    "StatementList": [
        ["Statement"],
        ["StatementList", "Statement"],
    ],

    "Statement": [
        ["FunctionDeclaration"],
        ["LetStatement"],
        # ["ReturnStatement"],
        # ["ForStatement"],
        ["IfStatement"],
        # ["WhileStatement"],
        # ["ExpressionStatement"],
    ],

    "LetStatement": [[LET, IDENTIFIER, EQUAL, "Expression", SEMICOLON]],

    # "ReturnStatement": [
    #     [RETURN, "Expression", SEMICOLON],
    # ],

    # "ForStatement": [
    #     [FOR, "IteratorVariable", IN, "Expression", "Block"],
    # ],
    # "IteratorVariable": [[IDENTIFIER]],

    "IfStatement": [["ConditionalExpression"]],

    # "WhileStatement": [
    #     [WHILE, "Expression", "Block"],
    # ],

    # "ExpressionStatement": [
    #     ["Expression", SEMICOLON],
    # ],

    # Expressions
    "Expression": [["AssignmentExpression"]],

    "AssignmentExpression": [
        ["OrExpression", EQUAL, "AssignmentExpression"],
        ["OrExpression"],
    ],
    "OrExpression": [
        ["OrExpression", OR, "IsExpression"],
        ["IsExpression"],
    ],
    "IsExpression": [
        # ["IsExpression", IS, "Pattern"],
        ["AndExpression"],
    ],
    "AndExpression": [
        ["AndExpression", AND, "EqualityExpression"],
        ["EqualityExpression"],
    ],
    "EqualityExpression": [
        ["EqualityExpression", EQUALEQUAL, "RelationExpression"],
        ["EqualityExpression", BANGEQUAL, "RelationExpression"],
        ["RelationExpression"],
    ],
    "RelationExpression": [
        ["RelationExpression", LESS, "AdditiveExpression"],
        ["RelationExpression", LESSEQUAL, "AdditiveExpression"],
        ["RelationExpression", GREATER, "AdditiveExpression"],
        ["RelationExpression", GREATEREQUAL, "AdditiveExpression"],
        ["AdditiveExpression"],
    ],
    "AdditiveExpression": [
        ["AdditiveExpression", PLUS, "MultiplicationExpression"],
        ["AdditiveExpression", MINUS, "MultiplicationExpression"],
        ["MultiplicationExpression"],
    ],
    "MultiplicationExpression": [
        ["MultiplicationExpression", STAR, "PrimaryExpression"],
        ["MultiplicationExpression", SLASH, "PrimaryExpression"],
        ["PrimaryExpression"],
    ],
    "PrimaryExpression": [
        [IDENTIFIER],
        [SELF],
        [NUMBER],
        [STRING],
        [TRUE],
        [FALSE],
        [BANG, "PrimaryExpression"],
        [MINUS, "PrimaryExpression"],

        ["Block"],
        ["ConditionalExpression"],
        # ["ListConstructorExpression"],
        # ["ObjectConstructorExpression"],
        # ["MatchExpression"],

        # ["PrimaryExpression", LPAREN, "ExpressionList", RPAREN],
        # ["PrimaryExpression", DOT, IDENTIFIER],

        [LPAREN, "Expression", RPAREN],
    ],

    "ConditionalExpression": [
        [IF, "Expression", "Block"],
        [IF, "Expression", "Block", ELSE, "ConditionalExpression"],
        [IF, "Expression", "Block", ELSE, "Block"],
    ],

    # "ListConstructorExpression": [
    #     [LCURLY, "ExpressionList", RCURLY],
    # ],

    # "ExpressionList": [
    #     [],
    #     ["Expression"],
    #     ["Expression", COMMA, "ExpressionList"],
    # ],

    # # Match Expression
    # "MatchExpression": [
    #     [MATCH, "MatchBody"],
    # ],
    # "MatchBody": [
    #     [LCURLY, "MatchArms", RCURLY],
    # ],
    # "MatchArms": [
    #     [],
    #     ["MatchArm"],
    #     ["MatchArm", COMMA, "MatchArms"],
    # ],
    # "MatchArm": [
    #     ["Pattern", ARROW, "Expression"],
    # ],

    # # Pattern
    # "Pattern": [
    #     ["VariableBinding", "PatternCore", AND, "AndExpression"],
    #     ["VariableBinding", "PatternCore"],
    #     ["PatternCore", AND, "AndExpression"],
    #     ["PatternCore"],
    # ],
    # "PatternCore": [
    #     ["TypeExpression"],
    #     ["WildcardPattern"],
    # ],
    # "WildcardPattern": [[UNDERSCORE]],
    # "VariableBinding": [[IDENTIFIER, COLON]],

    # # Object Constructor
    # "ObjectConstructorExpression": [
    #     [NEW, "TypeIdentifier", "FieldList"],
    # ],
    # "FieldList": [
    #     [LCURLY, "FieldValues", RCURLY],
    # ],
    # "FieldValues": [
    #     [],
    #     ["FieldValue"],
    #     ["FieldValue", COMMA, "FieldValues"],
    # ],
    # "FieldValue": [
    #     [IDENTIFIER],
    #     [IDENTIFIER, COLON, "Expression"],
    # ],
}
|
||||
# fmt: on
|
||||
|
||||
# Debugging aid, currently disabled:
# dump_yacc(grammar)

# Flatten the Token-based description into plain strings. This rebinds the
# module-level `grammar` and `precedence` names to the desugared forms.
grammar, precedence = desugar(grammar=grammar, precedence=precedence)

# Build the LR(1) generator for the "File" rule and produce its table.
gen = parser_faster.GenerateLR1("File", grammar, precedence=precedence)
table = gen.gen_table()

# Emit the formatted table followed by one blank line.
print(parser_faster.format_table(gen, table), end="\n\n")

# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])
|
||||
Loading…
Add table
Add a link
Reference in a new issue