faster: Snapshot a big grammar I'm playing with
This commit is contained in:
parent
d0be3ea267
commit
7147557e2b
1 changed files with 418 additions and 0 deletions
418
grammar.py
Normal file
418
grammar.py
Normal file
|
|
@ -0,0 +1,418 @@
|
||||||
|
import parser_faster
|
||||||
|
import sys
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from parser_faster import Assoc
|
||||||
|
|
||||||
|
class Token:
|
||||||
|
value: str
|
||||||
|
|
||||||
|
def __init__(self, value):
|
||||||
|
self.value = sys.intern(value)
|
||||||
|
|
||||||
|
Symbol = Token | str
|
||||||
|
|
||||||
|
def desugar(
|
||||||
|
grammar: dict[str, list[list[Symbol]]],
|
||||||
|
precedence: list[typing.Tuple[Assoc, list[Symbol]]],
|
||||||
|
):
|
||||||
|
nonterminal_refs = set()
|
||||||
|
nonterminals = set()
|
||||||
|
terminals = set()
|
||||||
|
|
||||||
|
result: list[typing.Tuple[str, list[str]]] = []
|
||||||
|
for (k, v) in grammar.items():
|
||||||
|
nonterminals.add(k)
|
||||||
|
|
||||||
|
for rule in v:
|
||||||
|
assert isinstance(rule, list)
|
||||||
|
result_rule: list[str] = []
|
||||||
|
for symbol in rule:
|
||||||
|
if isinstance(symbol, Token):
|
||||||
|
result_rule.append(symbol.value)
|
||||||
|
terminals.add(symbol.value)
|
||||||
|
else:
|
||||||
|
result_rule.append(symbol)
|
||||||
|
nonterminal_refs.add(symbol)
|
||||||
|
|
||||||
|
result.append((k, result_rule))
|
||||||
|
|
||||||
|
unknown_rules = nonterminal_refs - nonterminals
|
||||||
|
if len(unknown_rules) > 0:
|
||||||
|
undefined = "\n ".join(unknown_rules)
|
||||||
|
raise Exception(f"The following rules are not defined:\n {undefined}")
|
||||||
|
|
||||||
|
overlap_rules = nonterminals & terminals
|
||||||
|
if len(overlap_rules) > 0:
|
||||||
|
overlap = "\n ".join(overlap_rules)
|
||||||
|
raise Exception(f"The following symbols are both tokens and rules:\n {overlap}")
|
||||||
|
|
||||||
|
result_precedence = {
|
||||||
|
(symbol.value if isinstance(symbol, Token) else symbol):(associativity, precedence + 1)
|
||||||
|
for precedence, (associativity, symbols) in enumerate(precedence)
|
||||||
|
for symbol in symbols
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, result_precedence
|
||||||
|
|
||||||
|
def dump_yacc(grammar):
|
||||||
|
tokens = set()
|
||||||
|
for rules in grammar.values():
|
||||||
|
for rule in rules:
|
||||||
|
for symbol in rule:
|
||||||
|
if symbol.startswith("token:"):
|
||||||
|
symbol = symbol[6:].upper()
|
||||||
|
tokens.add(symbol)
|
||||||
|
for token in sorted(tokens):
|
||||||
|
print(f"%token {token}")
|
||||||
|
|
||||||
|
print()
|
||||||
|
print("%%")
|
||||||
|
|
||||||
|
for name, rules in grammar.items():
|
||||||
|
print(f"{name} : ", end='');
|
||||||
|
for i,rule in enumerate(rules):
|
||||||
|
if i != 0:
|
||||||
|
print(f"{' ' * len(name)} | ", end='')
|
||||||
|
|
||||||
|
parts = []
|
||||||
|
for symbol in rule:
|
||||||
|
if symbol.startswith("token:"):
|
||||||
|
symbol = symbol[6:].upper()
|
||||||
|
parts.append(symbol)
|
||||||
|
print(' '.join(parts))
|
||||||
|
print()
|
||||||
|
|
||||||
|
print("%%")
|
||||||
|
|
||||||
|
|
||||||
|
ARROW = Token("Arrow")
|
||||||
|
AS = Token("As")
|
||||||
|
BAR = Token("Bar")
|
||||||
|
CLASS = Token("Class")
|
||||||
|
COLON = Token("Colon")
|
||||||
|
ELSE = Token("Else")
|
||||||
|
FOR = Token("For")
|
||||||
|
FUN = Token("Fun")
|
||||||
|
IDENTIFIER = Token("Identifier")
|
||||||
|
IF = Token("If")
|
||||||
|
IMPORT = Token("Import")
|
||||||
|
IN = Token("In")
|
||||||
|
LCURLY = Token("LeftBrace")
|
||||||
|
LET = Token("Let")
|
||||||
|
RCURLY = Token("RightBrace")
|
||||||
|
RETURN = Token("Return")
|
||||||
|
SEMICOLON = Token("Semicolon")
|
||||||
|
STRING = Token("String")
|
||||||
|
WHILE = Token("While")
|
||||||
|
EQUAL = Token("Equal")
|
||||||
|
LPAREN = Token("LeftParen")
|
||||||
|
RPAREN = Token("RightParen")
|
||||||
|
COMMA = Token("Comma")
|
||||||
|
SELF = Token("Selff")
|
||||||
|
OR = Token("Or")
|
||||||
|
IS = Token("Is")
|
||||||
|
AND = Token("And")
|
||||||
|
EQUALEQUAL = Token("EqualEqual")
|
||||||
|
BANGEQUAL = Token("BangEqual")
|
||||||
|
LESS = Token("Less")
|
||||||
|
GREATER = Token("Greater")
|
||||||
|
LESSEQUAL = Token("LessEqual")
|
||||||
|
GREATEREQUAL = Token("GreaterEqual")
|
||||||
|
PLUS = Token("Plus")
|
||||||
|
MINUS = Token("Minus")
|
||||||
|
STAR = Token("Star")
|
||||||
|
SLASH = Token("Slash")
|
||||||
|
NUMBER = Token("Number")
|
||||||
|
TRUE = Token("True")
|
||||||
|
FALSE = Token("False")
|
||||||
|
BANG = Token("Bang")
|
||||||
|
DOT = Token("Dot")
|
||||||
|
MATCH = Token("Match")
|
||||||
|
EXPORT = Token("Export")
|
||||||
|
UNDERSCORE = Token("Underscore")
|
||||||
|
NEW = Token("New")
|
||||||
|
|
||||||
|
# fmt: off
|
||||||
|
precedence = [
|
||||||
|
(Assoc.RIGHT, [EQUAL]),
|
||||||
|
(Assoc.LEFT, [OR]),
|
||||||
|
(Assoc.LEFT, [IS]),
|
||||||
|
(Assoc.LEFT, [AND]),
|
||||||
|
(Assoc.LEFT, [EQUALEQUAL, BANGEQUAL]),
|
||||||
|
(Assoc.LEFT, [LESS, GREATER, GREATEREQUAL, LESSEQUAL]),
|
||||||
|
(Assoc.LEFT, [PLUS, MINUS]),
|
||||||
|
(Assoc.LEFT, [STAR, SLASH]),
|
||||||
|
(Assoc.LEFT, [LPAREN]),
|
||||||
|
(Assoc.LEFT, [DOT]),
|
||||||
|
|
||||||
|
# If there's a confusion about whether to make an IF statement or an
|
||||||
|
# expression, prefer the statement.
|
||||||
|
(Assoc.NONE, ["IfStatement"]),
|
||||||
|
]
|
||||||
|
|
||||||
|
grammar = {
|
||||||
|
"File": [
|
||||||
|
["FileStatementList"],
|
||||||
|
],
|
||||||
|
"FileStatementList": [
|
||||||
|
["FileStatement"],
|
||||||
|
["FileStatement", "FileStatementList"],
|
||||||
|
],
|
||||||
|
"FileStatement": [
|
||||||
|
["ImportStatement"],
|
||||||
|
["ClassDeclaration"],
|
||||||
|
["ExportStatement"],
|
||||||
|
["Statement"],
|
||||||
|
],
|
||||||
|
|
||||||
|
"ImportStatement": [
|
||||||
|
[IMPORT, STRING, AS, IDENTIFIER, SEMICOLON],
|
||||||
|
],
|
||||||
|
|
||||||
|
# Classes
|
||||||
|
"ClassDeclaration": [
|
||||||
|
[CLASS, IDENTIFIER, "ClassBody"],
|
||||||
|
],
|
||||||
|
"ClassBody": [
|
||||||
|
[LCURLY, RCURLY],
|
||||||
|
[LCURLY, "ClassMembers", RCURLY],
|
||||||
|
],
|
||||||
|
"ClassMembers": [
|
||||||
|
["ClassMember"],
|
||||||
|
["ClassMembers", "ClassMember"],
|
||||||
|
],
|
||||||
|
"ClassMember": [
|
||||||
|
["FieldDeclaration"],
|
||||||
|
["FunctionDeclaration"],
|
||||||
|
],
|
||||||
|
"FieldDeclaration": [
|
||||||
|
[IDENTIFIER, COLON, "TypeExpression", SEMICOLON],
|
||||||
|
],
|
||||||
|
|
||||||
|
# Types
|
||||||
|
"TypeExpression": [
|
||||||
|
["AlternateType"],
|
||||||
|
["TypeIdentifier"],
|
||||||
|
],
|
||||||
|
"AlternateType": [
|
||||||
|
["TypeExpression", BAR, "TypeIdentifier"],
|
||||||
|
],
|
||||||
|
"TypeIdentifier": [
|
||||||
|
[IDENTIFIER],
|
||||||
|
],
|
||||||
|
|
||||||
|
"ExportStatement": [
|
||||||
|
[EXPORT, "ClassDeclaration"],
|
||||||
|
[EXPORT, "FunctionDeclaration"],
|
||||||
|
# [EXPORT, "LetStatement"],
|
||||||
|
[EXPORT, "ExportList", SEMICOLON],
|
||||||
|
],
|
||||||
|
"ExportList": [
|
||||||
|
[],
|
||||||
|
[IDENTIFIER],
|
||||||
|
[IDENTIFIER, COMMA, "ExportList"],
|
||||||
|
],
|
||||||
|
|
||||||
|
# Functions
|
||||||
|
"FunctionDeclaration": [
|
||||||
|
[FUN, IDENTIFIER, "FunctionParameters", "Block"],
|
||||||
|
[FUN, IDENTIFIER, "FunctionParameters", ARROW, "TypeExpression", "Block"],
|
||||||
|
],
|
||||||
|
"FunctionParameters": [
|
||||||
|
[LPAREN, RPAREN],
|
||||||
|
[LPAREN, "FirstParameter", RPAREN],
|
||||||
|
[LPAREN, "FirstParameter", COMMA, "ParameterList", RPAREN],
|
||||||
|
],
|
||||||
|
"FirstParameter": [
|
||||||
|
[SELF],
|
||||||
|
["Parameter"],
|
||||||
|
],
|
||||||
|
"ParameterList": [
|
||||||
|
[],
|
||||||
|
["Parameter"],
|
||||||
|
["Parameter", COMMA, "ParameterList"],
|
||||||
|
],
|
||||||
|
"Parameter": [
|
||||||
|
[IDENTIFIER, COLON, "TypeExpression"],
|
||||||
|
],
|
||||||
|
|
||||||
|
# Block
|
||||||
|
"Block": [
|
||||||
|
[LCURLY, RCURLY],
|
||||||
|
[LCURLY, "StatementList", RCURLY],
|
||||||
|
[LCURLY, "StatementList", "Expression", RCURLY],
|
||||||
|
],
|
||||||
|
"StatementList": [
|
||||||
|
["Statement"],
|
||||||
|
["StatementList", "Statement"],
|
||||||
|
],
|
||||||
|
|
||||||
|
"Statement": [
|
||||||
|
["FunctionDeclaration"],
|
||||||
|
["LetStatement"],
|
||||||
|
# ["ReturnStatement"],
|
||||||
|
# ["ForStatement"],
|
||||||
|
["IfStatement"],
|
||||||
|
# ["WhileStatement"],
|
||||||
|
# ["ExpressionStatement"],
|
||||||
|
],
|
||||||
|
|
||||||
|
"LetStatement": [
|
||||||
|
[LET, IDENTIFIER, EQUAL, "Expression", SEMICOLON],
|
||||||
|
],
|
||||||
|
|
||||||
|
# "ReturnStatement": [
|
||||||
|
# [RETURN, "Expression", SEMICOLON],
|
||||||
|
# ],
|
||||||
|
|
||||||
|
# "ForStatement": [
|
||||||
|
# [FOR, "IteratorVariable", IN, "Expression", "Block"],
|
||||||
|
# ],
|
||||||
|
# "IteratorVariable": [[IDENTIFIER]],
|
||||||
|
|
||||||
|
"IfStatement": [["ConditionalExpression"]],
|
||||||
|
|
||||||
|
# "WhileStatement": [
|
||||||
|
# [WHILE, "Expression", "Block"],
|
||||||
|
# ],
|
||||||
|
|
||||||
|
# "ExpressionStatement": [
|
||||||
|
# ["Expression", SEMICOLON],
|
||||||
|
# ],
|
||||||
|
|
||||||
|
# Expressions
|
||||||
|
"Expression": [["AssignmentExpression"]],
|
||||||
|
|
||||||
|
"AssignmentExpression": [
|
||||||
|
["OrExpression", EQUAL, "AssignmentExpression"],
|
||||||
|
["OrExpression"],
|
||||||
|
],
|
||||||
|
"OrExpression": [
|
||||||
|
["OrExpression", OR, "IsExpression"],
|
||||||
|
["IsExpression"],
|
||||||
|
],
|
||||||
|
"IsExpression": [
|
||||||
|
# ["IsExpression", IS, "Pattern"],
|
||||||
|
["AndExpression"],
|
||||||
|
],
|
||||||
|
"AndExpression": [
|
||||||
|
["AndExpression", AND, "EqualityExpression"],
|
||||||
|
["EqualityExpression"],
|
||||||
|
],
|
||||||
|
"EqualityExpression": [
|
||||||
|
["EqualityExpression", EQUALEQUAL, "RelationExpression"],
|
||||||
|
["EqualityExpression", BANGEQUAL, "RelationExpression"],
|
||||||
|
["RelationExpression"],
|
||||||
|
],
|
||||||
|
"RelationExpression": [
|
||||||
|
["RelationExpression", LESS, "AdditiveExpression"],
|
||||||
|
["RelationExpression", LESSEQUAL, "AdditiveExpression"],
|
||||||
|
["RelationExpression", GREATER, "AdditiveExpression"],
|
||||||
|
["RelationExpression", GREATEREQUAL, "AdditiveExpression"],
|
||||||
|
["AdditiveExpression"],
|
||||||
|
],
|
||||||
|
"AdditiveExpression": [
|
||||||
|
["AdditiveExpression", PLUS, "MultiplicationExpression"],
|
||||||
|
["AdditiveExpression", MINUS, "MultiplicationExpression"],
|
||||||
|
["MultiplicationExpression"],
|
||||||
|
],
|
||||||
|
"MultiplicationExpression": [
|
||||||
|
["MultiplicationExpression", STAR, "PrimaryExpression"],
|
||||||
|
["MultiplicationExpression", SLASH, "PrimaryExpression"],
|
||||||
|
["PrimaryExpression"],
|
||||||
|
],
|
||||||
|
"PrimaryExpression": [
|
||||||
|
[IDENTIFIER],
|
||||||
|
[SELF],
|
||||||
|
[NUMBER],
|
||||||
|
[STRING],
|
||||||
|
[TRUE],
|
||||||
|
[FALSE],
|
||||||
|
[BANG, "PrimaryExpression"],
|
||||||
|
[MINUS, "PrimaryExpression"],
|
||||||
|
|
||||||
|
["Block"],
|
||||||
|
["ConditionalExpression"],
|
||||||
|
# ["ListConstructorExpression"],
|
||||||
|
# ["ObjectConstructorExpression"],
|
||||||
|
# ["MatchExpression"],
|
||||||
|
|
||||||
|
# ["PrimaryExpression", LPAREN, "ExpressionList", RPAREN],
|
||||||
|
# ["PrimaryExpression", DOT, IDENTIFIER],
|
||||||
|
|
||||||
|
[LPAREN, "Expression", RPAREN],
|
||||||
|
],
|
||||||
|
|
||||||
|
"ConditionalExpression": [
|
||||||
|
[IF, "Expression", "Block"],
|
||||||
|
[IF, "Expression", "Block", ELSE, "ConditionalExpression"],
|
||||||
|
[IF, "Expression", "Block", ELSE, "Block"],
|
||||||
|
],
|
||||||
|
|
||||||
|
# "ListConstructorExpression": [
|
||||||
|
# [LCURLY, "ExpressionList", RCURLY],
|
||||||
|
# ],
|
||||||
|
|
||||||
|
# "ExpressionList": [
|
||||||
|
# [],
|
||||||
|
# ["Expression"],
|
||||||
|
# ["Expression", COMMA, "ExpressionList"],
|
||||||
|
# ],
|
||||||
|
|
||||||
|
# # Match Expression
|
||||||
|
# "MatchExpression": [
|
||||||
|
# [MATCH, "MatchBody"],
|
||||||
|
# ],
|
||||||
|
# "MatchBody": [
|
||||||
|
# [LCURLY, "MatchArms", RCURLY],
|
||||||
|
# ],
|
||||||
|
# "MatchArms": [
|
||||||
|
# [],
|
||||||
|
# ["MatchArm"],
|
||||||
|
# ["MatchArm", COMMA, "MatchArms"],
|
||||||
|
# ],
|
||||||
|
# "MatchArm": [
|
||||||
|
# ["Pattern", ARROW, "Expression"],
|
||||||
|
# ],
|
||||||
|
|
||||||
|
# # Pattern
|
||||||
|
# "Pattern": [
|
||||||
|
# ["VariableBinding", "PatternCore", AND, "AndExpression"],
|
||||||
|
# ["VariableBinding", "PatternCore"],
|
||||||
|
# ["PatternCore", AND, "AndExpression"],
|
||||||
|
# ["PatternCore"],
|
||||||
|
# ],
|
||||||
|
# "PatternCore": [
|
||||||
|
# ["TypeExpression"],
|
||||||
|
# ["WildcardPattern"],
|
||||||
|
# ],
|
||||||
|
# "WildcardPattern": [[UNDERSCORE]],
|
||||||
|
# "VariableBinding": [[IDENTIFIER, COLON]],
|
||||||
|
|
||||||
|
# # Object Constructor
|
||||||
|
# "ObjectConstructorExpression": [
|
||||||
|
# [NEW, "TypeIdentifier", "FieldList"],
|
||||||
|
# ],
|
||||||
|
# "FieldList": [
|
||||||
|
# [LCURLY, "FieldValues", RCURLY],
|
||||||
|
# ],
|
||||||
|
# "FieldValues": [
|
||||||
|
# [],
|
||||||
|
# ["FieldValue"],
|
||||||
|
# ["FieldValue", COMMA, "FieldValues"],
|
||||||
|
# ],
|
||||||
|
# "FieldValue": [
|
||||||
|
# [IDENTIFIER],
|
||||||
|
# [IDENTIFIER, COLON, "Expression"],
|
||||||
|
# ],
|
||||||
|
}
|
||||||
|
# fmt: on
|
||||||
|
|
||||||
|
# dump_yacc(grammar)
|
||||||
|
grammar, precedence = desugar(grammar, precedence)
|
||||||
|
gen = parser_faster.GenerateLR1("File", grammar, precedence=precedence)
|
||||||
|
table = gen.gen_table()
|
||||||
|
print(parser_faster.format_table(gen, table))
|
||||||
|
print()
|
||||||
|
# tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"])
|
||||||
Loading…
Add table
Add a link
Reference in a new issue