Chaos: split tables, interactions, Terminal
- Tables are split into `actions` and `gotos` now to make formatting nicer - Token is renamed Terminal - Likes are now Florps - Lexer now loaded dynamically (badly)
This commit is contained in:
parent
71078f76b4
commit
56d24c5fb9
3 changed files with 342 additions and 286 deletions
234
grammar.py
234
grammar.py
|
|
@ -1,56 +1,56 @@
|
||||||
# This is an example grammar.
|
# This is an example grammar.
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from parser import Assoc, Grammar, Nothing, Token, rule, seq
|
from parser import Assoc, Grammar, Nothing, Terminal, rule, seq, Rule
|
||||||
|
|
||||||
ARROW = Token("Arrow")
|
ARROW = Terminal("Arrow")
|
||||||
AS = Token("As")
|
AS = Terminal("As")
|
||||||
BAR = Token("Bar")
|
BAR = Terminal("Bar")
|
||||||
CLASS = Token("Class")
|
CLASS = Terminal("Class")
|
||||||
COLON = Token("Colon")
|
COLON = Terminal("Colon")
|
||||||
ELSE = Token("Else")
|
ELSE = Terminal("Else")
|
||||||
FOR = Token("For")
|
FOR = Terminal("For")
|
||||||
FUN = Token("Fun")
|
FUN = Terminal("Fun")
|
||||||
IDENTIFIER = Token("Identifier")
|
IDENTIFIER = Terminal("Identifier")
|
||||||
IF = Token("If")
|
IF = Terminal("If")
|
||||||
IMPORT = Token("Import")
|
IMPORT = Terminal("Import")
|
||||||
IN = Token("In")
|
IN = Terminal("In")
|
||||||
LCURLY = Token("LeftBrace")
|
LCURLY = Terminal("LeftBrace")
|
||||||
LET = Token("Let")
|
LET = Terminal("Let")
|
||||||
RCURLY = Token("RightBrace")
|
RCURLY = Terminal("RightBrace")
|
||||||
RETURN = Token("Return")
|
RETURN = Terminal("Return")
|
||||||
SEMICOLON = Token("Semicolon")
|
SEMICOLON = Terminal("Semicolon")
|
||||||
STRING = Token("String")
|
STRING = Terminal("String")
|
||||||
WHILE = Token("While")
|
WHILE = Terminal("While")
|
||||||
EQUAL = Token("Equal")
|
EQUAL = Terminal("Equal")
|
||||||
LPAREN = Token("LeftParen")
|
LPAREN = Terminal("LeftParen")
|
||||||
RPAREN = Token("RightParen")
|
RPAREN = Terminal("RightParen")
|
||||||
COMMA = Token("Comma")
|
COMMA = Terminal("Comma")
|
||||||
SELF = Token("Selff")
|
SELF = Terminal("Selff")
|
||||||
OR = Token("Or")
|
OR = Terminal("Or")
|
||||||
IS = Token("Is")
|
IS = Terminal("Is")
|
||||||
AND = Token("And")
|
AND = Terminal("And")
|
||||||
EQUALEQUAL = Token("EqualEqual")
|
EQUALEQUAL = Terminal("EqualEqual")
|
||||||
BANGEQUAL = Token("BangEqual")
|
BANGEQUAL = Terminal("BangEqual")
|
||||||
LESS = Token("Less")
|
LESS = Terminal("Less")
|
||||||
GREATER = Token("Greater")
|
GREATER = Terminal("Greater")
|
||||||
LESSEQUAL = Token("LessEqual")
|
LESSEQUAL = Terminal("LessEqual")
|
||||||
GREATEREQUAL = Token("GreaterEqual")
|
GREATEREQUAL = Terminal("GreaterEqual")
|
||||||
PLUS = Token("Plus")
|
PLUS = Terminal("Plus")
|
||||||
MINUS = Token("Minus")
|
MINUS = Terminal("Minus")
|
||||||
STAR = Token("Star")
|
STAR = Terminal("Star")
|
||||||
SLASH = Token("Slash")
|
SLASH = Terminal("Slash")
|
||||||
NUMBER = Token("Number")
|
NUMBER = Terminal("Number")
|
||||||
TRUE = Token("True")
|
TRUE = Terminal("True")
|
||||||
FALSE = Token("False")
|
FALSE = Terminal("False")
|
||||||
BANG = Token("Bang")
|
BANG = Terminal("Bang")
|
||||||
DOT = Token("Dot")
|
DOT = Terminal("Dot")
|
||||||
MATCH = Token("Match")
|
MATCH = Terminal("Match")
|
||||||
EXPORT = Token("Export")
|
EXPORT = Terminal("Export")
|
||||||
UNDERSCORE = Token("Underscore")
|
UNDERSCORE = Terminal("Underscore")
|
||||||
NEW = Token("New")
|
NEW = Terminal("New")
|
||||||
LSQUARE = Token("LeftBracket")
|
LSQUARE = Terminal("LeftBracket")
|
||||||
RSQUARE = Token("RightBracket")
|
RSQUARE = Terminal("RightBracket")
|
||||||
|
|
||||||
|
|
||||||
class FineGrammar(Grammar):
|
class FineGrammar(Grammar):
|
||||||
|
|
@ -77,58 +77,58 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def file(self):
|
def file(self) -> Rule:
|
||||||
return self._file_statement_list
|
return self._file_statement_list
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _file_statement_list(self):
|
def _file_statement_list(self) -> Rule:
|
||||||
return self._file_statement | (self._file_statement_list + self._file_statement)
|
return self._file_statement | (self._file_statement_list + self._file_statement)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _file_statement(self):
|
def _file_statement(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
self.import_statement | self.class_declaration | self.export_statement | self.statement
|
self.import_statement | self.class_declaration | self.export_statement | self._statement
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def import_statement(self):
|
def import_statement(self) -> Rule:
|
||||||
return seq(IMPORT, STRING, AS, IDENTIFIER, SEMICOLON)
|
return seq(IMPORT, STRING, AS, IDENTIFIER, SEMICOLON)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def class_declaration(self):
|
def class_declaration(self) -> Rule:
|
||||||
return seq(CLASS, IDENTIFIER, self.class_body)
|
return seq(CLASS, IDENTIFIER, self.class_body)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def class_body(self):
|
def class_body(self) -> Rule:
|
||||||
return seq(LCURLY, RCURLY) | seq(LCURLY, self._class_members, RCURLY)
|
return seq(LCURLY, RCURLY) | seq(LCURLY, self._class_members, RCURLY)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _class_members(self):
|
def _class_members(self) -> Rule:
|
||||||
return self._class_member | seq(self._class_members, self._class_member)
|
return self._class_member | seq(self._class_members, self._class_member)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def _class_member(self):
|
def _class_member(self) -> Rule:
|
||||||
return self.field_declaration | self.function_declaration
|
return self.field_declaration | self.function_declaration
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def field_declaration(self):
|
def field_declaration(self) -> Rule:
|
||||||
return seq(IDENTIFIER, COLON, self.type_expression, SEMICOLON)
|
return seq(IDENTIFIER, COLON, self.type_expression, SEMICOLON)
|
||||||
|
|
||||||
# Types
|
# Types
|
||||||
@rule
|
@rule
|
||||||
def type_expression(self):
|
def type_expression(self) -> Rule:
|
||||||
return self.alternate_type | self.type_identifier
|
return self.alternate_type | self.type_identifier
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def alternate_type(self):
|
def alternate_type(self) -> Rule:
|
||||||
return seq(self.type_expression, OR, self.type_identifier)
|
return seq(self.type_expression, OR, self.type_identifier)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def type_identifier(self):
|
def type_identifier(self) -> Rule:
|
||||||
return IDENTIFIER
|
return IDENTIFIER
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def export_statement(self):
|
def export_statement(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(EXPORT, self.class_declaration)
|
seq(EXPORT, self.class_declaration)
|
||||||
| seq(EXPORT, self.function_declaration)
|
| seq(EXPORT, self.function_declaration)
|
||||||
|
|
@ -137,18 +137,18 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def export_list(self):
|
def export_list(self) -> Rule:
|
||||||
return Nothing | IDENTIFIER | seq(IDENTIFIER, COMMA, self.export_list)
|
return Nothing | IDENTIFIER | seq(IDENTIFIER, COMMA, self.export_list)
|
||||||
|
|
||||||
# Functions
|
# Functions
|
||||||
@rule
|
@rule
|
||||||
def function_declaration(self):
|
def function_declaration(self) -> Rule:
|
||||||
return seq(FUN, IDENTIFIER, self.function_parameters, self.block) | seq(
|
return seq(FUN, IDENTIFIER, self.function_parameters, self.block) | seq(
|
||||||
FUN, IDENTIFIER, self.function_parameters, ARROW, self.type_expression, self.block
|
FUN, IDENTIFIER, self.function_parameters, ARROW, self.type_expression, self.block
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def function_parameters(self):
|
def function_parameters(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(LPAREN, RPAREN)
|
seq(LPAREN, RPAREN)
|
||||||
| seq(LPAREN, self.first_parameter, RPAREN)
|
| seq(LPAREN, self.first_parameter, RPAREN)
|
||||||
|
|
@ -156,33 +156,33 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def first_parameter(self):
|
def first_parameter(self) -> Rule:
|
||||||
return SELF | self.parameter
|
return SELF | self.parameter
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def parameter_list(self):
|
def parameter_list(self) -> Rule:
|
||||||
return Nothing | self.parameter | seq(self.parameter, COMMA, self.parameter_list)
|
return Nothing | self.parameter | seq(self.parameter, COMMA, self.parameter_list)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def parameter(self):
|
def parameter(self) -> Rule:
|
||||||
return seq(IDENTIFIER, COLON, self.type_expression)
|
return seq(IDENTIFIER, COLON, self.type_expression)
|
||||||
|
|
||||||
# Block
|
# Block
|
||||||
@rule
|
@rule
|
||||||
def block(self):
|
def block(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(LCURLY, RCURLY)
|
seq(LCURLY, RCURLY)
|
||||||
| seq(LCURLY, self.expression, RCURLY)
|
| seq(LCURLY, self.expression, RCURLY)
|
||||||
| seq(LCURLY, self.statement_list, RCURLY)
|
| seq(LCURLY, self._statement_list, RCURLY)
|
||||||
| seq(LCURLY, self.statement_list, self.expression, RCURLY)
|
| seq(LCURLY, self._statement_list, self.expression, RCURLY)
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def statement_list(self):
|
def _statement_list(self) -> Rule:
|
||||||
return self.statement | seq(self.statement_list, self.statement)
|
return self._statement | seq(self._statement_list, self._statement)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def statement(self):
|
def _statement(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
self.function_declaration
|
self.function_declaration
|
||||||
| self.let_statement
|
| self.let_statement
|
||||||
|
|
@ -194,56 +194,56 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def let_statement(self):
|
def let_statement(self) -> Rule:
|
||||||
return seq(LET, IDENTIFIER, EQUAL, self.expression, SEMICOLON)
|
return seq(LET, IDENTIFIER, EQUAL, self.expression, SEMICOLON)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def return_statement(self):
|
def return_statement(self) -> Rule:
|
||||||
return seq(RETURN, self.expression, SEMICOLON) | seq(RETURN, SEMICOLON)
|
return seq(RETURN, self.expression, SEMICOLON) | seq(RETURN, SEMICOLON)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def for_statement(self):
|
def for_statement(self) -> Rule:
|
||||||
return seq(FOR, self.iterator_variable, IN, self.expression, self.block)
|
return seq(FOR, self.iterator_variable, IN, self.expression, self.block)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def iterator_variable(self):
|
def iterator_variable(self) -> Rule:
|
||||||
return IDENTIFIER
|
return IDENTIFIER
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def if_statement(self):
|
def if_statement(self) -> Rule:
|
||||||
return self.conditional_expression
|
return self.conditional_expression
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def while_statement(self):
|
def while_statement(self) -> Rule:
|
||||||
return seq(WHILE, self.expression, self.block)
|
return seq(WHILE, self.expression, self.block)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def expression_statement(self):
|
def expression_statement(self) -> Rule:
|
||||||
return seq(self.expression, SEMICOLON)
|
return seq(self.expression, SEMICOLON)
|
||||||
|
|
||||||
# Expressions
|
# Expressions
|
||||||
@rule
|
@rule
|
||||||
def expression(self):
|
def expression(self) -> Rule:
|
||||||
return self.assignment_expression
|
return self.assignment_expression
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def assignment_expression(self):
|
def assignment_expression(self) -> Rule:
|
||||||
return seq(self.or_expression, EQUAL, self.assignment_expression) | self.or_expression
|
return seq(self.or_expression, EQUAL, self.assignment_expression) | self.or_expression
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def or_expression(self):
|
def or_expression(self) -> Rule:
|
||||||
return seq(self.or_expression, OR, self.is_expression) | self.is_expression
|
return seq(self.or_expression, OR, self.is_expression) | self.is_expression
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def is_expression(self):
|
def is_expression(self) -> Rule:
|
||||||
return seq(self.is_expression, IS, self.pattern) | self.and_expression
|
return seq(self.is_expression, IS, self.pattern) | self.and_expression
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def and_expression(self):
|
def and_expression(self) -> Rule:
|
||||||
return seq(self.and_expression, AND, self.equality_expression) | self.equality_expression
|
return seq(self.and_expression, AND, self.equality_expression) | self.equality_expression
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def equality_expression(self):
|
def equality_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(self.equality_expression, EQUALEQUAL, self.relation_expression)
|
seq(self.equality_expression, EQUALEQUAL, self.relation_expression)
|
||||||
| seq(self.equality_expression, BANGEQUAL, self.relation_expression)
|
| seq(self.equality_expression, BANGEQUAL, self.relation_expression)
|
||||||
|
|
@ -251,7 +251,7 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def relation_expression(self):
|
def relation_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(self.relation_expression, LESS, self.additive_expression)
|
seq(self.relation_expression, LESS, self.additive_expression)
|
||||||
| seq(self.relation_expression, LESSEQUAL, self.additive_expression)
|
| seq(self.relation_expression, LESSEQUAL, self.additive_expression)
|
||||||
|
|
@ -261,7 +261,7 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def additive_expression(self):
|
def additive_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(self.additive_expression, PLUS, self.multiplication_expression)
|
seq(self.additive_expression, PLUS, self.multiplication_expression)
|
||||||
| seq(self.additive_expression, MINUS, self.multiplication_expression)
|
| seq(self.additive_expression, MINUS, self.multiplication_expression)
|
||||||
|
|
@ -269,7 +269,7 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def multiplication_expression(self):
|
def multiplication_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(self.multiplication_expression, STAR, self.primary_expression)
|
seq(self.multiplication_expression, STAR, self.primary_expression)
|
||||||
| seq(self.multiplication_expression, SLASH, self.primary_expression)
|
| seq(self.multiplication_expression, SLASH, self.primary_expression)
|
||||||
|
|
@ -277,7 +277,7 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def primary_expression(self):
|
def primary_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
IDENTIFIER
|
IDENTIFIER
|
||||||
| SELF
|
| SELF
|
||||||
|
|
@ -299,7 +299,7 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def conditional_expression(self):
|
def conditional_expression(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(IF, self.expression, self.block)
|
seq(IF, self.expression, self.block)
|
||||||
| seq(IF, self.expression, self.block, ELSE, self.conditional_expression)
|
| seq(IF, self.expression, self.block, ELSE, self.conditional_expression)
|
||||||
|
|
@ -307,11 +307,11 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def list_constructor_expression(self):
|
def list_constructor_expression(self) -> Rule:
|
||||||
return seq(LSQUARE, RSQUARE) | seq(LSQUARE, self.expression_list, RSQUARE)
|
return seq(LSQUARE, RSQUARE) | seq(LSQUARE, self.expression_list, RSQUARE)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def expression_list(self):
|
def expression_list(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
self.expression
|
self.expression
|
||||||
| seq(self.expression, COMMA)
|
| seq(self.expression, COMMA)
|
||||||
|
|
@ -319,15 +319,15 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def match_expression(self):
|
def match_expression(self) -> Rule:
|
||||||
return seq(MATCH, self.expression, self.match_body)
|
return seq(MATCH, self.expression, self.match_body)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def match_body(self):
|
def match_body(self) -> Rule:
|
||||||
return seq(LCURLY, RCURLY) | seq(LCURLY, self.match_arms, RCURLY)
|
return seq(LCURLY, RCURLY) | seq(LCURLY, self.match_arms, RCURLY)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def match_arms(self):
|
def match_arms(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
self.match_arm
|
self.match_arm
|
||||||
| seq(self.match_arm, COMMA)
|
| seq(self.match_arm, COMMA)
|
||||||
|
|
@ -335,11 +335,11 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def match_arm(self):
|
def match_arm(self) -> Rule:
|
||||||
return seq(self.pattern, ARROW, self.expression)
|
return seq(self.pattern, ARROW, self.expression)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def pattern(self):
|
def pattern(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
seq(self.variable_binding, self.pattern_core, AND, self.and_expression)
|
seq(self.variable_binding, self.pattern_core, AND, self.and_expression)
|
||||||
| seq(self.variable_binding, self.pattern_core)
|
| seq(self.variable_binding, self.pattern_core)
|
||||||
|
|
@ -348,27 +348,27 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def pattern_core(self):
|
def pattern_core(self) -> Rule:
|
||||||
return self.type_expression | self.wildcard_pattern
|
return self.type_expression | self.wildcard_pattern
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def wildcard_pattern(self):
|
def wildcard_pattern(self) -> Rule:
|
||||||
return UNDERSCORE
|
return UNDERSCORE
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def variable_binding(self):
|
def variable_binding(self) -> Rule:
|
||||||
return seq(IDENTIFIER, COLON)
|
return seq(IDENTIFIER, COLON)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def object_constructor_expression(self):
|
def object_constructor_expression(self) -> Rule:
|
||||||
return seq(NEW, self.type_identifier, self.field_list)
|
return seq(NEW, self.type_identifier, self.field_list)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def field_list(self):
|
def field_list(self) -> Rule:
|
||||||
return seq(LCURLY, RCURLY) | seq(LCURLY, self.field_values, RCURLY)
|
return seq(LCURLY, RCURLY) | seq(LCURLY, self.field_values, RCURLY)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def field_values(self):
|
def field_values(self) -> Rule:
|
||||||
return (
|
return (
|
||||||
self.field_value
|
self.field_value
|
||||||
| seq(self.field_value, COMMA)
|
| seq(self.field_value, COMMA)
|
||||||
|
|
@ -376,7 +376,7 @@ class FineGrammar(Grammar):
|
||||||
)
|
)
|
||||||
|
|
||||||
@rule
|
@rule
|
||||||
def field_value(self):
|
def field_value(self) -> Rule:
|
||||||
return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
|
return IDENTIFIER | seq(IDENTIFIER, COLON, self.expression)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -533,16 +533,19 @@ import bisect
|
||||||
class FineTokens:
|
class FineTokens:
|
||||||
def __init__(self, src: str):
|
def __init__(self, src: str):
|
||||||
self.src = src
|
self.src = src
|
||||||
self.tokens = list(tokenize(src))
|
self._tokens = list(tokenize(src))
|
||||||
self.lines = [m.start() for m in re.finditer("\n", src)]
|
self.lines = [m.start() for m in re.finditer("\n", src)]
|
||||||
|
|
||||||
|
def tokens(self):
|
||||||
|
return self._tokens
|
||||||
|
|
||||||
def dump(self, *, start=None, end=None):
|
def dump(self, *, start=None, end=None):
|
||||||
if start is None:
|
if start is None:
|
||||||
start = 0
|
start = 0
|
||||||
if end is None:
|
if end is None:
|
||||||
end = len(self.tokens)
|
end = len(self._tokens)
|
||||||
|
|
||||||
for token in self.tokens[start:end]:
|
for token in self._tokens[start:end]:
|
||||||
(kind, start, length) = token
|
(kind, start, length) = token
|
||||||
line_index = bisect.bisect_left(self.lines, start)
|
line_index = bisect.bisect_left(self.lines, start)
|
||||||
if line_index == 0:
|
if line_index == 0:
|
||||||
|
|
@ -553,14 +556,3 @@ class FineTokens:
|
||||||
print(
|
print(
|
||||||
f"{start:04} {kind.value:12} {self.src[start:start+length]} ({line_index}, {column_index})"
|
f"{start:04} {kind.value:12} {self.src[start:start+length]} ({line_index}, {column_index})"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
grammar = FineGrammar()
|
|
||||||
table = grammar.build_table(start="expression")
|
|
||||||
|
|
||||||
print(f"{len(table)} states")
|
|
||||||
|
|
||||||
average_entries = sum(len(row) for row in table) / len(table)
|
|
||||||
max_entries = max(len(row) for row in table)
|
|
||||||
print(f"{average_entries} average, {max_entries} max")
|
|
||||||
|
|
|
||||||
197
harness.py
197
harness.py
|
|
@ -7,11 +7,12 @@ import select
|
||||||
import sys
|
import sys
|
||||||
import termios
|
import termios
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
import tty
|
import tty
|
||||||
|
import types
|
||||||
import typing
|
import typing
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
import grammar
|
|
||||||
import parser
|
import parser
|
||||||
|
|
||||||
# from parser import Token, Grammar, rule, seq
|
# from parser import Token, Grammar, rule, seq
|
||||||
|
|
@ -47,7 +48,8 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
|
||||||
This is not a *great* parser, it's really just a demo for what you can
|
This is not a *great* parser, it's really just a demo for what you can
|
||||||
do with the table.
|
do with the table.
|
||||||
"""
|
"""
|
||||||
input: list[str] = [t.value for (t, _, _) in tokens.tokens]
|
input_tokens = tokens.tokens()
|
||||||
|
input: list[str] = [t.value for (t, _, _) in input_tokens]
|
||||||
|
|
||||||
assert "$" not in input
|
assert "$" not in input
|
||||||
input = input + ["$"]
|
input = input + ["$"]
|
||||||
|
|
@ -61,7 +63,7 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
|
||||||
current_state = stack[-1][0]
|
current_state = stack[-1][0]
|
||||||
current_token = input[input_index]
|
current_token = input[input_index]
|
||||||
|
|
||||||
action = table.states[current_state].get(current_token, parser.Error())
|
action = table.actions[current_state].get(current_token, parser.Error())
|
||||||
if trace:
|
if trace:
|
||||||
trace(stack, input, input_index, action)
|
trace(stack, input, input_index, action)
|
||||||
|
|
||||||
|
|
@ -84,21 +86,21 @@ def parse(table: parser.ParseTable, tokens, trace=None) -> typing.Tuple[Tree | N
|
||||||
value = Tree(name=name if not transparent else None, children=tuple(children))
|
value = Tree(name=name if not transparent else None, children=tuple(children))
|
||||||
stack = stack[:-size]
|
stack = stack[:-size]
|
||||||
|
|
||||||
goto = table.states[stack[-1][0]].get(name, parser.Error())
|
goto = table.gotos[stack[-1][0]].get(name)
|
||||||
assert isinstance(goto, parser.Goto)
|
assert goto is not None
|
||||||
stack.append((goto.state, value))
|
stack.append((goto, value))
|
||||||
|
|
||||||
case parser.Shift(state):
|
case parser.Shift(state):
|
||||||
stack.append((state, current_token))
|
stack.append((state, current_token))
|
||||||
input_index += 1
|
input_index += 1
|
||||||
|
|
||||||
case parser.Error():
|
case parser.Error():
|
||||||
if input_index >= len(tokens.tokens):
|
if input_index >= len(input_tokens):
|
||||||
message = "Unexpected end of file"
|
message = "Unexpected end of file"
|
||||||
start = tokens.tokens[-1][1]
|
start = input_tokens[-1][1]
|
||||||
else:
|
else:
|
||||||
message = f"Syntax error: unexpected symbol {current_token}"
|
message = f"Syntax error: unexpected symbol {current_token}"
|
||||||
(_, start, _) = tokens.tokens[input_index]
|
(_, start, _) = input_tokens[input_index]
|
||||||
|
|
||||||
line_index = bisect.bisect_left(tokens.lines, start)
|
line_index = bisect.bisect_left(tokens.lines, start)
|
||||||
if line_index == 0:
|
if line_index == 0:
|
||||||
|
|
@ -147,7 +149,7 @@ def CSI(x: bytes) -> bytes:
|
||||||
return ESC(b"[" + x)
|
return ESC(b"[" + x)
|
||||||
|
|
||||||
|
|
||||||
CLEAR = CSI(b"2J")
|
CLEAR = CSI(b"H") + CSI(b"J")
|
||||||
|
|
||||||
|
|
||||||
def enter_alt_screen():
|
def enter_alt_screen():
|
||||||
|
|
@ -158,15 +160,108 @@ def leave_alt_screen():
|
||||||
sys.stdout.buffer.write(CSI(b"?1049l"))
|
sys.stdout.buffer.write(CSI(b"?1049l"))
|
||||||
|
|
||||||
|
|
||||||
|
class DynamicModule:
|
||||||
|
file_name: str
|
||||||
|
member_name: str | None
|
||||||
|
|
||||||
|
last_time: float | None
|
||||||
|
module: types.ModuleType | None
|
||||||
|
|
||||||
|
def __init__(self, file_name, member_name):
|
||||||
|
self.file_name = file_name
|
||||||
|
self.member_name = member_name
|
||||||
|
|
||||||
|
self.last_time = None
|
||||||
|
self.module = None
|
||||||
|
self.value = None
|
||||||
|
|
||||||
|
def _predicate(self, member) -> bool:
|
||||||
|
if not inspect.isclass(member):
|
||||||
|
return False
|
||||||
|
|
||||||
|
assert self.module is not None
|
||||||
|
if member.__module__ != self.module.__name__:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _transform(self, value):
|
||||||
|
return value
|
||||||
|
|
||||||
|
def get(self):
|
||||||
|
st = os.stat(self.file_name)
|
||||||
|
if self.last_time == st.st_mtime:
|
||||||
|
assert self.value is not None
|
||||||
|
return self.value
|
||||||
|
|
||||||
|
self.value = None
|
||||||
|
|
||||||
|
if self.module is None:
|
||||||
|
mod_name = inspect.getmodulename(self.file_name)
|
||||||
|
if mod_name is None:
|
||||||
|
raise Exception(f"{self.file_name} does not seem to be a module")
|
||||||
|
self.module = importlib.import_module(mod_name)
|
||||||
|
else:
|
||||||
|
importlib.reload(self.module)
|
||||||
|
|
||||||
|
if self.member_name is None:
|
||||||
|
classes = inspect.getmembers(self.module, self._predicate)
|
||||||
|
if len(classes) == 0:
|
||||||
|
raise Exception(f"No grammars found in {self.file_name}")
|
||||||
|
if len(classes) > 1:
|
||||||
|
raise Exception(
|
||||||
|
f"{len(classes)} grammars found in {self.file_name}: {', '.join(c[0] for c in classes)}"
|
||||||
|
)
|
||||||
|
cls = classes[0][1]
|
||||||
|
else:
|
||||||
|
cls = getattr(self.module, self.member_name)
|
||||||
|
if cls is None:
|
||||||
|
raise Exception(f"Cannot find {self.member_name} in {self.file_name}")
|
||||||
|
if not self._predicate(cls):
|
||||||
|
raise Exception(f"{self.member_name} in {self.file_name} is not suitable")
|
||||||
|
|
||||||
|
self.value = self._transform(cls)
|
||||||
|
self.last_time = st.st_mtime
|
||||||
|
return self.value
|
||||||
|
|
||||||
|
|
||||||
|
class DynamicGrammarModule(DynamicModule):
|
||||||
|
def __init__(self, file_name, member_name, start_rule, generator):
|
||||||
|
super().__init__(file_name, member_name)
|
||||||
|
|
||||||
|
self.start_rule = start_rule
|
||||||
|
self.generator = generator
|
||||||
|
|
||||||
|
def _predicate(self, member) -> bool:
|
||||||
|
if not super()._predicate(member):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if getattr(member, "build_table", None):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _transform(self, value):
|
||||||
|
return value().build_table(start=self.start_rule, generator=self.generator)
|
||||||
|
|
||||||
|
|
||||||
|
class DynamicLexerModule(DynamicModule):
|
||||||
|
def _predicate(self, member) -> bool:
|
||||||
|
if not super()._predicate(member):
|
||||||
|
return False
|
||||||
|
|
||||||
|
if getattr(member, "tokens", None):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class Harness:
|
class Harness:
|
||||||
source: str | None
|
source: str | None
|
||||||
table: parser.ParseTable | None
|
table: parser.ParseTable | None
|
||||||
tree: Tree | None
|
tree: Tree | None
|
||||||
|
|
||||||
def __init__(self, lexer_func, start_rule, source_path):
|
def __init__(self, start_rule, source_path):
|
||||||
# self.generator = parser.GenerateLR1
|
|
||||||
self.generator = parser.GenerateLALR
|
|
||||||
self.lexer_func = lexer_func
|
|
||||||
self.start_rule = start_rule
|
self.start_rule = start_rule
|
||||||
self.source_path = source_path
|
self.source_path = source_path
|
||||||
|
|
||||||
|
|
@ -176,10 +271,11 @@ class Harness:
|
||||||
self.tree = None
|
self.tree = None
|
||||||
self.errors = None
|
self.errors = None
|
||||||
|
|
||||||
self.grammar_file_name = "./grammar.py"
|
self.grammar_module = DynamicGrammarModule(
|
||||||
self.last_grammar_time = None
|
"./grammar.py", None, self.start_rule, generator=parser.GenerateLALR
|
||||||
self.grammar_module = None
|
)
|
||||||
self.grammar_name = None
|
|
||||||
|
self.lexer_module = DynamicLexerModule("./grammar.py", None)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -191,71 +287,19 @@ class Harness:
|
||||||
|
|
||||||
self.update()
|
self.update()
|
||||||
|
|
||||||
# def should_reload_grammar(self):
|
|
||||||
|
|
||||||
def load_grammar(self) -> parser.ParseTable:
|
def load_grammar(self) -> parser.ParseTable:
|
||||||
st = os.stat(self.grammar_file_name)
|
return self.grammar_module.get()
|
||||||
if self.last_grammar_time == st.st_mtime:
|
|
||||||
assert self.table is not None
|
|
||||||
return self.table
|
|
||||||
|
|
||||||
self.table = None
|
|
||||||
|
|
||||||
if self.grammar_module is None:
|
|
||||||
mod_name = inspect.getmodulename(self.grammar_file_name)
|
|
||||||
if mod_name is None:
|
|
||||||
raise Exception(f"{self.grammar_file_name} does not seem to be a module")
|
|
||||||
self.grammar_module = importlib.import_module(mod_name)
|
|
||||||
else:
|
|
||||||
importlib.reload(self.grammar_module)
|
|
||||||
|
|
||||||
def is_grammar(cls):
|
|
||||||
if not inspect.isclass(cls):
|
|
||||||
return False
|
|
||||||
|
|
||||||
assert self.grammar_module is not None
|
|
||||||
if cls.__module__ != self.grammar_module.__name__:
|
|
||||||
return False
|
|
||||||
|
|
||||||
if getattr(cls, "build_table", None):
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
if self.grammar_name is None:
|
|
||||||
classes = inspect.getmembers(self.grammar_module, is_grammar)
|
|
||||||
if len(classes) == 0:
|
|
||||||
raise Exception(f"No grammars found in {self.grammar_file_name}")
|
|
||||||
if len(classes) > 1:
|
|
||||||
raise Exception(
|
|
||||||
f"{len(classes)} grammars found in {self.grammar_file_name}: {', '.join(c[0] for c in classes)}"
|
|
||||||
)
|
|
||||||
grammar_func = classes[0][1]
|
|
||||||
else:
|
|
||||||
cls = getattr(self.grammar_module, self.grammar_name)
|
|
||||||
if cls is None:
|
|
||||||
raise Exception(f"Cannot find {self.grammar_name} in {self.grammar_file_name}")
|
|
||||||
if not is_grammar(cls):
|
|
||||||
raise Exception(
|
|
||||||
f"{self.grammar_name} in {self.grammar_file_name} does not seem to be a grammar"
|
|
||||||
)
|
|
||||||
grammar_func = cls
|
|
||||||
|
|
||||||
self.table = grammar_func().build_table(start=self.start_rule, generator=self.generator)
|
|
||||||
self.last_grammar_time = st.st_mtime
|
|
||||||
|
|
||||||
assert self.table is not None
|
|
||||||
return self.table
|
|
||||||
|
|
||||||
def update(self):
|
def update(self):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
try:
|
try:
|
||||||
table = self.load_grammar()
|
table = self.load_grammar()
|
||||||
|
lexer_func = self.lexer_module.get()
|
||||||
|
|
||||||
with open(self.source_path, "r", encoding="utf-8") as f:
|
with open(self.source_path, "r", encoding="utf-8") as f:
|
||||||
self.source = f.read()
|
self.source = f.read()
|
||||||
|
|
||||||
self.tokens = self.lexer_func(self.source)
|
self.tokens = lexer_func(self.source)
|
||||||
lex_time = time.time()
|
lex_time = time.time()
|
||||||
|
|
||||||
# print(f"{tokens.lines}")
|
# print(f"{tokens.lines}")
|
||||||
|
|
@ -268,7 +312,9 @@ class Harness:
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.tree = None
|
self.tree = None
|
||||||
self.errors = [f"Error loading grammar: {e}"]
|
self.errors = ["Error loading grammar:"] + [
|
||||||
|
" " + l.rstrip() for fl in traceback.format_exception(e) for l in fl.splitlines()
|
||||||
|
]
|
||||||
parse_elapsed = time.time() - start_time
|
parse_elapsed = time.time() - start_time
|
||||||
table = None
|
table = None
|
||||||
|
|
||||||
|
|
@ -276,7 +322,7 @@ class Harness:
|
||||||
rows, cols = termios.tcgetwinsize(sys.stdout.fileno())
|
rows, cols = termios.tcgetwinsize(sys.stdout.fileno())
|
||||||
|
|
||||||
if table is not None:
|
if table is not None:
|
||||||
states = table.states
|
states = table.actions
|
||||||
average_entries = sum(len(row) for row in states) / len(states)
|
average_entries = sum(len(row) for row in states) / len(states)
|
||||||
max_entries = max(len(row) for row in states)
|
max_entries = max(len(row) for row in states)
|
||||||
print(
|
print(
|
||||||
|
|
@ -320,7 +366,6 @@ if __name__ == "__main__":
|
||||||
enter_alt_screen()
|
enter_alt_screen()
|
||||||
|
|
||||||
h = Harness(
|
h = Harness(
|
||||||
lexer_func=grammar.FineTokens,
|
|
||||||
start_rule="file",
|
start_rule="file",
|
||||||
source_path=source_path,
|
source_path=source_path,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
197
parser.py
197
parser.py
|
|
@ -21,10 +21,10 @@ To get started, create a grammar that derives from the `Grammar` class. Create
|
||||||
one method per nonterminal, decorated with the `rule` decorator. Here's an
|
one method per nonterminal, decorated with the `rule` decorator. Here's an
|
||||||
example:
|
example:
|
||||||
|
|
||||||
PLUS = Token('+')
|
PLUS = Terminal('+')
|
||||||
LPAREN = Token('(')
|
LPAREN = Terminal('(')
|
||||||
RPAREN = Token(')')
|
RPAREN = Terminal(')')
|
||||||
ID = Token('id')
|
ID = Terminal('id')
|
||||||
|
|
||||||
class SimpleGrammar(Grammar):
|
class SimpleGrammar(Grammar):
|
||||||
@rule
|
@rule
|
||||||
|
|
@ -410,11 +410,6 @@ class Shift(Action):
|
||||||
state: int
|
state: int
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
|
||||||
class Goto(Action):
|
|
||||||
state: int
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class Accept(Action):
|
class Accept(Action):
|
||||||
pass
|
pass
|
||||||
|
|
@ -511,8 +506,7 @@ class ErrorCollection:
|
||||||
case Accept():
|
case Accept():
|
||||||
action_str = "accept the parse"
|
action_str = "accept the parse"
|
||||||
case _:
|
case _:
|
||||||
assert isinstance(action, Goto)
|
raise Exception(f"unknown action type {action}")
|
||||||
raise Exception("Shouldn't conflict on goto ever")
|
|
||||||
|
|
||||||
lines.append(
|
lines.append(
|
||||||
f" - We are in the rule `{name}: {rule}` and we should {action_str}"
|
f" - We are in the rule `{name}: {rule}` and we should {action_str}"
|
||||||
|
|
@ -525,7 +519,53 @@ class ErrorCollection:
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class ParseTable:
|
class ParseTable:
|
||||||
states: list[dict[str, Action]]
|
actions: list[dict[str, Action]]
|
||||||
|
gotos: list[dict[str, int]]
|
||||||
|
|
||||||
|
def format(self):
|
||||||
|
"""Format a parser table so pretty."""
|
||||||
|
|
||||||
|
def format_action(actions: dict[str, Action], terminal: str):
|
||||||
|
action = actions.get(terminal)
|
||||||
|
match action:
|
||||||
|
case Accept():
|
||||||
|
return "accept"
|
||||||
|
case Shift(state=state):
|
||||||
|
return f"s{state}"
|
||||||
|
case Reduce(count=count):
|
||||||
|
return f"r{count}"
|
||||||
|
case _:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def format_goto(gotos: dict[str, int], nt: str):
|
||||||
|
index = gotos.get(nt)
|
||||||
|
if index is None:
|
||||||
|
return ""
|
||||||
|
else:
|
||||||
|
return str(index)
|
||||||
|
|
||||||
|
terminals = list(sorted({k for row in self.actions for k in row.keys()}))
|
||||||
|
nonterminals = list(sorted({k for row in self.gotos for k in row.keys()}))
|
||||||
|
|
||||||
|
header = " | {terms} | {nts}".format(
|
||||||
|
terms=" ".join(f"{terminal: <6}" for terminal in terminals),
|
||||||
|
nts=" ".join(f"{nt: <5}" for nt in nonterminals),
|
||||||
|
)
|
||||||
|
|
||||||
|
lines = [
|
||||||
|
header,
|
||||||
|
"-" * len(header),
|
||||||
|
] + [
|
||||||
|
"{index: <4} | {actions} | {gotos}".format(
|
||||||
|
index=i,
|
||||||
|
actions=" ".join(
|
||||||
|
"{0: <6}".format(format_action(actions, terminal)) for terminal in terminals
|
||||||
|
),
|
||||||
|
gotos=" ".join("{0: <5}".format(format_goto(gotos, nt)) for nt in nonterminals),
|
||||||
|
)
|
||||||
|
for i, (actions, gotos) in enumerate(zip(self.actions, self.gotos))
|
||||||
|
]
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
class TableBuilder(object):
|
class TableBuilder(object):
|
||||||
|
|
@ -536,12 +576,14 @@ class TableBuilder(object):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
errors: ErrorCollection
|
errors: ErrorCollection
|
||||||
table: list[dict[str, Action]]
|
actions: list[dict[str, Action]]
|
||||||
|
gotos: list[dict[str, int]]
|
||||||
alphabet: list[str]
|
alphabet: list[str]
|
||||||
precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
|
precedence: typing.Tuple[typing.Tuple[Assoc, int], ...]
|
||||||
transparents: set[str]
|
transparents: set[str]
|
||||||
|
|
||||||
row: None | list[typing.Tuple[None | Action, None | Configuration]]
|
action_row: None | list[typing.Tuple[None | Action, None | Configuration]]
|
||||||
|
goto_row: None | list[None | int]
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
|
@ -550,11 +592,14 @@ class TableBuilder(object):
|
||||||
transparents: set[str],
|
transparents: set[str],
|
||||||
):
|
):
|
||||||
self.errors = ErrorCollection()
|
self.errors = ErrorCollection()
|
||||||
self.table = []
|
self.actions = []
|
||||||
|
self.gotos = []
|
||||||
|
|
||||||
self.alphabet = alphabet
|
self.alphabet = alphabet
|
||||||
self.precedence = precedence
|
self.precedence = precedence
|
||||||
self.transparents = transparents
|
self.transparents = transparents
|
||||||
self.row = None
|
self.action_row = None
|
||||||
|
self.goto_row = None
|
||||||
|
|
||||||
def flush(self, all_sets: ConfigurationSetInfo) -> ParseTable:
|
def flush(self, all_sets: ConfigurationSetInfo) -> ParseTable:
|
||||||
"""Finish building the table and return it.
|
"""Finish building the table and return it.
|
||||||
|
|
@ -565,20 +610,31 @@ class TableBuilder(object):
|
||||||
if self.errors.any():
|
if self.errors.any():
|
||||||
errors = self.errors.format(self.alphabet, all_sets)
|
errors = self.errors.format(self.alphabet, all_sets)
|
||||||
raise ValueError(f"Errors building the table:\n\n{errors}")
|
raise ValueError(f"Errors building the table:\n\n{errors}")
|
||||||
return ParseTable(states=self.table)
|
return ParseTable(actions=self.actions, gotos=self.gotos)
|
||||||
|
|
||||||
def new_row(self, config_set: ConfigSet):
|
def new_row(self, config_set: ConfigSet):
|
||||||
"""Start a new row, processing the given config set. Call this before
|
"""Start a new row, processing the given config set. Call this before
|
||||||
doing anything else.
|
doing anything else.
|
||||||
"""
|
"""
|
||||||
self._flush_row()
|
self._flush_row()
|
||||||
self.row = [(None, None) for _ in self.alphabet]
|
self.action_row = [(None, None) for _ in self.alphabet]
|
||||||
|
self.goto_row = [None for _ in self.alphabet]
|
||||||
self.current_config_set = config_set
|
self.current_config_set = config_set
|
||||||
|
|
||||||
def _flush_row(self):
|
def _flush_row(self):
|
||||||
if self.row:
|
if self.action_row:
|
||||||
actions = {self.alphabet[k]: v[0] for k, v in enumerate(self.row) if v[0] is not None}
|
actions = {
|
||||||
self.table.append(actions)
|
self.alphabet[sym]: e[0]
|
||||||
|
for sym, e in enumerate(self.action_row)
|
||||||
|
if e[0] is not None
|
||||||
|
}
|
||||||
|
|
||||||
|
self.actions.append(actions)
|
||||||
|
|
||||||
|
if self.goto_row:
|
||||||
|
gotos = {self.alphabet[sym]: e for sym, e in enumerate(self.goto_row) if e is not None}
|
||||||
|
|
||||||
|
self.gotos.append(gotos)
|
||||||
|
|
||||||
def set_table_reduce(self, symbol: int, config: Configuration):
|
def set_table_reduce(self, symbol: int, config: Configuration):
|
||||||
"""Mark a reduce of the given configuration for the given symbol in the
|
"""Mark a reduce of the given configuration for the given symbol in the
|
||||||
|
|
@ -604,7 +660,9 @@ class TableBuilder(object):
|
||||||
|
|
||||||
def set_table_goto(self, symbol: int, index: int):
|
def set_table_goto(self, symbol: int, index: int):
|
||||||
"""Set the goto for the given nonterminal symbol in the current row."""
|
"""Set the goto for the given nonterminal symbol in the current row."""
|
||||||
self._set_table_action(symbol, Goto(index), None)
|
assert self.goto_row is not None
|
||||||
|
assert self.goto_row[symbol] is None # ?
|
||||||
|
self.goto_row[symbol] = index
|
||||||
|
|
||||||
def _action_precedence(self, symbol: int, action: Action, config: Configuration):
|
def _action_precedence(self, symbol: int, action: Action, config: Configuration):
|
||||||
if isinstance(action, Shift):
|
if isinstance(action, Shift):
|
||||||
|
|
@ -620,8 +678,8 @@ class TableBuilder(object):
|
||||||
"""
|
"""
|
||||||
assert isinstance(symbol_id, int)
|
assert isinstance(symbol_id, int)
|
||||||
|
|
||||||
assert self.row is not None
|
assert self.action_row is not None
|
||||||
existing, existing_config = self.row[symbol_id]
|
existing, existing_config = self.action_row[symbol_id]
|
||||||
if existing is not None and existing != action:
|
if existing is not None and existing != action:
|
||||||
assert existing_config is not None
|
assert existing_config is not None
|
||||||
assert config is not None
|
assert config is not None
|
||||||
|
|
@ -675,7 +733,7 @@ class TableBuilder(object):
|
||||||
# action, just allow the overwrite with no change.
|
# action, just allow the overwrite with no change.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self.row[symbol_id] = (action, config)
|
self.action_row[symbol_id] = (action, config)
|
||||||
|
|
||||||
|
|
||||||
class GenerateLR0:
|
class GenerateLR0:
|
||||||
|
|
@ -1036,7 +1094,7 @@ def parse(table: ParseTable, input, trace=False):
|
||||||
current_state = stack[-1][0]
|
current_state = stack[-1][0]
|
||||||
current_token = input[input_index]
|
current_token = input[input_index]
|
||||||
|
|
||||||
action = table.states[current_state].get(current_token, Error())
|
action = table.actions[current_state].get(current_token, Error())
|
||||||
if trace:
|
if trace:
|
||||||
print(
|
print(
|
||||||
"{stack: <20} {input: <50} {action: <5}".format(
|
"{stack: <20} {input: <50} {action: <5}".format(
|
||||||
|
|
@ -1061,9 +1119,9 @@ def parse(table: ParseTable, input, trace=False):
|
||||||
value = (name if not transparent else None, tuple(children))
|
value = (name if not transparent else None, tuple(children))
|
||||||
stack = stack[:-size]
|
stack = stack[:-size]
|
||||||
|
|
||||||
goto = table.states[stack[-1][0]].get(name, Error())
|
goto = table.gotos[stack[-1][0]].get(name)
|
||||||
assert isinstance(goto, Goto)
|
assert goto is not None
|
||||||
stack.append((goto.state, value))
|
stack.append((goto, value))
|
||||||
|
|
||||||
case Shift(state):
|
case Shift(state):
|
||||||
stack.append((state, (current_token, ())))
|
stack.append((state, (current_token, ())))
|
||||||
|
|
@ -1554,7 +1612,7 @@ class Rule:
|
||||||
return SequenceRule(self, other)
|
return SequenceRule(self, other)
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def flatten(self) -> typing.Generator[list["str | Token"], None, None]:
|
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
|
||||||
"""Convert this potentially nested and branching set of rules into a
|
"""Convert this potentially nested and branching set of rules into a
|
||||||
series of nice, flat symbol lists.
|
series of nice, flat symbol lists.
|
||||||
|
|
||||||
|
|
@ -1574,7 +1632,7 @@ class Rule:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
class Token(Rule):
|
class Terminal(Rule):
|
||||||
"""A token, or terminal symbol in the grammar."""
|
"""A token, or terminal symbol in the grammar."""
|
||||||
|
|
||||||
value: str
|
value: str
|
||||||
|
|
@ -1582,7 +1640,7 @@ class Token(Rule):
|
||||||
def __init__(self, value):
|
def __init__(self, value):
|
||||||
self.value = sys.intern(value)
|
self.value = sys.intern(value)
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list["str | Token"], None, None]:
|
def flatten(self) -> typing.Generator[list["str | Terminal"], None, None]:
|
||||||
# We are just ourselves when flattened.
|
# We are just ourselves when flattened.
|
||||||
yield [self]
|
yield [self]
|
||||||
|
|
||||||
|
|
@ -1616,7 +1674,7 @@ class NonTerminal(Rule):
|
||||||
self.name = name or fn.__name__
|
self.name = name or fn.__name__
|
||||||
self.transparent = transparent
|
self.transparent = transparent
|
||||||
|
|
||||||
def generate_body(self, grammar) -> list[list[str | Token]]:
|
def generate_body(self, grammar) -> list[list[str | Terminal]]:
|
||||||
"""Generate the body of the non-terminal.
|
"""Generate the body of the non-terminal.
|
||||||
|
|
||||||
We do this by first calling the associated function in order to get a
|
We do this by first calling the associated function in order to get a
|
||||||
|
|
@ -1625,7 +1683,7 @@ class NonTerminal(Rule):
|
||||||
"""
|
"""
|
||||||
return [rule for rule in self.fn(grammar).flatten()]
|
return [rule for rule in self.fn(grammar).flatten()]
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str | Token], None, None]:
|
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||||
# Although we contain multitudes, when flattened we're being asked in
|
# Although we contain multitudes, when flattened we're being asked in
|
||||||
# the context of some other production. Yield ourselves, and trust that
|
# the context of some other production. Yield ourselves, and trust that
|
||||||
# in time we will be asked to generate our body.
|
# in time we will be asked to generate our body.
|
||||||
|
|
@ -1639,7 +1697,7 @@ class AlternativeRule(Rule):
|
||||||
self.left = left
|
self.left = left
|
||||||
self.right = right
|
self.right = right
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str | Token], None, None]:
|
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||||
# All the things from the left of the alternative, then all the things
|
# All the things from the left of the alternative, then all the things
|
||||||
# from the right, never intermingled.
|
# from the right, never intermingled.
|
||||||
yield from self.left.flatten()
|
yield from self.left.flatten()
|
||||||
|
|
@ -1655,7 +1713,7 @@ class SequenceRule(Rule):
|
||||||
self.first = first
|
self.first = first
|
||||||
self.second = second
|
self.second = second
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str | Token], None, None]:
|
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||||
# All the things in the prefix....
|
# All the things in the prefix....
|
||||||
for first in self.first.flatten():
|
for first in self.first.flatten():
|
||||||
# ...potentially followed by all the things in the suffix.
|
# ...potentially followed by all the things in the suffix.
|
||||||
|
|
@ -1668,7 +1726,7 @@ class NothingRule(Rule):
|
||||||
these, you're probably better off just using the singleton `Nothing`.
|
these, you're probably better off just using the singleton `Nothing`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def flatten(self) -> typing.Generator[list[str | Token], None, None]:
|
def flatten(self) -> typing.Generator[list[str | Terminal], None, None]:
|
||||||
# It's quiet in here.
|
# It's quiet in here.
|
||||||
yield []
|
yield []
|
||||||
|
|
||||||
|
|
@ -1720,10 +1778,10 @@ class Grammar:
|
||||||
|
|
||||||
Here's an example of a simple grammar:
|
Here's an example of a simple grammar:
|
||||||
|
|
||||||
PLUS = Token('+')
|
PLUS = Terminal('+')
|
||||||
LPAREN = Token('(')
|
LPAREN = Terminal('(')
|
||||||
RPAREN = Token(')')
|
RPAREN = Terminal(')')
|
||||||
ID = Token('id')
|
ID = Terminal('id')
|
||||||
|
|
||||||
class SimpleGrammar(Grammar):
|
class SimpleGrammar(Grammar):
|
||||||
@rule
|
@rule
|
||||||
|
|
@ -1745,7 +1803,7 @@ class Grammar:
|
||||||
precedence_table = {}
|
precedence_table = {}
|
||||||
for prec, (associativity, symbols) in enumerate(precedence):
|
for prec, (associativity, symbols) in enumerate(precedence):
|
||||||
for symbol in symbols:
|
for symbol in symbols:
|
||||||
if isinstance(symbol, Token):
|
if isinstance(symbol, Terminal):
|
||||||
key = symbol.value
|
key = symbol.value
|
||||||
elif isinstance(symbol, NonTerminal):
|
elif isinstance(symbol, NonTerminal):
|
||||||
key = symbol.name
|
key = symbol.name
|
||||||
|
|
@ -1758,7 +1816,7 @@ class Grammar:
|
||||||
|
|
||||||
def generate_nonterminal_dict(
|
def generate_nonterminal_dict(
|
||||||
self, start: str
|
self, start: str
|
||||||
) -> typing.Tuple[dict[str, list[list[str | Token]]], set[str]]:
|
) -> typing.Tuple[dict[str, list[list[str | Terminal]]], set[str]]:
|
||||||
"""Convert the rules into a dictionary of productions.
|
"""Convert the rules into a dictionary of productions.
|
||||||
|
|
||||||
Our table generators work on a very flat set of productions. This is the
|
Our table generators work on a very flat set of productions. This is the
|
||||||
|
|
@ -1785,7 +1843,7 @@ class Grammar:
|
||||||
body = rule.generate_body(self)
|
body = rule.generate_body(self)
|
||||||
for clause in body:
|
for clause in body:
|
||||||
for symbol in clause:
|
for symbol in clause:
|
||||||
if not isinstance(symbol, Token):
|
if not isinstance(symbol, Terminal):
|
||||||
assert isinstance(symbol, str)
|
assert isinstance(symbol, str)
|
||||||
nonterminal = nonterminals.get(symbol)
|
nonterminal = nonterminals.get(symbol)
|
||||||
if nonterminal is None:
|
if nonterminal is None:
|
||||||
|
|
@ -1811,7 +1869,7 @@ class Grammar:
|
||||||
for clause in clauses:
|
for clause in clauses:
|
||||||
new_clause = []
|
new_clause = []
|
||||||
for symbol in clause:
|
for symbol in clause:
|
||||||
if isinstance(symbol, Token):
|
if isinstance(symbol, Terminal):
|
||||||
new_clause.append(symbol.value)
|
new_clause.append(symbol.value)
|
||||||
else:
|
else:
|
||||||
new_clause.append(symbol)
|
new_clause.append(symbol)
|
||||||
|
|
@ -1842,45 +1900,6 @@ def format_node(node):
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
def format_table(generator, table: ParseTable):
|
|
||||||
"""Format a parser table so pretty."""
|
|
||||||
|
|
||||||
def format_action(state, terminal):
|
|
||||||
action = state.get(terminal, ("error",))
|
|
||||||
if action[0] == "accept":
|
|
||||||
return "accept"
|
|
||||||
elif action[0] == "shift":
|
|
||||||
return "s" + str(action[1])
|
|
||||||
elif action[0] == "error":
|
|
||||||
return ""
|
|
||||||
elif action[0] == "reduce":
|
|
||||||
return "r" + str(action[1])
|
|
||||||
|
|
||||||
terminals = list(sorted(generator.alphabet[i] for i, v in enumerate(generator.terminal) if v))
|
|
||||||
nonterminals = list(
|
|
||||||
sorted(generator.alphabet[i] for i, v in enumerate(generator.nonterminal) if v)
|
|
||||||
)
|
|
||||||
header = " | {terms} | {nts}".format(
|
|
||||||
terms=" ".join("{0: <6}".format(terminal) for terminal in terminals),
|
|
||||||
nts=" ".join("{0: <5}".format(nt) for nt in nonterminals),
|
|
||||||
)
|
|
||||||
|
|
||||||
lines = [
|
|
||||||
header,
|
|
||||||
"-" * len(header),
|
|
||||||
] + [
|
|
||||||
"{index: <3} | {actions} | {gotos}".format(
|
|
||||||
index=i,
|
|
||||||
actions=" ".join(
|
|
||||||
"{0: <6}".format(format_action(row, terminal)) for terminal in terminals
|
|
||||||
),
|
|
||||||
gotos=" ".join("{0: <5}".format(row.get(nt, ("error", ""))[1]) for nt in nonterminals),
|
|
||||||
)
|
|
||||||
for i, row in enumerate(table.states)
|
|
||||||
]
|
|
||||||
return "\n".join(lines)
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Examples
|
# Examples
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
@ -1901,7 +1920,7 @@ def examples():
|
||||||
|
|
||||||
gen = GenerateLR0("E", grammar_simple)
|
gen = GenerateLR0("E", grammar_simple)
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
print(format_table(gen, table))
|
print(table.format())
|
||||||
tree = parse(table, ["id", "+", "(", "id", ")"])
|
tree = parse(table, ["id", "+", "(", "id", ")"])
|
||||||
print(format_node(tree) + "\n")
|
print(format_node(tree) + "\n")
|
||||||
print()
|
print()
|
||||||
|
|
@ -1954,7 +1973,7 @@ def examples():
|
||||||
gen = GenerateSLR1("E", grammar_lr0_shift_reduce)
|
gen = GenerateSLR1("E", grammar_lr0_shift_reduce)
|
||||||
print(f"Follow('E'): {str([gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key['E'])])}")
|
print(f"Follow('E'): {str([gen.alphabet[f] for f in gen.gen_follow(gen.symbol_key['E'])])}")
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
print(format_table(gen, table))
|
print(table.format())
|
||||||
tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True)
|
tree = parse(table, ["id", "+", "(", "id", "[", "id", "]", ")"], trace=True)
|
||||||
print(format_node(tree) + "\n")
|
print(format_node(tree) + "\n")
|
||||||
print()
|
print()
|
||||||
|
|
@ -1985,7 +2004,7 @@ def examples():
|
||||||
]
|
]
|
||||||
gen = GenerateLR1("S", grammar_aho_ullman_2)
|
gen = GenerateLR1("S", grammar_aho_ullman_2)
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
print(format_table(gen, table))
|
print(table.format())
|
||||||
parse(table, ["b", "a", "a", "b"], trace=True)
|
parse(table, ["b", "a", "a", "b"], trace=True)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
@ -1993,7 +2012,7 @@ def examples():
|
||||||
print("grammar_aho_ullman_2 (LALR):")
|
print("grammar_aho_ullman_2 (LALR):")
|
||||||
gen = GenerateLALR("S", grammar_aho_ullman_2)
|
gen = GenerateLALR("S", grammar_aho_ullman_2)
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
print(format_table(gen, table))
|
print(table.format())
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# A fun LALAR grammar.
|
# A fun LALAR grammar.
|
||||||
|
|
@ -2009,7 +2028,7 @@ def examples():
|
||||||
]
|
]
|
||||||
gen = GenerateLALR("S", grammar_lalr)
|
gen = GenerateLALR("S", grammar_lalr)
|
||||||
table = gen.gen_table()
|
table = gen.gen_table()
|
||||||
print(format_table(gen, table))
|
print(table.format())
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue