diff --git a/dingus/dingus.js b/dingus/dingus.js index 4e28088..4f545da 100644 --- a/dingus/dingus.js +++ b/dingus/dingus.js @@ -129,17 +129,14 @@ function render_state(state, input_editor) { * otherwise just queue it for submission. */ function post_document(worker, kind, state, document) { - console.log("Received document", kind) if (window.localStorage) { window.localStorage.setItem(kind, document); } let new_state = {...state}; if (new_state.pending) { - console.log("Document parked", kind) new_state.next = document; } else { - console.log("Document submitted", kind) new_state.pending = document; new_state.next = null; worker.postMessage({kind, data: document}); @@ -154,7 +151,6 @@ function post_document(worker, kind, state, document) { function rotate_document(worker, kind, state) { let new_state = {...state, last: state.pending, pending: null}; if (new_state.next) { - console.log("Rotating document", kind) new_state.pending = new_state.next; new_state.next = null; worker.postMessage({kind, data: new_state.pending}); diff --git a/dingus/worker.js b/dingus/worker.js index 61eb7b1..8a5e66d 100644 --- a/dingus/worker.js +++ b/dingus/worker.js @@ -176,21 +176,19 @@ const pyodide_promise = setup_python(); async function load_grammar_module(code) { const pyodide = self.pyodide; - console.log("eval_grammar: Running"); + // console.log("Running..."); const my_fn = pyodide.globals.get("eval_grammar"); my_fn(code); my_fn.destroy(); - console.log("eval_grammar: Done"); } async function parse_document(code) { const pyodide = self.pyodide; - console.log("eval_document: Running"); + // console.log("Running..."); const my_fn = pyodide.globals.get("eval_document"); my_fn(code); my_fn.destroy(); - console.log("eval_document: Done"); } self.onmessage = async function(event) { @@ -199,10 +197,8 @@ self.onmessage = async function(event) { try { const { kind, data } = event.data; if (kind === "grammar") { - console.log("Worker received grammar") await load_grammar_module(data); } else if (kind === "input") { - console.log("Worker received input") await parse_document(data); } } catch (e) { diff --git a/examples/sql.py b/examples/sql.py index a5afb9f..06dfaca 100644 --- a/examples/sql.py +++ b/examples/sql.py @@ -1,16 +1,16 @@ from parser import * -IDENTIFIER = Terminal( - "IDENTIFIER", +NAME = Terminal( + "NAME", Re.seq( Re.set(("a", "z"), ("A", "Z"), "_"), Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), ), ) -STRING_LITERAL = Terminal( - "STRING_LITERAL", +STRING = Terminal( + "STRING", Re.seq( Re.literal("'"), (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(), @@ -19,16 +19,6 @@ STRING_LITERAL = Terminal( highlight=highlight.string.quoted, ) -NUMERIC_LITERAL = Terminal("NUMERIC_LITERAL", Re.set(("0", "9")).plus()) - -BLOB_LITERAL = Terminal("BLOB_TERMINAL", Re.literal("X") + STRING_LITERAL) - - -BIND_PARAMETER = Terminal( - "BIND_PARAMETER", - Re.literal("?") + Re.set(("0", "9")).star() | Re.set(":", "@", "$") + IDENTIFIER, -) - NUMBER = Terminal( "NUMBER", Re.seq( @@ -49,117 +39,64 @@ NUMBER = Terminal( OR = Terminal("OR", "or") AND = Terminal("AND", "and") NOT = Terminal("NOT", "not") +COMPARISON = Terminal( + "COMPARISON", + Re.literal("=") + | Re.literal("<>") + | Re.literal("<") + | Re.literal(">") + | Re.literal("<=") + | Re.literal(">="), +) PLUS = Terminal("PLUS", "+") MINUS = Terminal("MINUS", "-") STAR = Terminal("STAR", "*") SLASH = Terminal("SLASH", "/") -ABORT = Terminal("ABORT", "abort") -ACTION = Terminal("ACTION", "action") -ADD = Terminal("ADD", "add") -AFTER = Terminal("AFTER", "after") ALL = Terminal("ALL", "all") -ALTER = Terminal("ALTER", "alter") -ALWAYS = Terminal("ALWAYS", "always") AMMSC = Terminal("AMMSC", "ammsc") -AMPERSAND = Terminal("AMPERSAND", "&") -ANALYZE = Terminal("ANALYIZE", "analyze") ANY = Terminal("ANY", "any") AS = Terminal("AS", "as") ASC = Terminal("ASC", "asc") -ATTACH = Terminal("ATTACH", "attach") AUTHORIZATION = Terminal("AUTHORIZATION", "authorization") -AUTOINCREMENT = Terminal("AUTOINCREMENT", "autoincrement") -BEFORE = Terminal("BEFORE", "before") -BEGIN = Terminal("BEGIN", "begin") BETWEEN = Terminal("BETWEEN", "between") BY = Terminal("BY", "by") -CASCADE = Terminal("CASCADE", "cascade") -CASE = Terminal("CASE", "case") -CAST = Terminal("CAST", "cast") CHARACTER = Terminal("CHARACTER", "character") CHECK = Terminal("CHECK", "check") CLOSE = Terminal("CLOSE", "close") -COLLATE = Terminal("COLLATE", "collate") -COLUMN = Terminal("COLUMN", "column") COMMIT = Terminal("COMMIT", "commit") -CONFLICT = Terminal("CONFLICT", "conflict") -CONSTRAINT = Terminal("CONSTRAINT", "constraint") CONTINUE = Terminal("CONTINUE", "continue") CREATE = Terminal("CREATE", "create") -CROSS = Terminal("CROSS", "cross") CURRENT = Terminal("CURRENT", "current") -CURRENT_DATE = Terminal("CURRENT_DATE", "current_date") -CURRENT_TIME = Terminal("CURRENT_TIME", "current_time") -CURRENT_TIMESTAMP = Terminal("CURRENT_TIMESTAMP", "current_timestamp") CURSOR = Terminal("CURSOR", "cursor") -DATABASE = Terminal("DATABASE", "database") DECIMAL = Terminal("DECIMAL", "decimal") DECLARE = Terminal("DECLARE", "declare") DEFAULT = Terminal("DEFAULT", "default") -DEFERRABLE = Terminal("DEFERRABLE", "deferrable") -DEFERRED = Terminal("DEFERRED", "deferred") DELETE = Terminal("DELETE", "delete") DESC = Terminal("DESC", "desc") -DETACH = Terminal("DETACH", "detach") DISTINCT = Terminal("DISTINCT", "distinct") DOUBLE = Terminal("DOUBLE", "double") -DROP = Terminal("DROP", "drop") -EACH = Terminal("EACH", "each") -ELSE = Terminal("ELSE", "else") -END = Terminal("END", "end") -EQUALEQUAL = Terminal("EQUALEQUAL", "==") ESCAPE = Terminal("ESCAPE", "escape") -EXCLUSIVE = Terminal("EXCLUSIVE", "exclusive") EXISTS = Terminal("EXISTS", "exists") -EXPLAIN = Terminal("EXPLAIN", "explain") -FAIL = Terminal("FAIL", "fail") -FALSE = Terminal("FALSE", "false") FETCH = Terminal("FETCH", "fetch") FLOAT = Terminal("FLOAT", "float") FOR = Terminal("FOR", "for") FOREIGN = Terminal("FOREIGN", "foreign") FOUND = Terminal("FOUND", "found") FROM = Terminal("FROM", "from") -FULL = Terminal("FULL", "full") -GENERATED = Terminal("GENERATED", "generated") -GLOB = Terminal("GLOB", "glob") GOTO = Terminal("GOTO", "goto") GRANT = Terminal("GRANT", "grant") GROUP = Terminal("GROUP", "group") -GT = Terminal("GT", ">") -GT2 = Terminal("GT2", ">>") -GT_EQ = Terminal("GT_EQ", ">=") HAVING = Terminal("HAVING", "having") -IF = Terminal("IF", "if") -IGNORE = Terminal("IGNORE", "ignore") -IMMEDATE = Terminal("IMMEDIATE", "immedate") -IMMEDIATE = Terminal("IMMEDIATE", "immediate") IN = Terminal("IN", "in") -INDEX = Terminal("INDEX", "index") INDICATOR = Terminal("INDICATOR", "indicator") -INITIALLY = Terminal("INITIALLY", "initially") -INNER = Terminal("INNER", "inner") INSERT = Terminal("INSERT", "insert") -INSTEAD = Terminal("INSTEAD", "instead") INTEGER = Terminal("INTEGER", "integer") INTO = Terminal("INTO", "into") IS = Terminal("IS", "is") -ISNULL = Terminal("ISNULL", "isnull") # ?? -JOIN = Terminal("JOIN", "join") KEY = Terminal("KEY", "key") LANGUAGE = Terminal("LANGUAGE", "language") -LEFT = Terminal("LEFT", "left") LIKE = Terminal("LIKE", "like") -LT = Terminal("LT", "<") -LT2 = Terminal("LT2", "<<") -LT_EQ = Terminal("LT_EQ", "<=") -MATCH = Terminal("MATCH", "match") -NATURAL = Terminal("NATURAL", "natural") -NO = Terminal("NO", "no") -NOTHING = Terminal("NOTHING", "nothing") -NOTNULL = Terminal("NOTNULL", "notnull") # ?? -NOT_EQ = Terminal("NOT_EQ1", Re.literal("!=") | Re.literal("<>")) NULL = Terminal("NULL", "null") NUMERIC = Terminal("NUMERIC", "numeric") OF = Terminal("OF", "of") @@ -167,33 +104,15 @@ ON = Terminal("ON", "on") OPEN = Terminal("OPEN", "open") OPTION = Terminal("OPTION", "option") ORDER = Terminal("ORDER", "order") -OUTER = Terminal("OUTER", "outer") PARAMETER = Terminal("PARAMETER", "parameter") -PERCENT = Terminal("PERCENT", "%") -PIPE = Terminal("PIPE", "|") -PIPE2 = Terminal("PIPE2", "||") -PLAN = Terminal("PLAN", "plan") -PRAGMA = Terminal("PRAGMA", "pragma") PRECISION = Terminal("PRECISION", "precision") PRIMARY = Terminal("PRIMARY", "primary") PRIVILEGES = Terminal("PRIVILEGES", "privileges") PROCEDURE = Terminal("PROCEDURE", "procedure") PUBLIC = Terminal("PUBLIC", "public") -QUERY = Terminal("QUERY", "query") -RAISE = Terminal("RAISE", "raise") REAL = Terminal("REAL", "real") REFERENCES = Terminal("REFERENCES", "references") -REGEXP = Terminal("REGEXP", "regexp") -REINDEX = Terminal("REINDEX", "reindex") -RELEASE = Terminal("RELEASE", "release") -RENAME = Terminal("RENAME", "rename") -REPLACE = Terminal("REPLACE", "replace") -RESTRICT = Terminal("RESTRICT", "restrict") -RETURNING = Terminal("RETURNING", "returning") -RIGHT = Terminal("RIGHT", "right") ROLLBACK = Terminal("ROLLBACK", "rollback") -ROW = Terminal("ROW", "row") -SAVEPOINT = Terminal("SAVEPOINT", "savepoint") SCHEMA = Terminal("SCHEMA", "schema") SELECT = Terminal("SELECT", "select") SET = Terminal("SET", "set") @@ -201,59 +120,18 @@ SMALLINT = Terminal("SMALLINT", "smallint") SOME = Terminal("SOME", "some") SQLCODE = Terminal("SQLCODE", "sqlcode") SQLERROR = Terminal("SQLERROR", "sqlerror") -STORED = Terminal("STORED", "stored") TABLE = Terminal("TABLE", "table") -TEMP = Terminal("TEMP", "temp") -TEMPORARY = Terminal("TEMPORARY", "temporary") -THEN = Terminal("THEN", "then") TO = Terminal("TO", "to") -TRANSACTION = Terminal("TRANSACTION", "transaction") -TRIGGER = Terminal("TRIGGER", "trigger") -TRUE = Terminal("TRUE", "true") UNION = Terminal("UNION", "union") UNIQUE = Terminal("UNIQUE", "unique") UPDATE = Terminal("UPDATE", "update") USER = Terminal("USER", "user") -USING = Terminal("USING", "using") VALUES = Terminal("VALUES", "values") VIEW = Terminal("VIEW", "view") -VIRTUAL = Terminal("VIRTUAL", "virtual") -WHEN = Terminal("WHEN", "when") WHENEVER = Terminal("WHENEVER", "whenever") WHERE = Terminal("WHERE", "where") WITH = Terminal("WITH", "with") -WITHOUT = Terminal("WITHOUT", "without") WORK = Terminal("WORK", "work") -INTERSECT = Terminal("INTERSECT", "intersect") -EXCEPT = Terminal("EXCEPT", "except") -INDEXED = Terminal("INDEXED", "indexed") -VACUUM = Terminal("VACUUM", "vacuum") -FILTER = Terminal("FILTER", "filter") -PARTITION = Terminal("PARTITION", "partition") -EXCLUDE = Terminal("EXCLUDE", "exclude") -OTHERS = Terminal("OTHERS", "others") -TIES = Terminal("TIES", "ties") -RANGE = Terminal("RANGE", "range") -ROWS = Terminal("ROWS", "rows") -GROUPS = Terminal("GROUPS", "groups") -OVER = Terminal("OVER", "over") -RECURSIVE = Terminal("RECURSIVE", "recursive") -LIMIT = Terminal("LIMIT", "limit") -OFFSET = Terminal("OFFSET", "offset") -FIRST_VALUE = Terminal("FIRST_VALUE", "first_value") -LAST_VALUE = Terminal("LAST_VALUE", "last_value") -CUME_DIST = Terminal("CUME_DIST", "cume_dist") -PERCENT_RANK = Terminal("PERCENT_RANK", "percent_rank") -TILDE = Terminal("TILDE", "tilde") -DENSE_RANK = Terminal("DENSE_RANK", "dense_rank") -RANK = Terminal("RANK", "rank") -ROW_NUMBER = Terminal("ROW_NUMBER", "row_number") -LAG = Terminal("LAG", "lag") -LEAD = Terminal("LEAD", "lead") -NTH_VALUE = Terminal("NTH_VALUE", "nth_value") -NTILE = Terminal("NTILE", "ntile") -WINDOW = Terminal("WINDOW", "window") -DO = Terminal("DO", "do") SEMICOLON = Terminal("SEMICOLON", ";") LPAREN = Terminal("LPAREN", "(") @@ -282,778 +160,200 @@ def sql_list(): @rule def sql(): - return opt(EXPLAIN + opt(QUERY + PLAN)) + alt( - alter_table_stmt, - analyze_stmt, - attach_stmt, - begin_stmt, - commit_stmt, - create_index_stmt, - create_table_stmt, - create_trigger_stmt, - create_view_stmt, - create_virtual_table_stmt, - delete_stmt, - delete_stmt_limited, - detach_stmt, - drop_stmt, - insert_stmt, - pragma_stmt, - reindex_stmt, - release_stmt, - rollback_stmt, - savepoint_stmt, - select_stmt, - update_stmt, - update_stmt_limited, - vacuum_stmt, + return alt( + schema, + cursor_def, + manipulative_statement, + WHENEVER + NOT + FOUND + when_action, + WHENEVER + SQLERROR + when_action, ) @rule -def alter_table_stmt(): - return ( - ALTER - + TABLE - + opt(schema_name + DOT) - + table_name - + alt( - RENAME + alt((TO + table_name), (COLUMN + column_name + TO + column_name)), - (ADD + opt(COLUMN) + column_def), - (DROP + opt(COLUMN) + column_name), - ) - ) - - -@rule -def analyze_stmt(): - return ANALYZE + opt(alt(schema_name, opt(schema_name + DOT) + table_or_index_name)) - - -@rule -def attach_stmt(): - return ATTACH + opt(DATABASE) + expr + AS + schema_name - - -@rule -def begin_stmt(): - return BEGIN + opt(DEFERRED | IMMEDIATE | EXCLUSIVE) + opt(TRANSACTION + opt(transaction_name)) - - -@rule -def commit_stmt(): - return (COMMIT | END) + opt(TRANSACTION) - - -@rule -def rollback_stmt(): - return ROLLBACK + opt(TRANSACTION) + opt(TO + opt(SAVEPOINT) + savepoint_name) - - -@rule -def savepoint_stmt(): - return SAVEPOINT + savepoint_name - - -@rule -def release_stmt(): - return RELEASE + opt(SAVEPOINT) + savepoint_name - - -def comma_list(*rules: Rule) -> Rule: - """A list of `rule` separated by commas. Must have at least one, no trailing comma.""" - rule = seq(*rules) - return seq(rule, zero_or_more(COMMA, rule)) - - -@rule -def create_index_stmt(): +def schema(): return seq( CREATE, - opt(UNIQUE), - INDEX, - opt(IF + NOT + EXISTS), - opt(schema_name + DOT), - index_name, - ON, - table_name, - LPAREN, - comma_list(indexed_column), - RPAREN, - opt(WHERE + expr), + SCHEMA, + AUTHORIZATION, + user, + opt(schema_element_list), ) -@rule -def indexed_column(): - return (column_name | expr) + opt(COLLATE + collation_name) + opt(asc_desc) +@rule(transparent=True) +def schema_element_list() -> Rule: + return schema_element | (schema_element_list + schema_element) @rule -def create_table_stmt(): +def schema_element(): + return base_table_def | view_def | privilege_def + + +@rule +def base_table_def(): return seq( CREATE, - opt(TEMP | TEMPORARY), TABLE, - opt(IF, NOT, EXISTS), - opt(schema_name, DOT), - table_name, - alt( - seq( - LPAREN, - comma_list(column_def), - zero_or_more(COMMA, table_constraint), - RPAREN, - opt(WITHOUT, IDENTIFIER), - ), - seq(AS, select_stmt), - ), + table, + LPAREN, + base_table_element_commalist, + RPAREN, ) +@rule(transparent=True) +def base_table_element_commalist() -> Rule: + return opt(base_table_element_commalist + COMMA) + base_table_element + + +@rule(transparent=True) +def base_table_element(): + return column_def | table_constraint_def + + @rule def column_def(): - return column_name + opt(type_name) + zero_or_more(column_constraint) + return column + data_type + opt(column_def_list) -@rule -def type_name(): - return name + opt(LPAREN, signed_number, opt(COMMA, signed_number), RPAREN) - - -@rule -def column_constraint(): - return seq( - opt(CONSTRAINT, name), - alt( - seq(PRIMARY, KEY, opt(asc_desc), opt(conflict_clause), opt(AUTOINCREMENT)), - seq(opt(NOT), (NULL | UNIQUE), opt(conflict_clause)), - seq(DEFAULT, signed_number | literal_value | seq(LPAREN, expr, RPAREN)), - seq(COLLATE, collation_name), - foreign_key_clause, - seq(opt(GENERATED, ALWAYS), AS, LPAREN, expr, RPAREN, opt(STORED | VIRTUAL)), - ), +@rule(transparent=True) +def column_def_list() -> Rule: + return alt( + column_def_list + column_def_opt, + column_def_opt, ) @rule -def signed_number(): - return opt(PLUS | MINUS) + NUMERIC_LITERAL - - -@rule -def table_constraint(): - return seq( - opt(CONSTRAINT, name), - alt( - seq( - (PRIMARY + KEY | UNIQUE), - LPAREN, - comma_list(indexed_column), - RPAREN, - opt(conflict_clause), - ), - seq(CHECK, LPAREN, expr, RPAREN), - seq( - FOREIGN, - KEY, - LPAREN, - comma_list(column_name), - RPAREN, - foreign_key_clause, - ), - ), +def column_def_opt(): + return alt( + NOT + opt(NULL + opt(alt(UNIQUE, PRIMARY + KEY))), + DEFAULT + literal, + DEFAULT + NULL, + DEFAULT + USER, + CHECK + LPAREN + search_condition + RPAREN, + REFERENCES + table, + REFERENCES + table + LPAREN + column_commalist + RPAREN, ) @rule -def foreign_key_clause(): - return seq( - REFERENCES, - foreign_table, - opt(LPAREN, comma_list(column_name), RPAREN), - zero_or_more( - alt( - seq( - ON, - (DELETE | UPDATE), - alt( - SET + (NULL | DEFAULT), - CASCADE, - RESTRICT, - NO + ACTION, - ), - ), - MATCH + name, - ), +def table_constraint_def(): + return alt( + UNIQUE + LPAREN + column_commalist + RPAREN, + PRIMARY + KEY + LPAREN + column_commalist + RPAREN, + seq( + FOREIGN, + KEY, + LPAREN, + column_commalist, + RPAREN, + REFERENCES, + table, + opt(LPAREN + column_commalist + RPAREN), ), - opt(opt(NOT), DEFERRABLE, opt(INITIALLY, (DEFERRED | IMMEDIATE))), + CHECK + LPAREN + search_condition + RPAREN, ) -@rule -def conflict_clause(): - return seq(ON, CONFLICT, ROLLBACK | ABORT | FAIL | IGNORE | REPLACE) +@rule(transparent=True) +def column_commalist() -> Rule: + return opt(column_commalist + COMMA) + column @rule -def create_trigger_stmt(): +def view_def(): return seq( CREATE, - opt(TEMP | TEMPORARY), - TRIGGER, - opt(IF, NOT, EXISTS), - opt(schema_name, DOT), - trigger_name, - opt(BEFORE | AFTER | (INSTEAD + OF)), - (DELETE | INSERT | (UPDATE + opt(OF, comma_list(column_name)))), - ON, - table_name, - opt(FOR, EACH, ROW), - opt(WHEN, expr), - BEGIN, - one_or_more((update_stmt | insert_stmt | delete_stmt | select_stmt), SEMICOLON), - END, - ) - - -@rule -def create_view_stmt(): - return seq( - CREATE, - opt(TEMP | TEMPORARY), VIEW, - opt(IF, NOT, EXISTS), - opt(schema_name, DOT), - view_name, - opt(LPAREN, comma_list(column_name), RPAREN), + table, + opt(LPAREN + column_commalist + RPAREN), AS, - select_stmt, + query_spec, + opt(with_check_option), ) @rule -def create_virtual_table_stmt(): - return seq( - CREATE, - VIRTUAL, - TABLE, - opt(IF, NOT, EXISTS), - opt(schema_name, DOT), - table_name, - USING, - module_name, - opt(LPAREN, comma_list(module_argument), RPAREN), - ) - - -@rule -def with_clause(): - return seq( - WITH, - opt(RECURSIVE), - comma_list(cte_table_name, AS, LPAREN, select_stmt, RPAREN), - ) - - -@rule -def cte_table_name(): - return table_name + opt(LPAREN, comma_list(column_name), RPAREN) - - -@rule -def recursive_cte(): - return seq( - cte_table_name, - AS, - LPAREN, - initial_select, - UNION, - opt(ALL), - recursive_select, - RPAREN, - ) - - -@rule -def common_table_expression(): - return seq( - table_name, - opt(LPAREN, comma_list(column_name), RPAREN), - AS, - LPAREN, - select_stmt, - RPAREN, - ) - - -@rule -def delete_stmt(): - return seq( - opt(with_clause), - DELETE, - FROM, - qualified_table_name, - opt(WHERE, expr), - opt(returning_clause), - ) - - -@rule -def delete_stmt_limited(): - return seq( - opt(with_clause), - DELETE, - FROM, - qualified_table_name, - opt(WHERE, expr), - opt(returning_clause), - opt( - opt(order_by_stmt), - limit_stmt, - ), - ) - - -@rule -def detach_stmt(): - return DETACH + opt(DATABASE) + schema_name - - -@rule -def drop_stmt(): - return seq( - DROP, - (INDEX | TABLE | TRIGGER | VIEW), - opt(IF, EXISTS), - opt(schema_name, DOT), - any_name, - ) - - -# -# SQLite understands the following binary operators, in order from highest to lowest precedence: -# || -# * / % -# + - -# << >> & | -# < <= > >= -# = == != <> IS IS NOT IS DISTINCT FROM IS NOT DISTINCT FROM IN LIKE GLOB MATCH REGEXP -# AND -# OR -# -@rule -def expr(): - return alt( - literal_value, - BIND_PARAMETER, - opt(opt(schema_name, DOT), table_name, DOT) + column_name, - unary_operator + expr, - expr + PIPE2 + expr, - expr + (STAR | SLASH | PERCENT) + expr, - expr + (PLUS | MINUS) + expr, - expr + (LT2 | GT2 | AMPERSAND | PIPE) + expr, - expr + (LT | LT_EQ | GT | GT_EQ) + expr, - seq( - expr, - alt( - EQUAL, - EQUALEQUAL, - NOT_EQ, - IS, - seq(IS, NOT), - seq(IS, opt(NOT), DISTINCT, FROM), - IN, - LIKE, - GLOB, - MATCH, - REGEXP, - ), - expr, - ), - expr + AND + expr, - expr + OR + expr, - seq( - function_name, - LPAREN, - opt((opt(DISTINCT) + comma_list(expr)) | STAR), - RPAREN, - opt(filter_clause), - opt(over_clause), - ), - LPAREN + comma_list(expr) + RPAREN, - CAST + LPAREN + expr + AS + type_name + RPAREN, - expr + COLLATE + collation_name, - expr + opt(NOT) + (LIKE | GLOB | REGEXP | MATCH) + expr + opt(ESCAPE, expr), - expr + (ISNULL | NOTNULL | seq(NOT, NULL)), - expr + IS + opt(NOT) + expr, - expr + opt(NOT) + BETWEEN + expr + AND + expr, - seq( - expr, - opt(NOT), - IN, - alt( - LPAREN + opt(select_stmt | comma_list(expr)) + RPAREN, - opt(schema_name, DOT) + table_name, - seq( - opt(schema_name, DOT), - table_function_name, - LPAREN, - opt(comma_list(expr)), - RPAREN, - ), - ), - ), - opt(opt(NOT), EXISTS) + LPAREN + select_stmt + RPAREN, - CASE + opt(expr) + one_or_more(WHEN, expr, THEN, expr) + opt(ELSE, expr) + END, - raise_function, - ) - - -@rule -def raise_function(): - return seq( - RAISE, LPAREN, (IGNORE | seq((ROLLBACK | ABORT | FAIL), COMMA, error_message)), RPAREN - ) - - -@rule -def literal_value(): - return alt( - NUMERIC_LITERAL, - STRING_LITERAL, - BLOB_LITERAL, - NULL, - TRUE, - FALSE, - CURRENT_TIME, - CURRENT_DATE, - CURRENT_TIMESTAMP, - ) - - -@rule -def value_row(): - return LPAREN + comma_list(expr) + RPAREN - - -@rule -def values_clause(): - return VALUES + comma_list(value_row) - - -@rule -def insert_stmt(): - return seq( - opt(with_clause), - INSERT | REPLACE | seq(INSERT, OR, REPLACE | ROLLBACK | ABORT | FAIL | IGNORE), - INTO, - opt(schema_name, DOT), - table_name, - opt(AS, table_alias), - opt(LPAREN, comma_list(column_name), RPAREN), - (((values_clause | select_stmt) + opt(upsert_clause)) | seq(DEFAULT, VALUES)), - opt(returning_clause), - ) - - -@rule -def returning_clause(): - return RETURNING + comma_list(result_column) - - -@rule -def upsert_clause(): +def with_check_option(): + return WITH + CHECK + OPTION + + +@rule +def privilege_def(): return seq( + GRANT, + privileges, ON, - CONFLICT, - opt(LPAREN, comma_list(indexed_column), RPAREN, opt(WHERE, expr)), - DO, - alt( - NOTHING, - seq( - UPDATE, - SET, - comma_list((column_name | column_name_list), EQUAL, expr), - opt(WHERE, expr), - ), - ), + table, + TO, + grantee_commalist, + opt(with_grant_option), ) @rule -def pragma_stmt(): - return seq( - PRAGMA, - opt(schema_name, DOT), - pragma_name, - opt((EQUAL + pragma_value) | (LPAREN + pragma_value + RPAREN)), - ) +def with_grant_option(): + return WITH + GRANT + OPTION @rule -def pragma_value(): - return signed_number | name | STRING_LITERAL +def privileges(): + return (ALL + PRIVILEGES) | ALL | operation_commalist + + +@rule(transparent=True) +def operation_commalist() -> Rule: + return opt(operation_commalist + COMMA) + operation @rule -def reindex_stmt(): - return REINDEX + opt(collation_name | (opt(schema_name, DOT) + (table_name | index_name))) - - -@rule -def select_stmt(): - return seq( - opt(common_table_stmt), - select_core, - zero_or_more(compound_operator, select_core), - opt(order_by_stmt), - opt(limit_stmt), - ) - - -@rule -def join_clause(): - return table_or_subquery + zero_or_more(join_operator, table_or_subquery, opt(join_constraint)) - - -@rule -def select_core(): +def operation(): return alt( - seq( - SELECT, - opt(DISTINCT | ALL), - comma_list(result_column), - opt(FROM, comma_list(table_or_subquery) | join_clause), - opt(WHERE, expr), - opt(GROUP, BY, comma_list(expr), opt(HAVING, expr)), - opt(WINDOW, comma_list(window_name, AS, window_defn)), - ), - values_clause, + SELECT, + INSERT, + DELETE, + UPDATE + opt(LPAREN + column_commalist + RPAREN), + REFERENCES + opt(LPAREN + column_commalist + RPAREN), ) -@rule -def factored_select_stmt(): - return select_stmt +@rule(transparent=True) +def grantee_commalist() -> Rule: + return opt(grantee_commalist + COMMA) + grantee @rule -def simple_select_stmt(): - return opt(common_table_stmt) + select_core + opt(order_by_stmt) + opt(limit_stmt) +def grantee(): + return PUBLIC | user @rule -def compound_select_stmt(): +def cursor_def(): return seq( - opt(common_table_stmt), - select_core, - one_or_more((UNION + ALL) | INTERSECT | EXCEPT, select_core), - opt(order_by_stmt), - opt(limit_stmt), + DECLARE, + cursor, + CURSOR, + FOR, + query_exp, + opt(order_by_clause), ) @rule -def table_or_subquery(): - return alt( - seq( - opt(schema_name, DOT), - table_name, - opt(opt(AS), table_alias), - opt(seq(INDEXED, BY, index_name) | (NOT + INDEXED)), - ), - seq( - opt(schema_name, DOT), - table_function_name, - LPAREN, - comma_list(expr), - RPAREN, - opt(AS, table_alias), - ), - seq(LPAREN, comma_list(table_or_subquery) | join_clause, RPAREN), - seq(LPAREN, select_stmt, RPAREN, opt(opt(AS), table_alias)), - ) +def order_by_clause(): + return ORDER + BY + ordering_spec_commalist @rule -def result_column(): - return STAR | seq(table_name, DOT, STAR) | seq(expr, opt(opt(AS), column_alias)) +def ordering_spec_commalist() -> Rule: + return opt(ordering_spec_commalist + COMMA) + ordering_spec @rule -def join_operator(): - return alt( - COMMA, - seq(opt(NATURAL), opt(seq(LEFT | RIGHT | FULL, opt(OUTER)) | INNER | CROSS), JOIN), - ) - - -@rule -def join_constraint(): - return alt( - ON + expr, - USING + LPAREN + comma_list(column_name) + RPAREN, - ) - - -@rule -def compound_operator(): - return UNION + opt(ALL) | INTERSECT | EXCEPT - - -@rule -def update_stmt(): - return seq( - opt(with_clause), - UPDATE, - opt(OR, ROLLBACK | ABORT | REPLACE | FAIL | IGNORE), - qualified_table_name, - SET, - comma_list(column_name | column_name_list, EQUAL, expr), - opt(FROM, comma_list(table_or_subquery) | join_clause), - opt(WHERE, expr), - opt(returning_clause), - ) - - -@rule -def column_name_list(): - return LPAREN + comma_list(column_name) + RPAREN - - -@rule -def update_stmt_limited(): - return seq( - opt(with_clause), - UPDATE, - opt(OR, ROLLBACK | ABORT | REPLACE | FAIL | IGNORE), - qualified_table_name, - SET, - comma_list(column_name | column_name_list, EQUAL, expr), - opt(WHERE, expr), - opt(returning_clause), - opt(opt(order_by_stmt), limit_stmt), - ) - - -@rule -def qualified_table_name(): - return seq( - opt(schema_name, DOT), - table_name, - opt(AS, alias), - opt(INDEXED + BY + index_name | NOT + INDEXED), - ) - - -@rule -def vacuum_stmt(): - return VACUUM + opt(schema_name) + opt(INTO, filename) - - -@rule -def filter_clause(): - return FILTER + LPAREN + WHERE + expr + RPAREN - - -@rule -def window_defn(): - return seq( - LPAREN, - opt(base_window_name), - opt(PARTITION, BY, comma_list(expr)), - ORDER, - BY, - comma_list(ordering_term), - opt(frame_spec), - RPAREN, - ) - - -@rule -def over_clause(): - return seq( - OVER, - alt( - window_name, - seq( - LPAREN, - opt(base_window_name), - opt(PARTITION, BY, comma_list(expr)), - opt(ORDER, BY, comma_list(ordering_term)), - opt(frame_spec), - RPAREN, - ), - ), - ) - - -@rule -def frame_spec(): - return frame_clause + opt(EXCLUDE, NO + OTHERS | CURRENT + ROW | GROUP | TIES) - - -@rule -def frame_clause(): - return seq( - RANGE | ROWS | GROUPS, - frame_single | seq(BETWEEN, frame_left, AND, frame_right), - ) - - -@rule -def simple_function_invocation(): - return seq(simple_func, LPAREN, comma_list(expr) | STAR, RPAREN) - - -@rule -def aggregate_function_invocation(): - return seq( - aggregate_func, - LPAREN, - opt(opt(DISTINCT), comma_list(expr) | STAR), - RPAREN, - opt(filter_clause), - ) - - -@rule -def window_function_invocation(): - return seq( - window_function, - LPAREN, - opt(comma_list(expr) | STAR), - RPAREN, - opt(filter_clause), - OVER, - window_defn | window_name, - ) - - -@rule -def common_table_stmt(): - return seq(WITH, opt(RECURSIVE), comma_list(common_table_expression)) - - -@rule -def order_by_stmt(): - return seq(ORDER, BY, comma_list(ordering_term)) - - -@rule -def limit_stmt(): - return seq(LIMIT, expr, opt(OFFSET | COMMA, expr)) - - -NULLS = Terminal("NULLS", "nulls") -FIRST = Terminal("FIRST", "first") -LAST = Terminal("LAST", "last") - - -@rule -def ordering_term(): - return seq(expr, opt(COLLATE, collation_name), opt(asc_desc), opt(NULLS, FIRST | LAST)) +def ordering_spec(): + return (NUMBER + opt(asc_desc)) | (column_ref + opt(asc_desc)) @rule @@ -1061,425 +361,388 @@ def asc_desc(): return ASC | DESC -PRECEDING = Terminal("PRECEDING", "preceding") -FOLLOWING = Terminal("FOLLOWING", "following") -UNBOUNDED = Terminal("UNBOUNDED", "unbounded") - - @rule -def frame_left(): +def manipulative_statement(): return alt( - expr + PRECEDING, - expr + FOLLOWING, - CURRENT + ROW, - UNBOUNDED + PRECEDING, + close_statement, + commit_statement, + delete_statement_positioned, + delete_statement_searched, + fetch_statement, + insert_statement, + open_statement, + rollback_statement, + select_statement, + update_statement_positioned, + update_statement_searched, ) @rule -def frame_right(): - return alt( - expr + PRECEDING, - expr + FOLLOWING, - CURRENT + ROW, - UNBOUNDED + FOLLOWING, - ) +def close_statement(): + return CLOSE + cursor @rule -def frame_single(): - return alt( - expr + PRECEDING, - UNBOUNDED + PRECEDING, - CURRENT + ROW, - ) +def commit_statement(): + return COMMIT + opt(WORK) @rule -def window_function(): - return alt( - seq( - FIRST_VALUE | LAST_VALUE, - seq(LPAREN, expr, RPAREN), - OVER, - seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, opt(frame_clause), RPAREN), - ), - seq( - CUME_DIST | PERCENT_RANK, - seq(LPAREN, RPAREN), - OVER, - seq(LPAREN, opt(partition_by), opt(order_by_expr), RPAREN), - ), - seq( - DENSE_RANK | RANK | ROW_NUMBER, - seq(LPAREN, RPAREN), - OVER, - seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN), - ), - seq( - LAG | LEAD, - seq(LPAREN, expr, opt(offset), opt(default_value), RPAREN), - OVER, - seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN), - ), - seq( - NTH_VALUE, - seq(LPAREN, expr, COMMA, signed_number, RPAREN), - OVER, - seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, opt(frame_clause), RPAREN), - ), - seq( - NTILE, - seq(LPAREN, expr, RPAREN), - OVER, - seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN), - ), - ) +def delete_statement_positioned(): + return DELETE + FROM + table + WHERE + CURRENT + OF + cursor @rule -def offset(): - return COMMA + signed_number +def delete_statement_searched(): + return DELETE + FROM + table + opt(where_clause) @rule -def default_value(): - return COMMA + signed_number +def fetch_statement(): + return FETCH + cursor + INTO + target_commalist @rule -def partition_by(): - return PARTITION + BY + one_or_more(expr) - - -@rule -def order_by_expr(): - return ORDER + BY + one_or_more(expr) - - -@rule -def order_by_expr_asc_desc(): - return ORDER + BY + expr_asc_desc - - -@rule -def expr_asc_desc(): - return comma_list(expr, opt(asc_desc)) - - -# TODO BOTH OF THESE HAVE TO BE REWORKED TO FOLLOW THE SPEC -@rule -def initial_select(): - return select_stmt - - -@rule -def recursive_select(): - return select_stmt - - -@rule -def unary_operator(): - return MINUS | PLUS | TILDE | NOT - - -@rule -def error_message(): - return STRING_LITERAL - - -@rule -def module_argument(): # TODO check what exactly is permitted here - return expr | column_def - - -@rule -def column_alias(): - return IDENTIFIER | STRING_LITERAL - - -@rule -def keyword(): - return alt( - ABORT, - ACTION, - ADD, - AFTER, - ALL, - ALTER, - ANALYZE, - AND, - AS, - ASC, - ATTACH, - AUTOINCREMENT, - BEFORE, - BEGIN, - BETWEEN, - BY, - CASCADE, - CASE, - CAST, - CHECK, - COLLATE, - COLUMN, - COMMIT, - CONFLICT, - CONSTRAINT, - CREATE, - CROSS, - CURRENT_DATE, - CURRENT_TIME, - CURRENT_TIMESTAMP, - DATABASE, - DEFAULT, - DEFERRABLE, - DEFERRED, - DELETE, - DESC, - DETACH, - DISTINCT, - DROP, - EACH, - ELSE, - END, - ESCAPE, - EXCEPT, - EXCLUSIVE, - EXISTS, - EXPLAIN, - FAIL, - FOR, - FOREIGN, - FROM, - FULL, - GLOB, - GROUP, - HAVING, - IF, - IGNORE, - IMMEDIATE, - IN, - INDEX, - INDEXED, - INITIALLY, - INNER, +def insert_statement(): + return seq( INSERT, - INSTEAD, - INTERSECT, INTO, - IS, - ISNULL, - JOIN, - KEY, - LEFT, - LIKE, - LIMIT, - MATCH, - NATURAL, - NO, - NOT, - NOTNULL, - NULL, - OF, - OFFSET, - ON, - OR, - ORDER, - OUTER, - PLAN, - PRAGMA, - PRIMARY, - QUERY, - RAISE, - RECURSIVE, - REFERENCES, - REGEXP, - REINDEX, - RELEASE, - RENAME, - REPLACE, - RESTRICT, - RIGHT, - ROLLBACK, - ROW, - ROWS, - SAVEPOINT, - SELECT, - SET, - TABLE, - TEMP, - TEMPORARY, - THEN, - TO, - TRANSACTION, - TRIGGER, - UNION, - UNIQUE, - UPDATE, - USING, - VACUUM, - VALUES, - VIEW, - VIRTUAL, - WHEN, - WHERE, - WITH, - WITHOUT, - FIRST_VALUE, - OVER, - PARTITION, - RANGE, - PRECEDING, - UNBOUNDED, - CURRENT, - FOLLOWING, - CUME_DIST, - DENSE_RANK, - LAG, - LAST_VALUE, - LEAD, - NTH_VALUE, - NTILE, - PERCENT_RANK, - RANK, - ROW_NUMBER, - GENERATED, - ALWAYS, - STORED, - TRUE, - FALSE, - WINDOW, - NULLS, - FIRST, - LAST, - FILTER, - GROUPS, - EXCLUDE, + table, + opt(LPAREN, column_commalist, RPAREN), + values_or_query_spec, ) -# TODO: check all names below +@rule +def values_or_query_spec(): + return alt( + VALUES + LPAREN + insert_atom_commalist + RPAREN, + query_spec, + ) + + +@rule(transparent=True) +def insert_atom_commalist() -> Rule: + return opt(insert_atom_commalist + COMMA) + insert_atom @rule -def name(): - return any_name +def insert_atom(): + return atom | NULL @rule -def function_name(): - return any_name +def open_statement(): + return OPEN + cursor @rule -def schema_name(): - return any_name +def rollback_statement(): + return ROLLBACK + opt(WORK) @rule -def table_name(): - return any_name +def select_statement(): + return seq( + SELECT, + opt(all_distinct), + selection, + INTO, + target_commalist, + table_exp, + ) + + +@rule(transparent=True) +def all_distinct(): + return ALL | DISTINCT @rule -def table_or_index_name(): - return any_name +def update_statement_positioned(): + return UPDATE + table + SET + assignment_commalist + WHERE + CURRENT + OF + cursor + + +@rule(transparent=True) +def assignment_commalist() -> Rule: + return opt(assignment_commalist + COMMA) + assignment @rule -def column_name(): - return any_name +def assignment(): + return column + EQUAL + (scalar_exp | NULL) @rule -def collation_name(): - return any_name +def update_statement_searched(): + return UPDATE + table + SET + assignment_commalist + opt(where_clause) + + +@rule(transparent=True) +def target_commalist() -> Rule: + # TODO: So many commalists, it would be great if we could make this a + # macro or something. + return opt(target_commalist + COMMA) + target @rule -def foreign_table(): - return any_name +def target(): + return parameter_ref + + +# /* query expressions */ @rule -def index_name(): - return any_name +def query_exp() -> Rule: + return query_term | (query_exp + UNION + opt(ALL) + query_term) @rule -def trigger_name(): - return any_name +def query_term(): + return query_spec | (LPAREN + query_exp + RPAREN) @rule -def view_name(): - return any_name +def query_spec(): + return SELECT + opt(all_distinct) + selection + table_exp @rule -def module_name(): - return any_name +def selection(): + return scalar_exp_commalist | STAR @rule -def pragma_name(): - return any_name +def table_exp(): + return from_clause + opt(where_clause) + opt(group_by_clause) + opt(having_clause) @rule -def savepoint_name(): - return any_name +def from_clause(): + return FROM + table_ref_commalist + + +@rule(transparent=True) +def table_ref_commalist() -> Rule: + return opt(table_ref_commalist + COMMA) + table_ref @rule -def table_alias(): - return any_name +def table_ref(): + return table + opt(range_variable) @rule -def transaction_name(): - return any_name +def where_clause(): + return WHERE + search_condition @rule -def window_name(): - return any_name +def group_by_clause(): + return GROUP + BY + column_ref_commalist + + +@rule(transparent=True) +def column_ref_commalist() -> Rule: + return opt(column_ref_commalist + COMMA) + column_ref @rule -def alias(): - return any_name +def having_clause(): + return HAVING + search_condition + + +# /* search conditions */ @rule -def filename(): - return any_name +def search_condition() -> Rule: + return alt( + search_condition + OR + search_condition, + search_condition + AND + search_condition, + NOT + search_condition, + LPAREN + search_condition + RPAREN, + predicate, + ) @rule -def base_window_name(): - return any_name +def predicate(): + return alt( + comparison_predicate, + between_predicate, + like_predicate, + test_for_null, + in_predicate, + all_or_any_predicate, + existence_test, + ) @rule -def simple_func(): - return any_name +def comparison_predicate(): + return scalar_exp + COMPARISON + (scalar_exp | subquery) @rule -def aggregate_func(): - return any_name +def between_predicate(): + return scalar_exp + opt(NOT) + BETWEEN + scalar_exp + AND + scalar_exp @rule -def table_function_name(): - return any_name +def like_predicate(): + return scalar_exp + opt(NOT) + LIKE + atom + opt(escape) @rule -def any_name(): - return IDENTIFIER | keyword | STRING_LITERAL | seq(LPAREN, any_name, RPAREN) +def escape(): + return ESCAPE + atom + + +@rule +def test_for_null(): + return column_ref + IS + opt(NOT) + NULL + + +@rule +def in_predicate(): + return scalar_exp + opt(NOT) + IN + LPAREN + alt(subquery | atom_commalist) + RPAREN + + +@rule +def atom_commalist() -> Rule: + return opt(atom_commalist + COMMA) + atom + + +@rule +def all_or_any_predicate(): + return scalar_exp + COMPARISON + any_all_some + subquery + + +@rule(transparent=True) +def any_all_some(): + return ANY | ALL | SOME + + +@rule +def existence_test(): + return EXISTS + subquery + + +@rule +def subquery(): + return LPAREN + SELECT + opt(all_distinct) + selection + table_exp + RPAREN + + +# /* scalar expressions */ + + +@rule +def scalar_exp(): + return alt( + scalar_exp + (PLUS | MINUS | STAR | SLASH) + scalar_exp, + PLUS + scalar_exp, + MINUS + scalar_exp, + atom, + column_ref, + function_ref, + LPAREN + scalar_exp + RPAREN, + ) + + +@rule +def scalar_exp_commalist() -> Rule: + return opt(scalar_exp_commalist + COMMA) + scalar_exp + + +@rule +def atom(): + return parameter_ref | literal | USER + + +@rule +def parameter_ref(): + return parameter | (parameter + parameter) | (parameter + INDICATOR + parameter) + + +@rule +def function_ref(): + return alt( + AMMSC + LPAREN + STAR + RPAREN, + AMMSC + LPAREN + DISTINCT + column_ref + RPAREN, + AMMSC + LPAREN + ALL + scalar_exp + RPAREN, + AMMSC + LPAREN + scalar_exp + RPAREN, + ) + + +@rule +def literal(): + return STRING | NUMBER + + +# /* miscellaneous */ + + +@rule +def table(): + return opt(NAME + DOT) + NAME + + +@rule +def column_ref(): + return opt(opt(NAME + DOT) + NAME + DOT) + NAME + + +# /* data types */ + + +@rule +def data_type(): + return alt( + CHARACTER + opt(LPAREN + NUMBER + RPAREN), + NUMERIC + opt(LPAREN + NUMBER + opt(COMMA + NUMBER) + RPAREN), + DECIMAL + opt(LPAREN + NUMBER + opt(COMMA + NUMBER) + RPAREN), + INTEGER, + SMALLINT, + FLOAT + opt(LPAREN + NUMBER + RPAREN), + REAL, + DOUBLE + PRECISION, + ) + + +# /* the various things you can name */ + + +@rule +def column(): + return NAME + + +@rule +def cursor(): + return NAME + + +@rule +def parameter(): + return PARAMETER # :name handled in parser??? + + +@rule +def range_variable(): + return NAME + + +@rule +def user(): + return NAME + + +@rule +def when_action(): + return (GOTO + NAME) | CONTINUE SQL = Grammar( @@ -1488,7 +751,7 @@ SQL = Grammar( (Assoc.LEFT, [OR]), (Assoc.LEFT, [AND]), (Assoc.LEFT, [NOT]), - (Assoc.LEFT, []), + (Assoc.LEFT, [COMPARISON]), (Assoc.LEFT, [PLUS, MINUS]), (Assoc.LEFT, [STAR, SLASH]), # TODO: Unary minus @@ -1497,13 +760,6 @@ SQL = Grammar( name="SQL", ) -if __name__ == "__main__": - import cProfile - - print("Starting...") - with cProfile.Profile() as pr: - try: - SQL.build_table() - finally: - pr.dump_stats("sql.pprof") - print("Wrote output to sql.pprof") +if __name__=="__main__": + tbl = SQL.build_table() + print(tbl.format()) diff --git a/parser/parser.py b/parser/parser.py index cc71f82..a6ff2d0 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -1116,9 +1116,7 @@ class ParserGenerator: # Check to make sure they didn't use anything that will give us # heartburn later. - reserved = [ - a for a in alphabet if (a.startswith("__") and not a.startswith("__gen_")) or a == "$" - ] + reserved = [a for a in alphabet if (a.startswith("__") and not a.startswith("__gen_")) or a == "$"] if reserved: raise ValueError( "Can't use {symbols} in grammars, {what} reserved.".format( @@ -1457,18 +1455,7 @@ class ParserGenerator: lookahead.update(context) for rule in rules: - if len(rule) == 0: - next = None - else: - next = rule[0] - - new_core = Configuration( - name=config_next, - symbols=rule, - position=0, - next=next, - ) - + new_core = Configuration.from_rule(config_next, rule) todo.append((new_core, lookahead)) return ItemSet(closure) @@ -1632,7 +1619,6 @@ class Terminal(Rule): _CURRENT_DEFINITION: str = "__global" _CURRENT_GEN_INDEX: int = 0 - class NonTerminal(Rule): """A non-terminal, or a production, in the grammar. @@ -1713,6 +1699,7 @@ class NonTerminal(Rule): _CURRENT_DEFINITION = prev_defn _CURRENT_GEN_INDEX = prev_idx + return self._body def flatten( @@ -1820,32 +1807,22 @@ def seq(*args: Rule) -> Rule: def opt(*args: Rule) -> Rule: - """Mark a sequence as optional.""" return AlternativeRule(seq(*args), Nothing) def mark(rule: Rule, **kwargs) -> Rule: - """Mark the specified rules with metadata.""" return MetadataRule(rule, kwargs) -def one_or_more(*args: Rule) -> Rule: - """Generate a rule that matches a repetition of one or more of the specified - rule. - - The resulting list is transparent, i.e., in the parse tree all of the members - of the list will be in-line with the parent. If you want to name the list - create a named nonterminal to contain it. - """ +def one_or_more(r: Rule) -> Rule: global _CURRENT_DEFINITION global _CURRENT_GEN_INDEX - tail: NonTerminal | None = None - + tail : NonTerminal | None = None def impl() -> Rule: nonlocal tail - assert tail is not None - return opt(tail) + seq(*args) + assert(tail is not None) + return opt(tail) + r tail = NonTerminal( fn=impl, @@ -1856,17 +1833,8 @@ def one_or_more(*args: Rule) -> Rule: return tail - -def zero_or_more(*args: Rule) -> Rule: - """Generate a rule that matches a repetition of zero or more of the specified - rule. - - The resulting list is transparent, i.e., in the parse tree all of the members - of the list will be in-line with the parent. If you want to name the list - create a named nonterminal to contain it. - """ - return opt(one_or_more(*args)) - +def zero_or_more(r:Rule) -> Rule: + return opt(one_or_more(r)) @typing.overload def rule(f: typing.Callable, /) -> NonTerminal: ... @@ -2187,25 +2155,11 @@ class Re: def question(self) -> "Re": return ReQuestion(self) - def __or__(self, value: "Re | Terminal", /) -> "Re": - if isinstance(value, Re): - other = value - elif isinstance(value.pattern, Re): - other = value.pattern - else: - other = Re.literal(value.pattern) + def __or__(self, value: "Re", /) -> "Re": + return ReAlt(self, value) - return ReAlt(self, other) - - def __add__(self, value: "Re | Terminal") -> "Re": - if isinstance(value, Re): - other = value - elif isinstance(value.pattern, Re): - other = value.pattern - else: - other = Re.literal(value.pattern) - - return ReSeq(self, other) + def __add__(self, value: "Re") -> "Re": + return ReSeq(self, value) UNICODE_MAX_CP = 1114112 @@ -2864,12 +2818,9 @@ class TriviaMode(enum.Enum): # Finally, the grammar class. ############################################################################### -PrecedenceList = list[typing.Tuple[Assoc, list[Terminal | NonTerminal]]] +PrecedenceList = list[typing.Tuple[Assoc, list[Terminal|NonTerminal]]] - -def gather_grammar( - start: NonTerminal, trivia: list[Terminal] -) -> tuple[dict[str, NonTerminal], dict[str, Terminal]]: +def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str,NonTerminal], dict[str,Terminal]]: """Starting from the given NonTerminal, gather all of the symbols (NonTerminals and Terminals) that make up the grammar. """ @@ -2913,11 +2864,9 @@ def gather_grammar( existing = named_rules.get(rule.name) if existing is not None: # TODO TEST - raise ValueError( - f"""Found more than one rule named {rule.name}: + raise ValueError(f"""Found more than one rule named {rule.name}: - {existing.definition_location} -- {rule.definition_location}""" - ) +- {rule.definition_location}""") named_rules[rule.name] = rule named_terminals: dict[str, Terminal] = {} @@ -2925,20 +2874,16 @@ def gather_grammar( existing = named_terminals.get(terminal.name) if existing is not None: # TODO TEST - raise ValueError( - f"""Found more than one terminal named {terminal.name}: + raise ValueError(f"""Found more than one terminal named {terminal.name}: - {existing.definition_location} -- {terminal.definition_location}""" - ) +- {terminal.definition_location}""") existing_rule = named_rules.get(terminal.name) if existing_rule is not None: # TODO TEST - raise ValueError( - f"""Found a terminal and a rule both named {terminal.name}: + raise ValueError(f"""Found a terminal and a rule both named {terminal.name}: - The rule was defined at {existing_rule.definition_location} -- The terminal was defined at {terminal.definition_location}""" - ) +- The terminal was defined at {terminal.definition_location}""") named_terminals[terminal.name] = terminal @@ -3035,7 +2980,7 @@ class Grammar: generate_nonterminal_dict- less useful to people, probably, but it is the input form needed by the Generator. """ - grammar: list[tuple[str, list[str]]] = [ + grammar: list[tuple[str,list[str]]] = [ (rule.name, [s.name for s in production]) for rule in self._nonterminals.values() for production in rule.body