From 2d10e91f9ea2a795420f317c6e213297997d2b9f Mon Sep 17 00:00:00 2001 From: John Doty Date: Fri, 15 Nov 2024 19:46:51 -0800 Subject: [PATCH] [sql] SQL "works" but it's too dang slow --- dingus/dingus.js | 4 + dingus/worker.js | 8 +- examples/sql.py | 1618 +++++++++++++++++++++++++++++++++------------- parser/parser.py | 40 +- 4 files changed, 1218 insertions(+), 452 deletions(-) diff --git a/dingus/dingus.js b/dingus/dingus.js index 4f545da..4e28088 100644 --- a/dingus/dingus.js +++ b/dingus/dingus.js @@ -129,14 +129,17 @@ function render_state(state, input_editor) { * otherwise just queue it for submission. */ function post_document(worker, kind, state, document) { + console.log("Received document", kind) if (window.localStorage) { window.localStorage.setItem(kind, document); } let new_state = {...state}; if (new_state.pending) { + console.log("Document parked", kind) new_state.next = document; } else { + console.log("Document submitted", kind) new_state.pending = document; new_state.next = null; worker.postMessage({kind, data: document}); @@ -151,6 +154,7 @@ function post_document(worker, kind, state, document) { function rotate_document(worker, kind, state) { let new_state = {...state, last: state.pending, pending: null}; if (new_state.next) { + console.log("Rotating document", kind) new_state.pending = new_state.next; new_state.next = null; worker.postMessage({kind, data: new_state.pending}); diff --git a/dingus/worker.js b/dingus/worker.js index 8a5e66d..61eb7b1 100644 --- a/dingus/worker.js +++ b/dingus/worker.js @@ -176,19 +176,21 @@ const pyodide_promise = setup_python(); async function load_grammar_module(code) { const pyodide = self.pyodide; - // console.log("Running..."); + console.log("eval_grammar: Running"); const my_fn = pyodide.globals.get("eval_grammar"); my_fn(code); my_fn.destroy(); + console.log("eval_grammar: Done"); } async function parse_document(code) { const pyodide = self.pyodide; - // console.log("Running..."); + console.log("eval_document: Running"); const my_fn = pyodide.globals.get("eval_document"); my_fn(code); my_fn.destroy(); + console.log("eval_document: Done"); } self.onmessage = async function(event) { @@ -197,8 +199,10 @@ self.onmessage = async function(event) { try { const { kind, data } = event.data; if (kind === "grammar") { + console.log("Worker received grammar") await load_grammar_module(data); } else if (kind === "input") { + console.log("Worker received input") await parse_document(data); } } catch (e) { diff --git a/examples/sql.py b/examples/sql.py index 06dfaca..a5afb9f 100644 --- a/examples/sql.py +++ b/examples/sql.py @@ -1,16 +1,16 @@ from parser import * -NAME = Terminal( - "NAME", +IDENTIFIER = Terminal( + "IDENTIFIER", Re.seq( Re.set(("a", "z"), ("A", "Z"), "_"), Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), ), ) -STRING = Terminal( - "STRING", +STRING_LITERAL = Terminal( + "STRING_LITERAL", Re.seq( Re.literal("'"), (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(), @@ -19,6 +19,16 @@ STRING = Terminal( highlight=highlight.string.quoted, ) +NUMERIC_LITERAL = Terminal("NUMERIC_LITERAL", Re.set(("0", "9")).plus()) + +BLOB_LITERAL = Terminal("BLOB_TERMINAL", Re.literal("X") + STRING_LITERAL) + + +BIND_PARAMETER = Terminal( + "BIND_PARAMETER", + Re.literal("?") + Re.set(("0", "9")).star() | Re.set(":", "@", "$") + IDENTIFIER, +) + NUMBER = Terminal( "NUMBER", Re.seq( @@ -39,64 +49,117 @@ NUMBER = Terminal( OR = Terminal("OR", "or") AND = Terminal("AND", "and") NOT = Terminal("NOT", "not") -COMPARISON = Terminal( - "COMPARISON", - Re.literal("=") - | Re.literal("<>") - | Re.literal("<") - | Re.literal(">") - | Re.literal("<=") - | Re.literal(">="), -) PLUS = Terminal("PLUS", "+") MINUS = Terminal("MINUS", "-") STAR = Terminal("STAR", "*") SLASH = Terminal("SLASH", "/") +ABORT = Terminal("ABORT", "abort") +ACTION = Terminal("ACTION", "action") +ADD = Terminal("ADD", "add") +AFTER = Terminal("AFTER", "after") ALL = Terminal("ALL", "all") +ALTER = Terminal("ALTER", "alter") +ALWAYS = Terminal("ALWAYS", "always") AMMSC = Terminal("AMMSC", "ammsc") +AMPERSAND = Terminal("AMPERSAND", "&") +ANALYZE = Terminal("ANALYIZE", "analyze") ANY = Terminal("ANY", "any") AS = Terminal("AS", "as") ASC = Terminal("ASC", "asc") +ATTACH = Terminal("ATTACH", "attach") AUTHORIZATION = Terminal("AUTHORIZATION", "authorization") +AUTOINCREMENT = Terminal("AUTOINCREMENT", "autoincrement") +BEFORE = Terminal("BEFORE", "before") +BEGIN = Terminal("BEGIN", "begin") BETWEEN = Terminal("BETWEEN", "between") BY = Terminal("BY", "by") +CASCADE = Terminal("CASCADE", "cascade") +CASE = Terminal("CASE", "case") +CAST = Terminal("CAST", "cast") CHARACTER = Terminal("CHARACTER", "character") CHECK = Terminal("CHECK", "check") CLOSE = Terminal("CLOSE", "close") +COLLATE = Terminal("COLLATE", "collate") +COLUMN = Terminal("COLUMN", "column") COMMIT = Terminal("COMMIT", "commit") +CONFLICT = Terminal("CONFLICT", "conflict") +CONSTRAINT = Terminal("CONSTRAINT", "constraint") CONTINUE = Terminal("CONTINUE", "continue") CREATE = Terminal("CREATE", "create") +CROSS = Terminal("CROSS", "cross") CURRENT = Terminal("CURRENT", "current") +CURRENT_DATE = Terminal("CURRENT_DATE", "current_date") +CURRENT_TIME = Terminal("CURRENT_TIME", "current_time") +CURRENT_TIMESTAMP = Terminal("CURRENT_TIMESTAMP", "current_timestamp") CURSOR = Terminal("CURSOR", "cursor") +DATABASE = Terminal("DATABASE", "database") DECIMAL = Terminal("DECIMAL", "decimal") DECLARE = Terminal("DECLARE", "declare") DEFAULT = Terminal("DEFAULT", "default") +DEFERRABLE = Terminal("DEFERRABLE", "deferrable") +DEFERRED = Terminal("DEFERRED", "deferred") DELETE = Terminal("DELETE", "delete") DESC = Terminal("DESC", "desc") +DETACH = Terminal("DETACH", "detach") DISTINCT = Terminal("DISTINCT", "distinct") DOUBLE = Terminal("DOUBLE", "double") +DROP = Terminal("DROP", "drop") +EACH = Terminal("EACH", "each") +ELSE = Terminal("ELSE", "else") +END = Terminal("END", "end") +EQUALEQUAL = Terminal("EQUALEQUAL", "==") ESCAPE = Terminal("ESCAPE", "escape") +EXCLUSIVE = Terminal("EXCLUSIVE", "exclusive") EXISTS = Terminal("EXISTS", "exists") +EXPLAIN = Terminal("EXPLAIN", "explain") +FAIL = Terminal("FAIL", "fail") +FALSE = Terminal("FALSE", "false") FETCH = Terminal("FETCH", "fetch") FLOAT = Terminal("FLOAT", "float") FOR = Terminal("FOR", "for") FOREIGN = Terminal("FOREIGN", "foreign") FOUND = Terminal("FOUND", "found") FROM = Terminal("FROM", "from") +FULL = Terminal("FULL", "full") +GENERATED = Terminal("GENERATED", "generated") +GLOB = Terminal("GLOB", "glob") GOTO = Terminal("GOTO", "goto") GRANT = Terminal("GRANT", "grant") GROUP = Terminal("GROUP", "group") +GT = Terminal("GT", ">") +GT2 = Terminal("GT2", ">>") +GT_EQ = Terminal("GT_EQ", ">=") HAVING = Terminal("HAVING", "having") +IF = Terminal("IF", "if") +IGNORE = Terminal("IGNORE", "ignore") +IMMEDATE = Terminal("IMMEDIATE", "immedate") +IMMEDIATE = Terminal("IMMEDIATE", "immediate") IN = Terminal("IN", "in") +INDEX = Terminal("INDEX", "index") INDICATOR = Terminal("INDICATOR", "indicator") +INITIALLY = Terminal("INITIALLY", "initially") +INNER = Terminal("INNER", "inner") INSERT = Terminal("INSERT", "insert") +INSTEAD = Terminal("INSTEAD", "instead") INTEGER = Terminal("INTEGER", "integer") INTO = Terminal("INTO", "into") IS = Terminal("IS", "is") +ISNULL = Terminal("ISNULL", "isnull") # ?? +JOIN = Terminal("JOIN", "join") KEY = Terminal("KEY", "key") LANGUAGE = Terminal("LANGUAGE", "language") +LEFT = Terminal("LEFT", "left") LIKE = Terminal("LIKE", "like") +LT = Terminal("LT", "<") +LT2 = Terminal("LT2", "<<") +LT_EQ = Terminal("LT_EQ", "<=") +MATCH = Terminal("MATCH", "match") +NATURAL = Terminal("NATURAL", "natural") +NO = Terminal("NO", "no") +NOTHING = Terminal("NOTHING", "nothing") +NOTNULL = Terminal("NOTNULL", "notnull") # ?? +NOT_EQ = Terminal("NOT_EQ1", Re.literal("!=") | Re.literal("<>")) NULL = Terminal("NULL", "null") NUMERIC = Terminal("NUMERIC", "numeric") OF = Terminal("OF", "of") @@ -104,15 +167,33 @@ ON = Terminal("ON", "on") OPEN = Terminal("OPEN", "open") OPTION = Terminal("OPTION", "option") ORDER = Terminal("ORDER", "order") +OUTER = Terminal("OUTER", "outer") PARAMETER = Terminal("PARAMETER", "parameter") +PERCENT = Terminal("PERCENT", "%") +PIPE = Terminal("PIPE", "|") +PIPE2 = Terminal("PIPE2", "||") +PLAN = Terminal("PLAN", "plan") +PRAGMA = Terminal("PRAGMA", "pragma") PRECISION = Terminal("PRECISION", "precision") PRIMARY = Terminal("PRIMARY", "primary") PRIVILEGES = Terminal("PRIVILEGES", "privileges") PROCEDURE = Terminal("PROCEDURE", "procedure") PUBLIC = Terminal("PUBLIC", "public") +QUERY = Terminal("QUERY", "query") +RAISE = Terminal("RAISE", "raise") REAL = Terminal("REAL", "real") REFERENCES = Terminal("REFERENCES", "references") +REGEXP = Terminal("REGEXP", "regexp") +REINDEX = Terminal("REINDEX", "reindex") +RELEASE = Terminal("RELEASE", "release") +RENAME = Terminal("RENAME", "rename") +REPLACE = Terminal("REPLACE", "replace") +RESTRICT = Terminal("RESTRICT", "restrict") +RETURNING = Terminal("RETURNING", "returning") +RIGHT = Terminal("RIGHT", "right") ROLLBACK = Terminal("ROLLBACK", "rollback") +ROW = Terminal("ROW", "row") +SAVEPOINT = Terminal("SAVEPOINT", "savepoint") SCHEMA = Terminal("SCHEMA", "schema") SELECT = Terminal("SELECT", "select") SET = Terminal("SET", "set") @@ -120,18 +201,59 @@ SMALLINT = Terminal("SMALLINT", "smallint") SOME = Terminal("SOME", "some") SQLCODE = Terminal("SQLCODE", "sqlcode") SQLERROR = Terminal("SQLERROR", "sqlerror") +STORED = Terminal("STORED", "stored") TABLE = Terminal("TABLE", "table") +TEMP = Terminal("TEMP", "temp") +TEMPORARY = Terminal("TEMPORARY", "temporary") +THEN = Terminal("THEN", "then") TO = Terminal("TO", "to") +TRANSACTION = Terminal("TRANSACTION", "transaction") +TRIGGER = Terminal("TRIGGER", "trigger") +TRUE = Terminal("TRUE", "true") UNION = Terminal("UNION", "union") UNIQUE = Terminal("UNIQUE", "unique") UPDATE = Terminal("UPDATE", "update") USER = Terminal("USER", "user") +USING = Terminal("USING", "using") VALUES = Terminal("VALUES", "values") VIEW = Terminal("VIEW", "view") +VIRTUAL = Terminal("VIRTUAL", "virtual") +WHEN = Terminal("WHEN", "when") WHENEVER = Terminal("WHENEVER", "whenever") WHERE = Terminal("WHERE", "where") WITH = Terminal("WITH", "with") +WITHOUT = Terminal("WITHOUT", "without") WORK = Terminal("WORK", "work") +INTERSECT = Terminal("INTERSECT", "intersect") +EXCEPT = Terminal("EXCEPT", "except") +INDEXED = Terminal("INDEXED", "indexed") +VACUUM = Terminal("VACUUM", "vacuum") +FILTER = Terminal("FILTER", "filter") +PARTITION = Terminal("PARTITION", "partition") +EXCLUDE = Terminal("EXCLUDE", "exclude") +OTHERS = Terminal("OTHERS", "others") +TIES = Terminal("TIES", "ties") +RANGE = Terminal("RANGE", "range") +ROWS = Terminal("ROWS", "rows") +GROUPS = Terminal("GROUPS", "groups") +OVER = Terminal("OVER", "over") +RECURSIVE = Terminal("RECURSIVE", "recursive") +LIMIT = Terminal("LIMIT", "limit") +OFFSET = Terminal("OFFSET", "offset") +FIRST_VALUE = Terminal("FIRST_VALUE", "first_value") +LAST_VALUE = Terminal("LAST_VALUE", "last_value") +CUME_DIST = Terminal("CUME_DIST", "cume_dist") +PERCENT_RANK = Terminal("PERCENT_RANK", "percent_rank") +TILDE = Terminal("TILDE", "tilde") +DENSE_RANK = Terminal("DENSE_RANK", "dense_rank") +RANK = Terminal("RANK", "rank") +ROW_NUMBER = Terminal("ROW_NUMBER", "row_number") +LAG = Terminal("LAG", "lag") +LEAD = Terminal("LEAD", "lead") +NTH_VALUE = Terminal("NTH_VALUE", "nth_value") +NTILE = Terminal("NTILE", "ntile") +WINDOW = Terminal("WINDOW", "window") +DO = Terminal("DO", "do") SEMICOLON = Terminal("SEMICOLON", ";") LPAREN = Terminal("LPAREN", "(") @@ -160,200 +282,778 @@ def sql_list(): @rule def sql(): - return alt( - schema, - cursor_def, - manipulative_statement, - WHENEVER + NOT + FOUND + when_action, - WHENEVER + SQLERROR + when_action, + return opt(EXPLAIN + opt(QUERY + PLAN)) + alt( + alter_table_stmt, + analyze_stmt, + attach_stmt, + begin_stmt, + commit_stmt, + create_index_stmt, + create_table_stmt, + create_trigger_stmt, + create_view_stmt, + create_virtual_table_stmt, + delete_stmt, + delete_stmt_limited, + detach_stmt, + drop_stmt, + insert_stmt, + pragma_stmt, + reindex_stmt, + release_stmt, + rollback_stmt, + savepoint_stmt, + select_stmt, + update_stmt, + update_stmt_limited, + vacuum_stmt, ) @rule -def schema(): - return seq( - CREATE, - SCHEMA, - AUTHORIZATION, - user, - opt(schema_element_list), +def alter_table_stmt(): + return ( + ALTER + + TABLE + + opt(schema_name + DOT) + + table_name + + alt( + RENAME + alt((TO + table_name), (COLUMN + column_name + TO + column_name)), + (ADD + opt(COLUMN) + column_def), + (DROP + opt(COLUMN) + column_name), + ) ) -@rule(transparent=True) -def schema_element_list() -> Rule: - return schema_element | (schema_element_list + schema_element) +@rule +def analyze_stmt(): + return ANALYZE + opt(alt(schema_name, opt(schema_name + DOT) + table_or_index_name)) @rule -def schema_element(): - return base_table_def | view_def | privilege_def +def attach_stmt(): + return ATTACH + opt(DATABASE) + expr + AS + schema_name @rule -def base_table_def(): +def begin_stmt(): + return BEGIN + opt(DEFERRED | IMMEDIATE | EXCLUSIVE) + opt(TRANSACTION + opt(transaction_name)) + + +@rule +def commit_stmt(): + return (COMMIT | END) + opt(TRANSACTION) + + +@rule +def rollback_stmt(): + return ROLLBACK + opt(TRANSACTION) + opt(TO + opt(SAVEPOINT) + savepoint_name) + + +@rule +def savepoint_stmt(): + return SAVEPOINT + savepoint_name + + +@rule +def release_stmt(): + return RELEASE + opt(SAVEPOINT) + savepoint_name + + +def comma_list(*rules: Rule) -> Rule: + """A list of `rule` separated by commas. Must have at least one, no trailing comma.""" + rule = seq(*rules) + return seq(rule, zero_or_more(COMMA, rule)) + + +@rule +def create_index_stmt(): return seq( CREATE, - TABLE, - table, + opt(UNIQUE), + INDEX, + opt(IF + NOT + EXISTS), + opt(schema_name + DOT), + index_name, + ON, + table_name, LPAREN, - base_table_element_commalist, + comma_list(indexed_column), RPAREN, + opt(WHERE + expr), ) -@rule(transparent=True) -def base_table_element_commalist() -> Rule: - return opt(base_table_element_commalist + COMMA) + base_table_element +@rule +def indexed_column(): + return (column_name | expr) + opt(COLLATE + collation_name) + opt(asc_desc) -@rule(transparent=True) -def base_table_element(): - return column_def | table_constraint_def +@rule +def create_table_stmt(): + return seq( + CREATE, + opt(TEMP | TEMPORARY), + TABLE, + opt(IF, NOT, EXISTS), + opt(schema_name, DOT), + table_name, + alt( + seq( + LPAREN, + comma_list(column_def), + zero_or_more(COMMA, table_constraint), + RPAREN, + opt(WITHOUT, IDENTIFIER), + ), + seq(AS, select_stmt), + ), + ) @rule def column_def(): - return column + data_type + opt(column_def_list) - - -@rule(transparent=True) -def column_def_list() -> Rule: - return alt( - column_def_list + column_def_opt, - column_def_opt, - ) + return column_name + opt(type_name) + zero_or_more(column_constraint) @rule -def column_def_opt(): - return alt( - NOT + opt(NULL + opt(alt(UNIQUE, PRIMARY + KEY))), - DEFAULT + literal, - DEFAULT + NULL, - DEFAULT + USER, - CHECK + LPAREN + search_condition + RPAREN, - REFERENCES + table, - REFERENCES + table + LPAREN + column_commalist + RPAREN, - ) +def type_name(): + return name + opt(LPAREN, signed_number, opt(COMMA, signed_number), RPAREN) @rule -def table_constraint_def(): - return alt( - UNIQUE + LPAREN + column_commalist + RPAREN, - PRIMARY + KEY + LPAREN + column_commalist + RPAREN, - seq( - FOREIGN, - KEY, - LPAREN, - column_commalist, - RPAREN, - REFERENCES, - table, - opt(LPAREN + column_commalist + RPAREN), +def column_constraint(): + return seq( + opt(CONSTRAINT, name), + alt( + seq(PRIMARY, KEY, opt(asc_desc), opt(conflict_clause), opt(AUTOINCREMENT)), + seq(opt(NOT), (NULL | UNIQUE), opt(conflict_clause)), + seq(DEFAULT, signed_number | literal_value | seq(LPAREN, expr, RPAREN)), + seq(COLLATE, collation_name), + foreign_key_clause, + seq(opt(GENERATED, ALWAYS), AS, LPAREN, expr, RPAREN, opt(STORED | VIRTUAL)), ), - CHECK + LPAREN + search_condition + RPAREN, ) -@rule(transparent=True) -def column_commalist() -> Rule: - return opt(column_commalist + COMMA) + column +@rule +def signed_number(): + return opt(PLUS | MINUS) + NUMERIC_LITERAL @rule -def view_def(): +def table_constraint(): + return seq( + opt(CONSTRAINT, name), + alt( + seq( + (PRIMARY + KEY | UNIQUE), + LPAREN, + comma_list(indexed_column), + RPAREN, + opt(conflict_clause), + ), + seq(CHECK, LPAREN, expr, RPAREN), + seq( + FOREIGN, + KEY, + LPAREN, + comma_list(column_name), + RPAREN, + foreign_key_clause, + ), + ), + ) + + +@rule +def foreign_key_clause(): + return seq( + REFERENCES, + foreign_table, + opt(LPAREN, comma_list(column_name), RPAREN), + zero_or_more( + alt( + seq( + ON, + (DELETE | UPDATE), + alt( + SET + (NULL | DEFAULT), + CASCADE, + RESTRICT, + NO + ACTION, + ), + ), + MATCH + name, + ), + ), + opt(opt(NOT), DEFERRABLE, opt(INITIALLY, (DEFERRED | IMMEDIATE))), + ) + + +@rule +def conflict_clause(): + return seq(ON, CONFLICT, ROLLBACK | ABORT | FAIL | IGNORE | REPLACE) + + +@rule +def create_trigger_stmt(): return seq( CREATE, - VIEW, - table, - opt(LPAREN + column_commalist + RPAREN), - AS, - query_spec, - opt(with_check_option), - ) - - -@rule -def with_check_option(): - return WITH + CHECK + OPTION - - -@rule -def privilege_def(): - return seq( - GRANT, - privileges, + opt(TEMP | TEMPORARY), + TRIGGER, + opt(IF, NOT, EXISTS), + opt(schema_name, DOT), + trigger_name, + opt(BEFORE | AFTER | (INSTEAD + OF)), + (DELETE | INSERT | (UPDATE + opt(OF, comma_list(column_name)))), ON, - table, - TO, - grantee_commalist, - opt(with_grant_option), + table_name, + opt(FOR, EACH, ROW), + opt(WHEN, expr), + BEGIN, + one_or_more((update_stmt | insert_stmt | delete_stmt | select_stmt), SEMICOLON), + END, ) @rule -def with_grant_option(): - return WITH + GRANT + OPTION - - -@rule -def privileges(): - return (ALL + PRIVILEGES) | ALL | operation_commalist - - -@rule(transparent=True) -def operation_commalist() -> Rule: - return opt(operation_commalist + COMMA) + operation - - -@rule -def operation(): - return alt( - SELECT, - INSERT, - DELETE, - UPDATE + opt(LPAREN + column_commalist + RPAREN), - REFERENCES + opt(LPAREN + column_commalist + RPAREN), - ) - - -@rule(transparent=True) -def grantee_commalist() -> Rule: - return opt(grantee_commalist + COMMA) + grantee - - -@rule -def grantee(): - return PUBLIC | user - - -@rule -def cursor_def(): +def create_view_stmt(): return seq( - DECLARE, - cursor, - CURSOR, - FOR, - query_exp, - opt(order_by_clause), + CREATE, + opt(TEMP | TEMPORARY), + VIEW, + opt(IF, NOT, EXISTS), + opt(schema_name, DOT), + view_name, + opt(LPAREN, comma_list(column_name), RPAREN), + AS, + select_stmt, ) @rule -def order_by_clause(): - return ORDER + BY + ordering_spec_commalist +def create_virtual_table_stmt(): + return seq( + CREATE, + VIRTUAL, + TABLE, + opt(IF, NOT, EXISTS), + opt(schema_name, DOT), + table_name, + USING, + module_name, + opt(LPAREN, comma_list(module_argument), RPAREN), + ) @rule -def ordering_spec_commalist() -> Rule: - return opt(ordering_spec_commalist + COMMA) + ordering_spec +def with_clause(): + return seq( + WITH, + opt(RECURSIVE), + comma_list(cte_table_name, AS, LPAREN, select_stmt, RPAREN), + ) @rule -def ordering_spec(): - return (NUMBER + opt(asc_desc)) | (column_ref + opt(asc_desc)) +def cte_table_name(): + return table_name + opt(LPAREN, comma_list(column_name), RPAREN) + + +@rule +def recursive_cte(): + return seq( + cte_table_name, + AS, + LPAREN, + initial_select, + UNION, + opt(ALL), + recursive_select, + RPAREN, + ) + + +@rule +def common_table_expression(): + return seq( + table_name, + opt(LPAREN, comma_list(column_name), RPAREN), + AS, + LPAREN, + select_stmt, + RPAREN, + ) + + +@rule +def delete_stmt(): + return seq( + opt(with_clause), + DELETE, + FROM, + qualified_table_name, + opt(WHERE, expr), + opt(returning_clause), + ) + + +@rule +def delete_stmt_limited(): + return seq( + opt(with_clause), + DELETE, + FROM, + qualified_table_name, + opt(WHERE, expr), + opt(returning_clause), + opt( + opt(order_by_stmt), + limit_stmt, + ), + ) + + +@rule +def detach_stmt(): + return DETACH + opt(DATABASE) + schema_name + + +@rule +def drop_stmt(): + return seq( + DROP, + (INDEX | TABLE | TRIGGER | VIEW), + opt(IF, EXISTS), + opt(schema_name, DOT), + any_name, + ) + + +# +# SQLite understands the following binary operators, in order from highest to lowest precedence: +# || +# * / % +# + - +# << >> & | +# < <= > >= +# = == != <> IS IS NOT IS DISTINCT FROM IS NOT DISTINCT FROM IN LIKE GLOB MATCH REGEXP +# AND +# OR +# +@rule +def expr(): + return alt( + literal_value, + BIND_PARAMETER, + opt(opt(schema_name, DOT), table_name, DOT) + column_name, + unary_operator + expr, + expr + PIPE2 + expr, + expr + (STAR | SLASH | PERCENT) + expr, + expr + (PLUS | MINUS) + expr, + expr + (LT2 | GT2 | AMPERSAND | PIPE) + expr, + expr + (LT | LT_EQ | GT | GT_EQ) + expr, + seq( + expr, + alt( + EQUAL, + EQUALEQUAL, + NOT_EQ, + IS, + seq(IS, NOT), + seq(IS, opt(NOT), DISTINCT, FROM), + IN, + LIKE, + GLOB, + MATCH, + REGEXP, + ), + expr, + ), + expr + AND + expr, + expr + OR + expr, + seq( + function_name, + LPAREN, + opt((opt(DISTINCT) + comma_list(expr)) | STAR), + RPAREN, + opt(filter_clause), + opt(over_clause), + ), + LPAREN + comma_list(expr) + RPAREN, + CAST + LPAREN + expr + AS + type_name + RPAREN, + expr + COLLATE + collation_name, + expr + opt(NOT) + (LIKE | GLOB | REGEXP | MATCH) + expr + opt(ESCAPE, expr), + expr + (ISNULL | NOTNULL | seq(NOT, NULL)), + expr + IS + opt(NOT) + expr, + expr + opt(NOT) + BETWEEN + expr + AND + expr, + seq( + expr, + opt(NOT), + IN, + alt( + LPAREN + opt(select_stmt | comma_list(expr)) + RPAREN, + opt(schema_name, DOT) + table_name, + seq( + opt(schema_name, DOT), + table_function_name, + LPAREN, + opt(comma_list(expr)), + RPAREN, + ), + ), + ), + opt(opt(NOT), EXISTS) + LPAREN + select_stmt + RPAREN, + CASE + opt(expr) + one_or_more(WHEN, expr, THEN, expr) + opt(ELSE, expr) + END, + raise_function, + ) + + +@rule +def raise_function(): + return seq( + RAISE, LPAREN, (IGNORE | seq((ROLLBACK | ABORT | FAIL), COMMA, error_message)), RPAREN + ) + + +@rule +def literal_value(): + return alt( + NUMERIC_LITERAL, + STRING_LITERAL, + BLOB_LITERAL, + NULL, + TRUE, + FALSE, + CURRENT_TIME, + CURRENT_DATE, + CURRENT_TIMESTAMP, + ) + + +@rule +def value_row(): + return LPAREN + comma_list(expr) + RPAREN + + +@rule +def values_clause(): + return VALUES + comma_list(value_row) + + +@rule +def insert_stmt(): + return seq( + opt(with_clause), + INSERT | REPLACE | seq(INSERT, OR, REPLACE | ROLLBACK | ABORT | FAIL | IGNORE), + INTO, + opt(schema_name, DOT), + table_name, + opt(AS, table_alias), + opt(LPAREN, comma_list(column_name), RPAREN), + (((values_clause | select_stmt) + opt(upsert_clause)) | seq(DEFAULT, VALUES)), + opt(returning_clause), + ) + + +@rule +def returning_clause(): + return RETURNING + comma_list(result_column) + + +@rule +def upsert_clause(): + return seq( + ON, + CONFLICT, + opt(LPAREN, comma_list(indexed_column), RPAREN, opt(WHERE, expr)), + DO, + alt( + NOTHING, + seq( + UPDATE, + SET, + comma_list((column_name | column_name_list), EQUAL, expr), + opt(WHERE, expr), + ), + ), + ) + + +@rule +def pragma_stmt(): + return seq( + PRAGMA, + opt(schema_name, DOT), + pragma_name, + opt((EQUAL + pragma_value) | (LPAREN + pragma_value + RPAREN)), + ) + + +@rule +def pragma_value(): + return signed_number | name | STRING_LITERAL + + +@rule +def reindex_stmt(): + return REINDEX + opt(collation_name | (opt(schema_name, DOT) + (table_name | index_name))) + + +@rule +def select_stmt(): + return seq( + opt(common_table_stmt), + select_core, + zero_or_more(compound_operator, select_core), + opt(order_by_stmt), + opt(limit_stmt), + ) + + +@rule +def join_clause(): + return table_or_subquery + zero_or_more(join_operator, table_or_subquery, opt(join_constraint)) + + +@rule +def select_core(): + return alt( + seq( + SELECT, + opt(DISTINCT | ALL), + comma_list(result_column), + opt(FROM, comma_list(table_or_subquery) | join_clause), + opt(WHERE, expr), + opt(GROUP, BY, comma_list(expr), opt(HAVING, expr)), + opt(WINDOW, comma_list(window_name, AS, window_defn)), + ), + values_clause, + ) + + +@rule +def factored_select_stmt(): + return select_stmt + + +@rule +def simple_select_stmt(): + return opt(common_table_stmt) + select_core + opt(order_by_stmt) + opt(limit_stmt) + + +@rule +def compound_select_stmt(): + return seq( + opt(common_table_stmt), + select_core, + one_or_more((UNION + ALL) | INTERSECT | EXCEPT, select_core), + opt(order_by_stmt), + opt(limit_stmt), + ) + + +@rule +def table_or_subquery(): + return alt( + seq( + opt(schema_name, DOT), + table_name, + opt(opt(AS), table_alias), + opt(seq(INDEXED, BY, index_name) | (NOT + INDEXED)), + ), + seq( + opt(schema_name, DOT), + table_function_name, + LPAREN, + comma_list(expr), + RPAREN, + opt(AS, table_alias), + ), + seq(LPAREN, comma_list(table_or_subquery) | join_clause, RPAREN), + seq(LPAREN, select_stmt, RPAREN, opt(opt(AS), table_alias)), + ) + + +@rule +def result_column(): + return STAR | seq(table_name, DOT, STAR) | seq(expr, opt(opt(AS), column_alias)) + + +@rule +def join_operator(): + return alt( + COMMA, + seq(opt(NATURAL), opt(seq(LEFT | RIGHT | FULL, opt(OUTER)) | INNER | CROSS), JOIN), + ) + + +@rule +def join_constraint(): + return alt( + ON + expr, + USING + LPAREN + comma_list(column_name) + RPAREN, + ) + + +@rule +def compound_operator(): + return UNION + opt(ALL) | INTERSECT | EXCEPT + + +@rule +def update_stmt(): + return seq( + opt(with_clause), + UPDATE, + opt(OR, ROLLBACK | ABORT | REPLACE | FAIL | IGNORE), + qualified_table_name, + SET, + comma_list(column_name | column_name_list, EQUAL, expr), + opt(FROM, comma_list(table_or_subquery) | join_clause), + opt(WHERE, expr), + opt(returning_clause), + ) + + +@rule +def column_name_list(): + return LPAREN + comma_list(column_name) + RPAREN + + +@rule +def update_stmt_limited(): + return seq( + opt(with_clause), + UPDATE, + opt(OR, ROLLBACK | ABORT | REPLACE | FAIL | IGNORE), + qualified_table_name, + SET, + comma_list(column_name | column_name_list, EQUAL, expr), + opt(WHERE, expr), + opt(returning_clause), + opt(opt(order_by_stmt), limit_stmt), + ) + + +@rule +def qualified_table_name(): + return seq( + opt(schema_name, DOT), + table_name, + opt(AS, alias), + opt(INDEXED + BY + index_name | NOT + INDEXED), + ) + + +@rule +def vacuum_stmt(): + return VACUUM + opt(schema_name) + opt(INTO, filename) + + +@rule +def filter_clause(): + return FILTER + LPAREN + WHERE + expr + RPAREN + + +@rule +def window_defn(): + return seq( + LPAREN, + opt(base_window_name), + opt(PARTITION, BY, comma_list(expr)), + ORDER, + BY, + comma_list(ordering_term), + opt(frame_spec), + RPAREN, + ) + + +@rule +def over_clause(): + return seq( + OVER, + alt( + window_name, + seq( + LPAREN, + opt(base_window_name), + opt(PARTITION, BY, comma_list(expr)), + opt(ORDER, BY, comma_list(ordering_term)), + opt(frame_spec), + RPAREN, + ), + ), + ) + + +@rule +def frame_spec(): + return frame_clause + opt(EXCLUDE, NO + OTHERS | CURRENT + ROW | GROUP | TIES) + + +@rule +def frame_clause(): + return seq( + RANGE | ROWS | GROUPS, + frame_single | seq(BETWEEN, frame_left, AND, frame_right), + ) + + +@rule +def simple_function_invocation(): + return seq(simple_func, LPAREN, comma_list(expr) | STAR, RPAREN) + + +@rule +def aggregate_function_invocation(): + return seq( + aggregate_func, + LPAREN, + opt(opt(DISTINCT), comma_list(expr) | STAR), + RPAREN, + opt(filter_clause), + ) + + +@rule +def window_function_invocation(): + return seq( + window_function, + LPAREN, + opt(comma_list(expr) | STAR), + RPAREN, + opt(filter_clause), + OVER, + window_defn | window_name, + ) + + +@rule +def common_table_stmt(): + return seq(WITH, opt(RECURSIVE), comma_list(common_table_expression)) + + +@rule +def order_by_stmt(): + return seq(ORDER, BY, comma_list(ordering_term)) + + +@rule +def limit_stmt(): + return seq(LIMIT, expr, opt(OFFSET | COMMA, expr)) + + +NULLS = Terminal("NULLS", "nulls") +FIRST = Terminal("FIRST", "first") +LAST = Terminal("LAST", "last") + + +@rule +def ordering_term(): + return seq(expr, opt(COLLATE, collation_name), opt(asc_desc), opt(NULLS, FIRST | LAST)) @rule @@ -361,388 +1061,425 @@ def asc_desc(): return ASC | DESC +PRECEDING = Terminal("PRECEDING", "preceding") +FOLLOWING = Terminal("FOLLOWING", "following") +UNBOUNDED = Terminal("UNBOUNDED", "unbounded") + + @rule -def manipulative_statement(): +def frame_left(): return alt( - close_statement, - commit_statement, - delete_statement_positioned, - delete_statement_searched, - fetch_statement, - insert_statement, - open_statement, - rollback_statement, - select_statement, - update_statement_positioned, - update_statement_searched, + expr + PRECEDING, + expr + FOLLOWING, + CURRENT + ROW, + UNBOUNDED + PRECEDING, ) @rule -def close_statement(): - return CLOSE + cursor +def frame_right(): + return alt( + expr + PRECEDING, + expr + FOLLOWING, + CURRENT + ROW, + UNBOUNDED + FOLLOWING, + ) @rule -def commit_statement(): - return COMMIT + opt(WORK) +def frame_single(): + return alt( + expr + PRECEDING, + UNBOUNDED + PRECEDING, + CURRENT + ROW, + ) @rule -def delete_statement_positioned(): - return DELETE + FROM + table + WHERE + CURRENT + OF + cursor +def window_function(): + return alt( + seq( + FIRST_VALUE | LAST_VALUE, + seq(LPAREN, expr, RPAREN), + OVER, + seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, opt(frame_clause), RPAREN), + ), + seq( + CUME_DIST | PERCENT_RANK, + seq(LPAREN, RPAREN), + OVER, + seq(LPAREN, opt(partition_by), opt(order_by_expr), RPAREN), + ), + seq( + DENSE_RANK | RANK | ROW_NUMBER, + seq(LPAREN, RPAREN), + OVER, + seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN), + ), + seq( + LAG | LEAD, + seq(LPAREN, expr, opt(offset), opt(default_value), RPAREN), + OVER, + seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN), + ), + seq( + NTH_VALUE, + seq(LPAREN, expr, COMMA, signed_number, RPAREN), + OVER, + seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, opt(frame_clause), RPAREN), + ), + seq( + NTILE, + seq(LPAREN, expr, RPAREN), + OVER, + seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN), + ), + ) @rule -def delete_statement_searched(): - return DELETE + FROM + table + opt(where_clause) +def offset(): + return COMMA + signed_number @rule -def fetch_statement(): - return FETCH + cursor + INTO + target_commalist +def default_value(): + return COMMA + signed_number @rule -def insert_statement(): - return seq( +def partition_by(): + return PARTITION + BY + one_or_more(expr) + + +@rule +def order_by_expr(): + return ORDER + BY + one_or_more(expr) + + +@rule +def order_by_expr_asc_desc(): + return ORDER + BY + expr_asc_desc + + +@rule +def expr_asc_desc(): + return comma_list(expr, opt(asc_desc)) + + +# TODO BOTH OF THESE HAVE TO BE REWORKED TO FOLLOW THE SPEC +@rule +def initial_select(): + return select_stmt + + +@rule +def recursive_select(): + return select_stmt + + +@rule +def unary_operator(): + return MINUS | PLUS | TILDE | NOT + + +@rule +def error_message(): + return STRING_LITERAL + + +@rule +def module_argument(): # TODO check what exactly is permitted here + return expr | column_def + + +@rule +def column_alias(): + return IDENTIFIER | STRING_LITERAL + + +@rule +def keyword(): + return alt( + ABORT, + ACTION, + ADD, + AFTER, + ALL, + ALTER, + ANALYZE, + AND, + AS, + ASC, + ATTACH, + AUTOINCREMENT, + BEFORE, + BEGIN, + BETWEEN, + BY, + CASCADE, + CASE, + CAST, + CHECK, + COLLATE, + COLUMN, + COMMIT, + CONFLICT, + CONSTRAINT, + CREATE, + CROSS, + CURRENT_DATE, + CURRENT_TIME, + CURRENT_TIMESTAMP, + DATABASE, + DEFAULT, + DEFERRABLE, + DEFERRED, + DELETE, + DESC, + DETACH, + DISTINCT, + DROP, + EACH, + ELSE, + END, + ESCAPE, + EXCEPT, + EXCLUSIVE, + EXISTS, + EXPLAIN, + FAIL, + FOR, + FOREIGN, + FROM, + FULL, + GLOB, + GROUP, + HAVING, + IF, + IGNORE, + IMMEDIATE, + IN, + INDEX, + INDEXED, + INITIALLY, + INNER, INSERT, + INSTEAD, + INTERSECT, INTO, - table, - opt(LPAREN, column_commalist, RPAREN), - values_or_query_spec, - ) - - -@rule -def values_or_query_spec(): - return alt( - VALUES + LPAREN + insert_atom_commalist + RPAREN, - query_spec, - ) - - -@rule(transparent=True) -def insert_atom_commalist() -> Rule: - return opt(insert_atom_commalist + COMMA) + insert_atom - - -@rule -def insert_atom(): - return atom | NULL - - -@rule -def open_statement(): - return OPEN + cursor - - -@rule -def rollback_statement(): - return ROLLBACK + opt(WORK) - - -@rule -def select_statement(): - return seq( + IS, + ISNULL, + JOIN, + KEY, + LEFT, + LIKE, + LIMIT, + MATCH, + NATURAL, + NO, + NOT, + NOTNULL, + NULL, + OF, + OFFSET, + ON, + OR, + ORDER, + OUTER, + PLAN, + PRAGMA, + PRIMARY, + QUERY, + RAISE, + RECURSIVE, + REFERENCES, + REGEXP, + REINDEX, + RELEASE, + RENAME, + REPLACE, + RESTRICT, + RIGHT, + ROLLBACK, + ROW, + ROWS, + SAVEPOINT, SELECT, - opt(all_distinct), - selection, - INTO, - target_commalist, - table_exp, + SET, + TABLE, + TEMP, + TEMPORARY, + THEN, + TO, + TRANSACTION, + TRIGGER, + UNION, + UNIQUE, + UPDATE, + USING, + VACUUM, + VALUES, + VIEW, + VIRTUAL, + WHEN, + WHERE, + WITH, + WITHOUT, + FIRST_VALUE, + OVER, + PARTITION, + RANGE, + PRECEDING, + UNBOUNDED, + CURRENT, + FOLLOWING, + CUME_DIST, + DENSE_RANK, + LAG, + LAST_VALUE, + LEAD, + NTH_VALUE, + NTILE, + PERCENT_RANK, + RANK, + ROW_NUMBER, + GENERATED, + ALWAYS, + STORED, + TRUE, + FALSE, + WINDOW, + NULLS, + FIRST, + LAST, + FILTER, + GROUPS, + EXCLUDE, ) -@rule(transparent=True) -def all_distinct(): - return ALL | DISTINCT +# TODO: check all names below @rule -def update_statement_positioned(): - return UPDATE + table + SET + assignment_commalist + WHERE + CURRENT + OF + cursor - - -@rule(transparent=True) -def assignment_commalist() -> Rule: - return opt(assignment_commalist + COMMA) + assignment +def name(): + return any_name @rule -def assignment(): - return column + EQUAL + (scalar_exp | NULL) +def function_name(): + return any_name @rule -def update_statement_searched(): - return UPDATE + table + SET + assignment_commalist + opt(where_clause) - - -@rule(transparent=True) -def target_commalist() -> Rule: - # TODO: So many commalists, it would be great if we could make this a - # macro or something. - return opt(target_commalist + COMMA) + target +def schema_name(): + return any_name @rule -def target(): - return parameter_ref - - -# /* query expressions */ +def table_name(): + return any_name @rule -def query_exp() -> Rule: - return query_term | (query_exp + UNION + opt(ALL) + query_term) +def table_or_index_name(): + return any_name @rule -def query_term(): - return query_spec | (LPAREN + query_exp + RPAREN) +def column_name(): + return any_name @rule -def query_spec(): - return SELECT + opt(all_distinct) + selection + table_exp +def collation_name(): + return any_name @rule -def selection(): - return scalar_exp_commalist | STAR +def foreign_table(): + return any_name @rule -def table_exp(): - return from_clause + opt(where_clause) + opt(group_by_clause) + opt(having_clause) +def index_name(): + return any_name @rule -def from_clause(): - return FROM + table_ref_commalist - - -@rule(transparent=True) -def table_ref_commalist() -> Rule: - return opt(table_ref_commalist + COMMA) + table_ref +def trigger_name(): + return any_name @rule -def table_ref(): - return table + opt(range_variable) +def view_name(): + return any_name @rule -def where_clause(): - return WHERE + search_condition +def module_name(): + return any_name @rule -def group_by_clause(): - return GROUP + BY + column_ref_commalist - - -@rule(transparent=True) -def column_ref_commalist() -> Rule: - return opt(column_ref_commalist + COMMA) + column_ref +def pragma_name(): + return any_name @rule -def having_clause(): - return HAVING + search_condition - - -# /* search conditions */ +def savepoint_name(): + return any_name @rule -def search_condition() -> Rule: - return alt( - search_condition + OR + search_condition, - search_condition + AND + search_condition, - NOT + search_condition, - LPAREN + search_condition + RPAREN, - predicate, - ) +def table_alias(): + return any_name @rule -def predicate(): - return alt( - comparison_predicate, - between_predicate, - like_predicate, - test_for_null, - in_predicate, - all_or_any_predicate, - existence_test, - ) +def transaction_name(): + return any_name @rule -def comparison_predicate(): - return scalar_exp + COMPARISON + (scalar_exp | subquery) +def window_name(): + return any_name @rule -def between_predicate(): - return scalar_exp + opt(NOT) + BETWEEN + scalar_exp + AND + scalar_exp +def alias(): + return any_name @rule -def like_predicate(): - return scalar_exp + opt(NOT) + LIKE + atom + opt(escape) +def filename(): + return any_name @rule -def escape(): - return ESCAPE + atom +def base_window_name(): + return any_name @rule -def test_for_null(): - return column_ref + IS + opt(NOT) + NULL +def simple_func(): + return any_name @rule -def in_predicate(): - return scalar_exp + opt(NOT) + IN + LPAREN + alt(subquery | atom_commalist) + RPAREN +def aggregate_func(): + return any_name @rule -def atom_commalist() -> Rule: - return opt(atom_commalist + COMMA) + atom +def table_function_name(): + return any_name @rule -def all_or_any_predicate(): - return scalar_exp + COMPARISON + any_all_some + subquery - - -@rule(transparent=True) -def any_all_some(): - return ANY | ALL | SOME - - -@rule -def existence_test(): - return EXISTS + subquery - - -@rule -def subquery(): - return LPAREN + SELECT + opt(all_distinct) + selection + table_exp + RPAREN - - -# /* scalar expressions */ - - -@rule -def scalar_exp(): - return alt( - scalar_exp + (PLUS | MINUS | STAR | SLASH) + scalar_exp, - PLUS + scalar_exp, - MINUS + scalar_exp, - atom, - column_ref, - function_ref, - LPAREN + scalar_exp + RPAREN, - ) - - -@rule -def scalar_exp_commalist() -> Rule: - return opt(scalar_exp_commalist + COMMA) + scalar_exp - - -@rule -def atom(): - return parameter_ref | literal | USER - - -@rule -def parameter_ref(): - return parameter | (parameter + parameter) | (parameter + INDICATOR + parameter) - - -@rule -def function_ref(): - return alt( - AMMSC + LPAREN + STAR + RPAREN, - AMMSC + LPAREN + DISTINCT + column_ref + RPAREN, - AMMSC + LPAREN + ALL + scalar_exp + RPAREN, - AMMSC + LPAREN + scalar_exp + RPAREN, - ) - - -@rule -def literal(): - return STRING | NUMBER - - -# /* miscellaneous */ - - -@rule -def table(): - return opt(NAME + DOT) + NAME - - -@rule -def column_ref(): - return opt(opt(NAME + DOT) + NAME + DOT) + NAME - - -# /* data types */ - - -@rule -def data_type(): - return alt( - CHARACTER + opt(LPAREN + NUMBER + RPAREN), - NUMERIC + opt(LPAREN + NUMBER + opt(COMMA + NUMBER) + RPAREN), - DECIMAL + opt(LPAREN + NUMBER + opt(COMMA + NUMBER) + RPAREN), - INTEGER, - SMALLINT, - FLOAT + opt(LPAREN + NUMBER + RPAREN), - REAL, - DOUBLE + PRECISION, - ) - - -# /* the various things you can name */ - - -@rule -def column(): - return NAME - - -@rule -def cursor(): - return NAME - - -@rule -def parameter(): - return PARAMETER # :name handled in parser??? - - -@rule -def range_variable(): - return NAME - - -@rule -def user(): - return NAME - - -@rule -def when_action(): - return (GOTO + NAME) | CONTINUE +def any_name(): + return IDENTIFIER | keyword | STRING_LITERAL | seq(LPAREN, any_name, RPAREN) SQL = Grammar( @@ -751,7 +1488,7 @@ SQL = Grammar( (Assoc.LEFT, [OR]), (Assoc.LEFT, [AND]), (Assoc.LEFT, [NOT]), - (Assoc.LEFT, [COMPARISON]), + (Assoc.LEFT, []), (Assoc.LEFT, [PLUS, MINUS]), (Assoc.LEFT, [STAR, SLASH]), # TODO: Unary minus @@ -760,6 +1497,13 @@ SQL = Grammar( name="SQL", ) -if __name__=="__main__": - tbl = SQL.build_table() - print(tbl.format()) +if __name__ == "__main__": + import cProfile + + print("Starting...") + with cProfile.Profile() as pr: + try: + SQL.build_table() + finally: + pr.dump_stats("sql.pprof") + print("Wrote output to sql.pprof") diff --git a/parser/parser.py b/parser/parser.py index 862d53a..78cc51e 100644 --- a/parser/parser.py +++ b/parser/parser.py @@ -1116,7 +1116,9 @@ class ParserGenerator: # Check to make sure they didn't use anything that will give us # heartburn later. - reserved = [a for a in alphabet if (a.startswith("__") and not a.startswith("__gen_")) or a == "$"] + reserved = [ + a for a in alphabet if (a.startswith("__") and not a.startswith("__gen_")) or a == "$" + ] if reserved: raise ValueError( "Can't use {symbols} in grammars, {what} reserved.".format( @@ -1619,6 +1621,7 @@ class Terminal(Rule): _CURRENT_DEFINITION: str = "__global" _CURRENT_GEN_INDEX: int = 0 + class NonTerminal(Rule): """A non-terminal, or a production, in the grammar. @@ -1699,7 +1702,6 @@ class NonTerminal(Rule): _CURRENT_DEFINITION = prev_defn _CURRENT_GEN_INDEX = prev_idx - return self._body def flatten( @@ -1827,10 +1829,11 @@ def one_or_more(*args: Rule) -> Rule: global _CURRENT_DEFINITION global _CURRENT_GEN_INDEX - tail : NonTerminal | None = None + tail: NonTerminal | None = None + def impl() -> Rule: nonlocal tail - assert(tail is not None) + assert tail is not None return opt(tail) + seq(*args) tail = NonTerminal( @@ -1842,6 +1845,7 @@ def one_or_more(*args: Rule) -> Rule: return tail + def zero_or_more(*args: Rule) -> Rule: """Generate a rule that matches a repetition of zero or more of the specified rule. @@ -1852,6 +1856,7 @@ def zero_or_more(*args: Rule) -> Rule: """ return opt(one_or_more(*args)) + @typing.overload def rule(f: typing.Callable, /) -> NonTerminal: ... @@ -2848,9 +2853,12 @@ class TriviaMode(enum.Enum): # Finally, the grammar class. ############################################################################### -PrecedenceList = list[typing.Tuple[Assoc, list[Terminal|NonTerminal]]] +PrecedenceList = list[typing.Tuple[Assoc, list[Terminal | NonTerminal]]] -def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str,NonTerminal], dict[str,Terminal]]: + +def gather_grammar( + start: NonTerminal, trivia: list[Terminal] +) -> tuple[dict[str, NonTerminal], dict[str, Terminal]]: """Starting from the given NonTerminal, gather all of the symbols (NonTerminals and Terminals) that make up the grammar. """ @@ -2894,9 +2902,11 @@ def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str existing = named_rules.get(rule.name) if existing is not None: # TODO TEST - raise ValueError(f"""Found more than one rule named {rule.name}: + raise ValueError( + f"""Found more than one rule named {rule.name}: - {existing.definition_location} -- {rule.definition_location}""") +- {rule.definition_location}""" + ) named_rules[rule.name] = rule named_terminals: dict[str, Terminal] = {} @@ -2904,16 +2914,20 @@ def gather_grammar(start: NonTerminal, trivia: list[Terminal]) -> tuple[dict[str existing = named_terminals.get(terminal.name) if existing is not None: # TODO TEST - raise ValueError(f"""Found more than one terminal named {terminal.name}: + raise ValueError( + f"""Found more than one terminal named {terminal.name}: - {existing.definition_location} -- {terminal.definition_location}""") +- {terminal.definition_location}""" + ) existing_rule = named_rules.get(terminal.name) if existing_rule is not None: # TODO TEST - raise ValueError(f"""Found a terminal and a rule both named {terminal.name}: + raise ValueError( + f"""Found a terminal and a rule both named {terminal.name}: - The rule was defined at {existing_rule.definition_location} -- The terminal was defined at {terminal.definition_location}""") +- The terminal was defined at {terminal.definition_location}""" + ) named_terminals[terminal.name] = terminal @@ -3010,7 +3024,7 @@ class Grammar: generate_nonterminal_dict- less useful to people, probably, but it is the input form needed by the Generator. """ - grammar: list[tuple[str,list[str]]] = [ + grammar: list[tuple[str, list[str]]] = [ (rule.name, [s.name for s in production]) for rule in self._nonterminals.values() for production in rule.body