# lrparsers/examples/sql.py
#
# (Listing metadata from the source viewer: 1564 lines, 36 KiB, Python.)

from parser import *
# Identifier: a letter or underscore followed by any number of letters,
# digits, or underscores.
IDENTIFIER = Terminal(
    "IDENTIFIER",
    Re.seq(
        Re.set(("a", "z"), ("A", "Z"), "_"),
        Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
    ),
)
# Single-quoted string. The body is any run of characters other than a quote
# or backslash, or a backslash followed by any one character.
STRING_LITERAL = Terminal(
    "STRING_LITERAL",
    Re.seq(
        Re.literal("'"),
        (~Re.set("'", "\\") | (Re.set("\\") + Re.any())).star(),
        Re.literal("'"),
    ),
    highlight=highlight.string.quoted,
)
# One or more decimal digits (no sign, fraction, or exponent).
# NOTE(review): NUMBER also matches a bare run of digits, so the two patterns
# overlap; confirm how the lexer is expected to pick between them.
NUMERIC_LITERAL = Terminal("NUMERIC_LITERAL", Re.set(("0", "9")).plus())
# Blob literal: the letter `X` immediately followed by a quoted string.
# The token name now matches the variable (it used to read "BLOB_TERMINAL"),
# and the regex is built from STRING_LITERAL's pattern rather than from the
# Terminal object itself, which is not a regular-expression node.
BLOB_LITERAL = Terminal("BLOB_LITERAL", Re.literal("X") + STRING_LITERAL.pattern)
# Bind parameter: either `?` followed by optional digits, or one of `:`, `@`,
# `$` followed by an identifier. The identifier part uses IDENTIFIER's
# pattern (a regex node) instead of the Terminal object, and the two
# alternatives are parenthesised so the grouping is explicit.
BIND_PARAMETER = Terminal(
    "BIND_PARAMETER",
    (Re.literal("?") + Re.set(("0", "9")).star())
    | (Re.set(":", "@", "$") + IDENTIFIER.pattern),
)
# Decimal number: digits, an optional `.digits` fraction, and an optional
# exponent (`e`/`E`, optional sign, digits).
# NOTE(review): no rule visible in this file references NUMBER; the grammar
# rules use NUMERIC_LITERAL instead. Confirm whether it is still needed.
NUMBER = Terminal(
    "NUMBER",
    Re.seq(
        Re.set(("0", "9")).plus(),
        Re.seq(
            Re.literal("."),
            Re.set(("0", "9")).plus(),
        ).question(),
        Re.seq(
            Re.set("e", "E"),
            Re.set("+", "-").question(),
            Re.set(("0", "9")).plus(),
        ).question(),
    ),
    highlight=highlight.constant.numeric,
)
# Keyword and operator terminals. Each one pairs a token name with the exact
# string used as its pattern.
# NOTE(review): keyword patterns are lowercase; whether matching is
# case-insensitive depends on the lexer and is not visible here.
OR = Terminal("OR", "or")
AND = Terminal("AND", "and")
NOT = Terminal("NOT", "not")
PLUS = Terminal("PLUS", "+")
MINUS = Terminal("MINUS", "-")
STAR = Terminal("STAR", "*")
SLASH = Terminal("SLASH", "/")
ABORT = Terminal("ABORT", "abort")
ACTION = Terminal("ACTION", "action")
ADD = Terminal("ADD", "add")
AFTER = Terminal("AFTER", "after")
ALL = Terminal("ALL", "all")
ALTER = Terminal("ALTER", "alter")
ALWAYS = Terminal("ALWAYS", "always")
AMMSC = Terminal("AMMSC", "ammsc")
AMPERSAND = Terminal("AMPERSAND", "&")
# Token name corrected from the misspelled "ANALYIZE" so that the emitted
# token matches the variable and the keyword it stands for.
ANALYZE = Terminal("ANALYZE", "analyze")
# Keyword and operator terminals (ANY .. IGNORE). Each one pairs a token
# name with the exact string used as its pattern.
ANY = Terminal("ANY", "any")
AS = Terminal("AS", "as")
ASC = Terminal("ASC", "asc")
ATTACH = Terminal("ATTACH", "attach")
AUTHORIZATION = Terminal("AUTHORIZATION", "authorization")
AUTOINCREMENT = Terminal("AUTOINCREMENT", "autoincrement")
BEFORE = Terminal("BEFORE", "before")
BEGIN = Terminal("BEGIN", "begin")
BETWEEN = Terminal("BETWEEN", "between")
BY = Terminal("BY", "by")
CASCADE = Terminal("CASCADE", "cascade")
CASE = Terminal("CASE", "case")
CAST = Terminal("CAST", "cast")
CHARACTER = Terminal("CHARACTER", "character")
CHECK = Terminal("CHECK", "check")
CLOSE = Terminal("CLOSE", "close")
COLLATE = Terminal("COLLATE", "collate")
COLUMN = Terminal("COLUMN", "column")
COMMIT = Terminal("COMMIT", "commit")
CONFLICT = Terminal("CONFLICT", "conflict")
CONSTRAINT = Terminal("CONSTRAINT", "constraint")
CONTINUE = Terminal("CONTINUE", "continue")
CREATE = Terminal("CREATE", "create")
CROSS = Terminal("CROSS", "cross")
CURRENT = Terminal("CURRENT", "current")
CURRENT_DATE = Terminal("CURRENT_DATE", "current_date")
CURRENT_TIME = Terminal("CURRENT_TIME", "current_time")
CURRENT_TIMESTAMP = Terminal("CURRENT_TIMESTAMP", "current_timestamp")
CURSOR = Terminal("CURSOR", "cursor")
DATABASE = Terminal("DATABASE", "database")
DECIMAL = Terminal("DECIMAL", "decimal")
DECLARE = Terminal("DECLARE", "declare")
DEFAULT = Terminal("DEFAULT", "default")
DEFERRABLE = Terminal("DEFERRABLE", "deferrable")
DEFERRED = Terminal("DEFERRED", "deferred")
DELETE = Terminal("DELETE", "delete")
DESC = Terminal("DESC", "desc")
DETACH = Terminal("DETACH", "detach")
DISTINCT = Terminal("DISTINCT", "distinct")
DOUBLE = Terminal("DOUBLE", "double")
DROP = Terminal("DROP", "drop")
EACH = Terminal("EACH", "each")
ELSE = Terminal("ELSE", "else")
END = Terminal("END", "end")
EQUALEQUAL = Terminal("EQUALEQUAL", "==")
ESCAPE = Terminal("ESCAPE", "escape")
EXCLUSIVE = Terminal("EXCLUSIVE", "exclusive")
EXISTS = Terminal("EXISTS", "exists")
EXPLAIN = Terminal("EXPLAIN", "explain")
FAIL = Terminal("FAIL", "fail")
FALSE = Terminal("FALSE", "false")
FETCH = Terminal("FETCH", "fetch")
FLOAT = Terminal("FLOAT", "float")
FOR = Terminal("FOR", "for")
FOREIGN = Terminal("FOREIGN", "foreign")
FOUND = Terminal("FOUND", "found")
FROM = Terminal("FROM", "from")
FULL = Terminal("FULL", "full")
GENERATED = Terminal("GENERATED", "generated")
GLOB = Terminal("GLOB", "glob")
GOTO = Terminal("GOTO", "goto")
GRANT = Terminal("GRANT", "grant")
GROUP = Terminal("GROUP", "group")
GT = Terminal("GT", ">")
GT2 = Terminal("GT2", ">>")
GT_EQ = Terminal("GT_EQ", ">=")
HAVING = Terminal("HAVING", "having")
IF = Terminal("IF", "if")
IGNORE = Terminal("IGNORE", "ignore")
IMMEDIATE = Terminal("IMMEDIATE", "immediate")
# `IMMEDATE` used to be a second terminal that reused the token name
# "IMMEDIATE" with the misspelled pattern "immedate". No rule in this file
# refers to it; the name is kept as an alias so existing references to it
# keep working without registering a bogus duplicate token.
IMMEDATE = IMMEDIATE
# Keyword and operator terminals (IN .. NOTNULL). Each one pairs a token
# name with the exact string used as its pattern.
IN = Terminal("IN", "in")
INDEX = Terminal("INDEX", "index")
INDICATOR = Terminal("INDICATOR", "indicator")
INITIALLY = Terminal("INITIALLY", "initially")
INNER = Terminal("INNER", "inner")
INSERT = Terminal("INSERT", "insert")
INSTEAD = Terminal("INSTEAD", "instead")
INTEGER = Terminal("INTEGER", "integer")
INTO = Terminal("INTO", "into")
IS = Terminal("IS", "is")
ISNULL = Terminal("ISNULL", "isnull")  # ??
JOIN = Terminal("JOIN", "join")
KEY = Terminal("KEY", "key")
LANGUAGE = Terminal("LANGUAGE", "language")
LEFT = Terminal("LEFT", "left")
LIKE = Terminal("LIKE", "like")
LT = Terminal("LT", "<")
LT2 = Terminal("LT2", "<<")
LT_EQ = Terminal("LT_EQ", "<=")
MATCH = Terminal("MATCH", "match")
NATURAL = Terminal("NATURAL", "natural")
NO = Terminal("NO", "no")
NOTHING = Terminal("NOTHING", "nothing")
NOTNULL = Terminal("NOTNULL", "notnull")  # ??
# Inequality operator; both spellings (`!=` and `<>`) produce the same token.
# The token name now matches the variable (it used to read "NOT_EQ1").
NOT_EQ = Terminal("NOT_EQ", Re.literal("!=") | Re.literal("<>"))
# Keyword and operator terminals (NULL .. PERCENT_RANK). Each one pairs a
# token name with the exact string used as its pattern.
NULL = Terminal("NULL", "null")
NUMERIC = Terminal("NUMERIC", "numeric")
OF = Terminal("OF", "of")
ON = Terminal("ON", "on")
OPEN = Terminal("OPEN", "open")
OPTION = Terminal("OPTION", "option")
ORDER = Terminal("ORDER", "order")
OUTER = Terminal("OUTER", "outer")
PARAMETER = Terminal("PARAMETER", "parameter")
PERCENT = Terminal("PERCENT", "%")
PIPE = Terminal("PIPE", "|")
PIPE2 = Terminal("PIPE2", "||")
PLAN = Terminal("PLAN", "plan")
PRAGMA = Terminal("PRAGMA", "pragma")
PRECISION = Terminal("PRECISION", "precision")
PRIMARY = Terminal("PRIMARY", "primary")
PRIVILEGES = Terminal("PRIVILEGES", "privileges")
PROCEDURE = Terminal("PROCEDURE", "procedure")
PUBLIC = Terminal("PUBLIC", "public")
QUERY = Terminal("QUERY", "query")
RAISE = Terminal("RAISE", "raise")
REAL = Terminal("REAL", "real")
REFERENCES = Terminal("REFERENCES", "references")
REGEXP = Terminal("REGEXP", "regexp")
REINDEX = Terminal("REINDEX", "reindex")
RELEASE = Terminal("RELEASE", "release")
RENAME = Terminal("RENAME", "rename")
REPLACE = Terminal("REPLACE", "replace")
RESTRICT = Terminal("RESTRICT", "restrict")
RETURNING = Terminal("RETURNING", "returning")
RIGHT = Terminal("RIGHT", "right")
ROLLBACK = Terminal("ROLLBACK", "rollback")
ROW = Terminal("ROW", "row")
SAVEPOINT = Terminal("SAVEPOINT", "savepoint")
SCHEMA = Terminal("SCHEMA", "schema")
SELECT = Terminal("SELECT", "select")
SET = Terminal("SET", "set")
SMALLINT = Terminal("SMALLINT", "smallint")
SOME = Terminal("SOME", "some")
SQLCODE = Terminal("SQLCODE", "sqlcode")
SQLERROR = Terminal("SQLERROR", "sqlerror")
STORED = Terminal("STORED", "stored")
TABLE = Terminal("TABLE", "table")
TEMP = Terminal("TEMP", "temp")
TEMPORARY = Terminal("TEMPORARY", "temporary")
THEN = Terminal("THEN", "then")
TO = Terminal("TO", "to")
TRANSACTION = Terminal("TRANSACTION", "transaction")
TRIGGER = Terminal("TRIGGER", "trigger")
TRUE = Terminal("TRUE", "true")
UNION = Terminal("UNION", "union")
UNIQUE = Terminal("UNIQUE", "unique")
UPDATE = Terminal("UPDATE", "update")
USER = Terminal("USER", "user")
USING = Terminal("USING", "using")
VALUES = Terminal("VALUES", "values")
VIEW = Terminal("VIEW", "view")
VIRTUAL = Terminal("VIRTUAL", "virtual")
WHEN = Terminal("WHEN", "when")
WHENEVER = Terminal("WHENEVER", "whenever")
WHERE = Terminal("WHERE", "where")
WITH = Terminal("WITH", "with")
WITHOUT = Terminal("WITHOUT", "without")
WORK = Terminal("WORK", "work")
INTERSECT = Terminal("INTERSECT", "intersect")
EXCEPT = Terminal("EXCEPT", "except")
INDEXED = Terminal("INDEXED", "indexed")
VACUUM = Terminal("VACUUM", "vacuum")
FILTER = Terminal("FILTER", "filter")
PARTITION = Terminal("PARTITION", "partition")
EXCLUDE = Terminal("EXCLUDE", "exclude")
OTHERS = Terminal("OTHERS", "others")
TIES = Terminal("TIES", "ties")
RANGE = Terminal("RANGE", "range")
ROWS = Terminal("ROWS", "rows")
GROUPS = Terminal("GROUPS", "groups")
OVER = Terminal("OVER", "over")
RECURSIVE = Terminal("RECURSIVE", "recursive")
LIMIT = Terminal("LIMIT", "limit")
OFFSET = Terminal("OFFSET", "offset")
FIRST_VALUE = Terminal("FIRST_VALUE", "first_value")
LAST_VALUE = Terminal("LAST_VALUE", "last_value")
CUME_DIST = Terminal("CUME_DIST", "cume_dist")
PERCENT_RANK = Terminal("PERCENT_RANK", "percent_rank")
# Bitwise-not operator. The pattern is the `~` character itself; it used to
# be the word "tilde", so `~x` could never produce this token.
TILDE = Terminal("TILDE", "~")
# Window-function keywords and punctuation terminals. Each one pairs a token
# name with the exact string used as its pattern.
DENSE_RANK = Terminal("DENSE_RANK", "dense_rank")
RANK = Terminal("RANK", "rank")
ROW_NUMBER = Terminal("ROW_NUMBER", "row_number")
LAG = Terminal("LAG", "lag")
LEAD = Terminal("LEAD", "lead")
NTH_VALUE = Terminal("NTH_VALUE", "nth_value")
NTILE = Terminal("NTILE", "ntile")
WINDOW = Terminal("WINDOW", "window")
DO = Terminal("DO", "do")
SEMICOLON = Terminal("SEMICOLON", ";")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
COMMA = Terminal("COMMA", ",")
EQUAL = Terminal("EQUAL", "=")
DOT = Terminal("DOT", ".")
# Trivia terminals; these three are the ones passed as `trivia=` to the
# Grammar at the bottom of the file.
# Runs of spaces and tabs.
BLANKS = Terminal("BLANKS", Re.set(" ", "\t").plus())
# A single carriage return or line feed.
LINE_BREAK = Terminal("LINE_BREAK", Re.set("\r", "\n"), trivia_mode=TriviaMode.NewLine)
# `--` followed by everything up to (not including) the next line feed.
COMMENT = Terminal(
    "COMMENT",
    Re.seq(Re.literal("--"), Re.set("\n").invert().star()),
    highlight=highlight.comment.line,
    trivia_mode=TriviaMode.LineComment,
)
@rule
def sql_list():
    # Start symbol: one or more statements, each terminated by `;`.
    # Written left-recursively (list, then one more statement).
    first_statement = seq(sql, SEMICOLON)
    more_statements = seq(sql_list, sql, SEMICOLON)
    return first_statement | more_statements
@rule
def sql():
    # A single statement, optionally prefixed by EXPLAIN [QUERY PLAN].
    explain_prefix = opt(EXPLAIN, opt(QUERY, PLAN))
    statement = alt(
        alter_table_stmt,
        analyze_stmt,
        attach_stmt,
        begin_stmt,
        commit_stmt,
        create_index_stmt,
        create_table_stmt,
        create_trigger_stmt,
        create_view_stmt,
        create_virtual_table_stmt,
        delete_stmt,
        delete_stmt_limited,
        detach_stmt,
        drop_stmt,
        insert_stmt,
        pragma_stmt,
        reindex_stmt,
        release_stmt,
        rollback_stmt,
        savepoint_stmt,
        select_stmt,
        update_stmt,
        update_stmt_limited,
        vacuum_stmt,
    )
    return seq(explain_prefix, statement)
@rule
def alter_table_stmt():
    # ALTER TABLE [schema.]table followed by one of:
    #   RENAME TO new_name
    #   RENAME [COLUMN] old_name TO new_name
    #   ADD [COLUMN] column_def
    #   DROP [COLUMN] name
    # The COLUMN keyword in the column-rename form is optional in SQLite (as
    # it already was for ADD and DROP here); it used to be required.
    return (
        ALTER
        + TABLE
        + opt(name + DOT)
        + name
        + alt(
            RENAME + alt((TO + name), (opt(COLUMN) + name + TO + name)),
            (ADD + opt(COLUMN) + column_def),
            (DROP + opt(COLUMN) + name),
        )
    )
@rule
def analyze_stmt():
    # ANALYZE, optionally followed by a (possibly dotted) name.
    target = opt(opt(name, DOT), name)
    return seq(ANALYZE, target)
@rule
def attach_stmt():
    # ATTACH [DATABASE] expr AS name
    return seq(ATTACH, opt(DATABASE), expr, AS, name)
@rule
def begin_stmt():
    # BEGIN [DEFERRED | IMMEDIATE | EXCLUSIVE] [TRANSACTION [name]]
    mode = opt(alt(DEFERRED, IMMEDIATE, EXCLUSIVE))
    transaction = opt(TRANSACTION, opt(name))
    return seq(BEGIN, mode, transaction)
@rule
def commit_stmt():
    # (COMMIT | END) [TRANSACTION]
    return seq(alt(COMMIT, END), opt(TRANSACTION))
@rule
def rollback_stmt():
    # ROLLBACK [TRANSACTION] [TO [SAVEPOINT] name]
    to_savepoint = opt(TO, opt(SAVEPOINT), name)
    return seq(ROLLBACK, opt(TRANSACTION), to_savepoint)
@rule
def savepoint_stmt():
    # SAVEPOINT name
    return seq(SAVEPOINT, name)
@rule
def release_stmt():
    # RELEASE [SAVEPOINT] name
    return seq(RELEASE, opt(SAVEPOINT), name)
def comma_list(*rules: Rule) -> Rule:
    """Build a non-empty, comma-separated list of the sequence `rules`.

    The arguments are joined into one sequence; the result matches that
    sequence once, followed by zero or more `COMMA <sequence>` repetitions.
    A trailing comma is not accepted.
    """
    item = seq(*rules)
    return item + zero_or_more(COMMA, item)
@rule
def create_index_stmt():
    # CREATE [UNIQUE] INDEX [IF NOT EXISTS] [schema.]index ON table
    #   ( indexed_column, ... ) [WHERE expr]
    return (
        CREATE
        + opt(UNIQUE)
        + INDEX
        + opt(IF, NOT, EXISTS)
        + opt(name, DOT)
        + name
        + ON
        + name
        + LPAREN
        + comma_list(indexed_column)
        + RPAREN
        + opt(WHERE, expr)
    )
@rule
def indexed_column():
    # (name | expr) [COLLATE name] [ASC | DESC]
    return seq(alt(name, expr), opt(COLLATE, name), opt(asc_desc))
@rule
def create_table_stmt():
    # CREATE [TEMP | TEMPORARY] TABLE [IF NOT EXISTS] [schema.]table, then
    # either a parenthesised column/constraint list (optionally followed by
    # WITHOUT <identifier>) or AS <select>.
    column_form = (
        LPAREN
        + comma_list(column_def)
        + zero_or_more(COMMA, table_constraint)
        + RPAREN
        + opt(WITHOUT, IDENTIFIER)
    )
    select_form = AS + select_stmt
    return (
        CREATE
        + opt(alt(TEMP, TEMPORARY))
        + TABLE
        + opt(IF, NOT, EXISTS)
        + opt(name, DOT)
        + name
        + alt(column_form, select_form)
    )
@rule
def column_def():
    # name [type_name] column_constraint*
    return seq(name, opt(type_name), zero_or_more(column_constraint))
@rule
def type_name():
    # name [ ( signed_number [, signed_number] ) ]
    size_args = opt(LPAREN, signed_number, opt(COMMA, signed_number), RPAREN)
    return seq(name, size_args)
@rule
def column_constraint():
    # [CONSTRAINT name] followed by exactly one constraint body.
    primary_key = seq(PRIMARY, KEY, opt(asc_desc), opt(conflict_clause), opt(AUTOINCREMENT))
    null_or_unique = seq(opt(NOT), alt(NULL, UNIQUE), opt(conflict_clause))
    default_value_clause = seq(
        DEFAULT,
        alt(signed_number, literal_value, seq(LPAREN, expr, RPAREN)),
    )
    collation = seq(COLLATE, name)
    generated = seq(
        opt(GENERATED, ALWAYS),
        AS,
        LPAREN,
        expr,
        RPAREN,
        opt(alt(STORED, VIRTUAL)),
    )
    return opt(CONSTRAINT, name) + alt(
        primary_key,
        null_or_unique,
        default_value_clause,
        collation,
        foreign_key_clause,
        generated,
    )
@rule
def signed_number():
    # [+ | -] NUMERIC_LITERAL
    sign = opt(alt(PLUS, MINUS))
    return seq(sign, NUMERIC_LITERAL)
@rule
def table_constraint():
    # [CONSTRAINT name] followed by a key, CHECK, or FOREIGN KEY constraint.
    key_constraint = (
        alt(seq(PRIMARY, KEY), UNIQUE)
        + LPAREN
        + comma_list(indexed_column)
        + RPAREN
        + opt(conflict_clause)
    )
    check_constraint = CHECK + LPAREN + expr + RPAREN
    foreign_key = (
        FOREIGN
        + KEY
        + LPAREN
        + comma_list(name)
        + RPAREN
        + foreign_key_clause
    )
    return opt(CONSTRAINT, name) + alt(key_constraint, check_constraint, foreign_key)
@rule
def foreign_key_clause():
    # REFERENCES table [(col, ...)], then any number of ON DELETE/UPDATE
    # actions or MATCH clauses, then an optional [NOT] DEFERRABLE clause.
    referential_action = alt(
        seq(SET, alt(NULL, DEFAULT)),
        CASCADE,
        RESTRICT,
        seq(NO, ACTION),
    )
    on_event = seq(ON, alt(DELETE, UPDATE), referential_action)
    match_clause = seq(MATCH, name)
    deferrable = opt(
        opt(NOT),
        DEFERRABLE,
        opt(INITIALLY, alt(DEFERRED, IMMEDIATE)),
    )
    return (
        REFERENCES
        + name
        + opt(LPAREN, comma_list(name), RPAREN)
        + zero_or_more(alt(on_event, match_clause))
        + deferrable
    )
@rule
def conflict_clause():
    # ON CONFLICT (ROLLBACK | ABORT | FAIL | IGNORE | REPLACE)
    resolution = alt(ROLLBACK, ABORT, FAIL, IGNORE, REPLACE)
    return ON + CONFLICT + resolution
@rule
def create_trigger_stmt():
    # CREATE [TEMP | TEMPORARY] TRIGGER [IF NOT EXISTS] [schema.]name
    #   [BEFORE | AFTER | INSTEAD OF] event ON table
    #   [FOR EACH ROW] [WHEN expr] BEGIN (stmt ;)+ END
    timing = opt(alt(BEFORE, AFTER, seq(INSTEAD, OF)))
    event = alt(DELETE, INSERT, seq(UPDATE, opt(OF, comma_list(name))))
    body_statement = alt(update_stmt, insert_stmt, delete_stmt, select_stmt)
    return (
        CREATE
        + opt(alt(TEMP, TEMPORARY))
        + TRIGGER
        + opt(IF, NOT, EXISTS)
        + opt(name, DOT)
        + name
        + timing
        + event
        + ON
        + name
        + opt(FOR, EACH, ROW)
        + opt(WHEN, expr)
        + BEGIN
        + one_or_more(body_statement, SEMICOLON)
        + END
    )
@rule
def create_view_stmt():
    # CREATE [TEMP | TEMPORARY] VIEW [IF NOT EXISTS] [schema.]view
    #   [(col, ...)] AS select
    return (
        CREATE
        + opt(alt(TEMP, TEMPORARY))
        + VIEW
        + opt(IF, NOT, EXISTS)
        + opt(name, DOT)
        + name
        + opt(LPAREN, comma_list(name), RPAREN)
        + AS
        + select_stmt
    )
@rule
def create_virtual_table_stmt():
    # CREATE VIRTUAL TABLE [IF NOT EXISTS] [schema.]table USING module
    #   [(module_argument, ...)]
    return (
        CREATE
        + VIRTUAL
        + TABLE
        + opt(IF, NOT, EXISTS)
        + opt(name, DOT)
        + name
        + USING
        + name
        + opt(LPAREN, comma_list(module_argument), RPAREN)
    )
@rule
def with_clause():
    # WITH [RECURSIVE] cte AS ( select ), ...
    cte_list = comma_list(cte_table_name, AS, LPAREN, select_stmt, RPAREN)
    return WITH + opt(RECURSIVE) + cte_list
@rule
def cte_table_name():
    # name [(col, ...)]
    return seq(name, opt(LPAREN, comma_list(name), RPAREN))
@rule
def recursive_cte():
    # cte_table_name AS ( initial_select UNION [ALL] recursive_select )
    return (
        cte_table_name
        + AS
        + LPAREN
        + initial_select
        + UNION
        + opt(ALL)
        + recursive_select
        + RPAREN
    )
@rule
def common_table_expression():
    # name [(col, ...)] AS ( select )
    return (
        name
        + opt(LPAREN, comma_list(name), RPAREN)
        + AS
        + LPAREN
        + select_stmt
        + RPAREN
    )
@rule
def delete_stmt():
    # [with_clause] DELETE FROM table [WHERE expr] [returning_clause]
    return (
        opt(with_clause)
        + DELETE
        + FROM
        + qualified_table_name
        + opt(WHERE, expr)
        + opt(returning_clause)
    )
@rule
def delete_stmt_limited():
    # Same shape as delete_stmt, plus an optional trailing
    # [ORDER BY ...] LIMIT ... clause.
    order_and_limit = opt(opt(order_by_stmt), limit_stmt)
    return (
        opt(with_clause)
        + DELETE
        + FROM
        + qualified_table_name
        + opt(WHERE, expr)
        + opt(returning_clause)
        + order_and_limit
    )
@rule
def detach_stmt():
    # DETACH [DATABASE] name
    return seq(DETACH, opt(DATABASE), name)
@rule
def drop_stmt():
    # DROP (INDEX | TABLE | TRIGGER | VIEW) [IF EXISTS] [schema.]name
    object_kind = alt(INDEX, TABLE, TRIGGER, VIEW)
    return DROP + object_kind + opt(IF, EXISTS) + opt(name, DOT) + name
#
# SQLite understands the following binary operators, in order from highest to lowest precedence:
# ||
# * / %
# + -
# << >> & |
# < <= > >=
# = == != <> IS IS NOT IS DISTINCT FROM IS NOT DISTINCT FROM IN LIKE GLOB MATCH REGEXP
# AND
# OR
#
@rule
def expr():
    # Expression rule: one flat alternation covering literals, column
    # references, unary/binary operators, function calls, and the keyword
    # based predicate forms. Operator nesting is not encoded structurally
    # here; binary forms are simply `expr <op> expr`.
    # NOTE(review): LIKE/GLOB/REGEXP/MATCH, IS [NOT], and IN each appear both
    # in the generic binary alternative and in a dedicated alternative
    # further down, so some inputs can be derived in more than one way --
    # confirm this overlap is intended.
    return alt(
        # Leaf forms: literal, bind parameter, optionally qualified name
        # ([[a.]b.]c).
        literal_value,
        BIND_PARAMETER,
        opt(opt(name, DOT), name, DOT) + name,
        # Prefix operator.
        unary_operator + expr,
        # Binary operators, grouped by the tiers listed in the comment above
        # this rule.
        expr + PIPE2 + expr,
        expr + (STAR | SLASH | PERCENT) + expr,
        expr + (PLUS | MINUS) + expr,
        expr + (LT2 | GT2 | AMPERSAND | PIPE) + expr,
        expr + (LT | LT_EQ | GT | GT_EQ) + expr,
        seq(
            expr,
            alt(
                EQUAL,
                EQUALEQUAL,
                NOT_EQ,
                IS,
                seq(IS, NOT),
                seq(IS, opt(NOT), DISTINCT, FROM),
                IN,
                LIKE,
                GLOB,
                MATCH,
                REGEXP,
            ),
            expr,
        ),
        expr + AND + expr,
        expr + OR + expr,
        # Function call: name([DISTINCT] args | *) with optional FILTER and
        # OVER clauses.
        seq(
            name,
            LPAREN,
            opt((opt(DISTINCT) + comma_list(expr)) | STAR),
            RPAREN,
            opt(filter_clause),
            opt(over_clause),
        ),
        # Parenthesised expression list.
        LPAREN + comma_list(expr) + RPAREN,
        CAST + LPAREN + expr + AS + type_name + RPAREN,
        expr + COLLATE + name,
        # Pattern matching with optional NOT and ESCAPE.
        expr + opt(NOT) + (LIKE | GLOB | REGEXP | MATCH) + expr + opt(ESCAPE, expr),
        # Null tests.
        expr + (ISNULL | NOTNULL | seq(NOT, NULL)),
        expr + IS + opt(NOT) + expr,
        expr + opt(NOT) + BETWEEN + expr + AND + expr,
        # [NOT] IN against a parenthesised subquery/list, a table name, or a
        # table-valued function call.
        seq(
            expr,
            opt(NOT),
            IN,
            alt(
                LPAREN + opt(select_stmt | comma_list(expr)) + RPAREN,
                opt(name, DOT) + name,
                seq(
                    opt(name, DOT),
                    name,
                    LPAREN,
                    opt(comma_list(expr)),
                    RPAREN,
                ),
            ),
        ),
        # Subquery, optionally prefixed by [NOT] EXISTS.
        opt(opt(NOT), EXISTS) + LPAREN + select_stmt + RPAREN,
        CASE + opt(expr) + one_or_more(WHEN, expr, THEN, expr) + opt(ELSE, expr) + END,
        raise_function,
    )
@rule
def raise_function():
    # RAISE ( IGNORE | (ROLLBACK | ABORT | FAIL) , error_message )
    severity = alt(ROLLBACK, ABORT, FAIL)
    argument = alt(IGNORE, seq(severity, COMMA, error_message))
    return RAISE + LPAREN + argument + RPAREN
@rule
def literal_value():
    # Any literal token: number, string, blob, NULL, booleans, or one of the
    # CURRENT_* keywords.
    return (
        NUMERIC_LITERAL
        | STRING_LITERAL
        | BLOB_LITERAL
        | NULL
        | TRUE
        | FALSE
        | CURRENT_TIME
        | CURRENT_DATE
        | CURRENT_TIMESTAMP
    )
@rule
def value_row():
    # ( expr, ... )
    return seq(LPAREN, comma_list(expr), RPAREN)
@rule
def values_clause():
    # VALUES row, ...
    return seq(VALUES, comma_list(value_row))
@rule
def insert_stmt():
    # [with_clause] (INSERT | REPLACE | INSERT OR <resolution>) INTO
    #   [schema.]table [AS alias] [(col, ...)]
    #   ((VALUES ... | select) [upsert_clause] | DEFAULT VALUES)
    #   [returning_clause]
    verb = alt(
        INSERT,
        REPLACE,
        seq(INSERT, OR, alt(REPLACE, ROLLBACK, ABORT, FAIL, IGNORE)),
    )
    source = alt(
        seq(alt(values_clause, select_stmt), opt(upsert_clause)),
        seq(DEFAULT, VALUES),
    )
    return (
        opt(with_clause)
        + verb
        + INTO
        + opt(name, DOT)
        + name
        + opt(AS, name)
        + opt(LPAREN, comma_list(name), RPAREN)
        + source
        + opt(returning_clause)
    )
@rule
def returning_clause():
    # RETURNING result_column, ...
    return seq(RETURNING, comma_list(result_column))
@rule
def upsert_clause():
    # ON CONFLICT [(indexed_column, ...) [WHERE expr]] DO
    #   (NOTHING | UPDATE SET assignment, ... [WHERE expr])
    conflict_target = opt(LPAREN, comma_list(indexed_column), RPAREN, opt(WHERE, expr))
    assignments = comma_list(alt(name, column_name_list), EQUAL, expr)
    do_update = UPDATE + SET + assignments + opt(WHERE, expr)
    return ON + CONFLICT + conflict_target + DO + alt(NOTHING, do_update)
@rule
def pragma_stmt():
    # PRAGMA [schema.]name [= value | ( value )]
    assigned = seq(EQUAL, pragma_value)
    called = seq(LPAREN, pragma_value, RPAREN)
    return PRAGMA + opt(name, DOT) + name + opt(alt(assigned, called))
@rule
def pragma_value():
    # A signed number, a name, or a string literal.
    return alt(signed_number, name, STRING_LITERAL)
@rule
def reindex_stmt():
    # REINDEX [[schema.]name]
    # The earlier form listed a bare `name`, `[schema.]name`, and a duplicated
    # `name | name` as separate alternatives. They all describe the same
    # strings, so they are collapsed into one optionally qualified name; the
    # accepted language is unchanged.
    return REINDEX + opt(opt(name, DOT) + name)
@rule
def select_stmt():
    # [WITH ...] select_core (compound_operator select_core)*
    #   [ORDER BY ...] [LIMIT ...]
    return (
        opt(common_table_stmt)
        + select_core
        + zero_or_more(compound_operator, select_core)
        + opt(order_by_stmt)
        + opt(limit_stmt)
    )
@rule
def join_clause():
    # table_or_subquery (join_operator table_or_subquery [join_constraint])*
    joined = zero_or_more(join_operator, table_or_subquery, opt(join_constraint))
    return seq(table_or_subquery, joined)
@rule
def select_core():
    # Either a SELECT with its optional FROM / WHERE / GROUP BY / WINDOW
    # clauses, or a bare VALUES clause.
    from_clause = opt(FROM, alt(comma_list(table_or_subquery), join_clause))
    group_clause = opt(GROUP, BY, comma_list(expr), opt(HAVING, expr))
    window_clause = opt(WINDOW, comma_list(name, AS, window_defn))
    select_form = (
        SELECT
        + opt(alt(DISTINCT, ALL))
        + comma_list(result_column)
        + from_clause
        + opt(WHERE, expr)
        + group_clause
        + window_clause
    )
    return select_form | values_clause
@rule
def factored_select_stmt():
    # Plain alias for select_stmt.
    aliased = select_stmt
    return aliased
@rule
def simple_select_stmt():
    # [WITH ...] select_core [ORDER BY ...] [LIMIT ...] -- no compound part.
    return seq(
        opt(common_table_stmt),
        select_core,
        opt(order_by_stmt),
        opt(limit_stmt),
    )
@rule
def compound_select_stmt():
    # [WITH ...] select_core (compound_operator select_core)+
    #   [ORDER BY ...] [LIMIT ...]
    # Uses the shared compound_operator rule so that plain UNION is accepted
    # as well as UNION ALL; the earlier inline alternation only allowed
    # `UNION ALL`, INTERSECT, and EXCEPT.
    return seq(
        opt(common_table_stmt),
        select_core,
        one_or_more(compound_operator, select_core),
        opt(order_by_stmt),
        opt(limit_stmt),
    )
@rule
def table_or_subquery():
    # One source in a FROM clause: a (possibly aliased/indexed) table, a
    # table-valued function call, a parenthesised list/join, or a
    # parenthesised subquery with optional alias.
    index_hint = alt(seq(INDEXED, BY, name), seq(NOT, INDEXED))
    plain_table = seq(
        opt(name, DOT),
        name,
        opt(opt(AS), name),
        opt(index_hint),
    )
    table_function = seq(
        opt(name, DOT),
        name,
        LPAREN,
        comma_list(expr),
        RPAREN,
        opt(AS, name),
    )
    nested_sources = seq(
        LPAREN,
        alt(comma_list(table_or_subquery), join_clause),
        RPAREN,
    )
    subquery = seq(LPAREN, select_stmt, RPAREN, opt(opt(AS), name))
    return plain_table | table_function | nested_sources | subquery
@rule
def result_column():
    # `*`, `name.*`, or an expression with an optional [AS] alias.
    return alt(
        STAR,
        name + DOT + STAR,
        expr + opt(opt(AS), column_alias),
    )
@rule
def join_operator():
    # Either a comma, or [NATURAL] [join kind] JOIN where the kind is
    # (LEFT | RIGHT | FULL) [OUTER], INNER, or CROSS.
    join_kind = alt(seq(alt(LEFT, RIGHT, FULL), opt(OUTER)), INNER, CROSS)
    keyword_join = opt(NATURAL) + opt(join_kind) + JOIN
    return COMMA | keyword_join
@rule
def join_constraint():
    # ON expr | USING ( name, ... )
    on_form = seq(ON, expr)
    using_form = seq(USING, LPAREN, comma_list(name), RPAREN)
    return on_form | using_form
@rule
def compound_operator():
    # UNION [ALL] | INTERSECT | EXCEPT
    return alt(seq(UNION, opt(ALL)), INTERSECT, EXCEPT)
@rule
def update_stmt():
    # [with_clause] UPDATE [OR <resolution>] table SET assignment, ...
    #   [FROM sources] [WHERE expr] [returning_clause]
    return (
        opt(with_clause)
        + UPDATE
        + opt(OR, alt(ROLLBACK, ABORT, REPLACE, FAIL, IGNORE))
        + qualified_table_name
        + SET
        + comma_list(alt(name, column_name_list), EQUAL, expr)
        + opt(FROM, alt(comma_list(table_or_subquery), join_clause))
        + opt(WHERE, expr)
        + opt(returning_clause)
    )
@rule
def column_name_list():
    # ( name, ... )
    return seq(LPAREN, comma_list(name), RPAREN)
@rule
def update_stmt_limited():
    # UPDATE without a FROM clause but with an optional trailing
    # [ORDER BY ...] LIMIT ... clause.
    order_and_limit = opt(opt(order_by_stmt), limit_stmt)
    return (
        opt(with_clause)
        + UPDATE
        + opt(OR, alt(ROLLBACK, ABORT, REPLACE, FAIL, IGNORE))
        + qualified_table_name
        + SET
        + comma_list(alt(name, column_name_list), EQUAL, expr)
        + opt(WHERE, expr)
        + opt(returning_clause)
        + order_and_limit
    )
@rule
def qualified_table_name():
    # [schema.]table [AS alias] [INDEXED BY name | NOT INDEXED]
    index_hint = alt(seq(INDEXED, BY, name), seq(NOT, INDEXED))
    return opt(name, DOT) + name + opt(AS, name) + opt(index_hint)
@rule
def vacuum_stmt():
    # VACUUM [name] [INTO name]
    return seq(VACUUM, opt(name), opt(INTO, name))
@rule
def filter_clause():
    # FILTER ( WHERE expr )
    return seq(FILTER, LPAREN, WHERE, expr, RPAREN)
@rule
def window_defn():
    # ( [base_window] [PARTITION BY expr, ...] ORDER BY term, ... [frame] )
    # Note that ORDER BY is mandatory in this rule.
    return (
        LPAREN
        + opt(name)
        + opt(PARTITION, BY, comma_list(expr))
        + ORDER
        + BY
        + comma_list(ordering_term)
        + opt(frame_spec)
        + RPAREN
    )
@rule
def over_clause():
    # OVER followed by a window name or an inline window specification in
    # which every part is optional.
    inline_window = (
        LPAREN
        + opt(name)
        + opt(PARTITION, BY, comma_list(expr))
        + opt(ORDER, BY, comma_list(ordering_term))
        + opt(frame_spec)
        + RPAREN
    )
    return OVER + alt(name, inline_window)
@rule
def frame_spec():
    # frame_clause [EXCLUDE (NO OTHERS | CURRENT ROW | GROUP | TIES)]
    excluded = alt(seq(NO, OTHERS), seq(CURRENT, ROW), GROUP, TIES)
    return seq(frame_clause, opt(EXCLUDE, excluded))
@rule
def frame_clause():
    # (RANGE | ROWS | GROUPS) (frame_single | BETWEEN left AND right)
    unit = alt(RANGE, ROWS, GROUPS)
    extent = alt(frame_single, seq(BETWEEN, frame_left, AND, frame_right))
    return unit + extent
@rule
def simple_function_invocation():
    # name ( expr, ... | * )
    arguments = alt(comma_list(expr), STAR)
    return name + LPAREN + arguments + RPAREN
@rule
def aggregate_function_invocation():
    # name ( [[DISTINCT] expr, ... | *] ) [filter_clause]
    # DISTINCT now applies only to the expression list, matching the
    # function-call alternative in `expr`. Previously the optional DISTINCT
    # was sequenced before the whole `list | *` choice, which also admitted
    # `DISTINCT *`.
    return seq(
        name,
        LPAREN,
        opt(seq(opt(DISTINCT), comma_list(expr)) | STAR),
        RPAREN,
        opt(filter_clause),
    )
@rule
def window_function_invocation():
    # window_function ( [expr, ... | *] ) [filter_clause]
    #   OVER (window_defn | name)
    return (
        window_function
        + LPAREN
        + opt(alt(comma_list(expr), STAR))
        + RPAREN
        + opt(filter_clause)
        + OVER
        + alt(window_defn, name)
    )
@rule
def common_table_stmt():
    # WITH [RECURSIVE] common_table_expression, ...
    return WITH + opt(RECURSIVE) + comma_list(common_table_expression)
@rule
def order_by_stmt():
    # ORDER BY ordering_term, ...
    return ORDER + BY + comma_list(ordering_term)
@rule
def limit_stmt():
    # LIMIT expr [(OFFSET | ,) expr]
    separator = alt(OFFSET, COMMA)
    return LIMIT + expr + opt(separator, expr)
# Keywords used by the NULLS FIRST / NULLS LAST suffix of ordering_term.
NULLS = Terminal("NULLS", "nulls")
FIRST = Terminal("FIRST", "first")
LAST = Terminal("LAST", "last")
@rule
def ordering_term():
    # expr [COLLATE name] [ASC | DESC] [NULLS (FIRST | LAST)]
    return (
        expr
        + opt(COLLATE, name)
        + opt(asc_desc)
        + opt(NULLS, alt(FIRST, LAST))
    )
@rule
def asc_desc():
    # Sort direction keyword.
    return alt(ASC, DESC)
# Keywords used by the window-frame boundary rules (frame_left, frame_right,
# frame_single).
PRECEDING = Terminal("PRECEDING", "preceding")
FOLLOWING = Terminal("FOLLOWING", "following")
UNBOUNDED = Terminal("UNBOUNDED", "unbounded")
@rule
def frame_left():
    # Lower frame boundary: expr PRECEDING | expr FOLLOWING | CURRENT ROW |
    # UNBOUNDED PRECEDING
    return (
        seq(expr, PRECEDING)
        | seq(expr, FOLLOWING)
        | seq(CURRENT, ROW)
        | seq(UNBOUNDED, PRECEDING)
    )
@rule
def frame_right():
    # Upper frame boundary: expr PRECEDING | expr FOLLOWING | CURRENT ROW |
    # UNBOUNDED FOLLOWING
    return (
        seq(expr, PRECEDING)
        | seq(expr, FOLLOWING)
        | seq(CURRENT, ROW)
        | seq(UNBOUNDED, FOLLOWING)
    )
@rule
def frame_single():
    # Single-boundary frame: expr PRECEDING | UNBOUNDED PRECEDING |
    # CURRENT ROW
    return (
        seq(expr, PRECEDING)
        | seq(UNBOUNDED, PRECEDING)
        | seq(CURRENT, ROW)
    )
@rule
def window_function():
    # The built-in window functions, grouped by argument shape. Every
    # alternative has the form
    #   <function keyword(s)> ( args ) OVER ( [partition_by] <ordering> ... )
    # NOTE(review): window_function_invocation appends its own argument list
    # and OVER clause after this rule, although each alternative here already
    # contains both -- confirm which rule is meant to own them.
    return alt(
        # FIRST_VALUE / LAST_VALUE: one argument, ordering required, optional
        # frame.
        seq(
            FIRST_VALUE | LAST_VALUE,
            seq(LPAREN, expr, RPAREN),
            OVER,
            seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, opt(frame_clause), RPAREN),
        ),
        # CUME_DIST / PERCENT_RANK: no arguments, ordering optional.
        seq(
            CUME_DIST | PERCENT_RANK,
            seq(LPAREN, RPAREN),
            OVER,
            seq(LPAREN, opt(partition_by), opt(order_by_expr), RPAREN),
        ),
        # DENSE_RANK / RANK / ROW_NUMBER: no arguments, ordering required.
        seq(
            DENSE_RANK | RANK | ROW_NUMBER,
            seq(LPAREN, RPAREN),
            OVER,
            seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN),
        ),
        # LAG / LEAD: expression plus optional offset and default.
        seq(
            LAG | LEAD,
            seq(LPAREN, expr, opt(offset), opt(default_value), RPAREN),
            OVER,
            seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN),
        ),
        # NTH_VALUE: expression and a signed number, optional frame.
        seq(
            NTH_VALUE,
            seq(LPAREN, expr, COMMA, signed_number, RPAREN),
            OVER,
            seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, opt(frame_clause), RPAREN),
        ),
        # NTILE: one argument, ordering required.
        seq(
            NTILE,
            seq(LPAREN, expr, RPAREN),
            OVER,
            seq(LPAREN, opt(partition_by), order_by_expr_asc_desc, RPAREN),
        ),
    )
@rule
def offset():
    # `, signed_number` -- the offset argument of LAG / LEAD.
    return seq(COMMA, signed_number)
@rule
def default_value():
    # `, signed_number` -- the default argument of LAG / LEAD.
    return seq(COMMA, signed_number)
@rule
def partition_by():
    # PARTITION BY expr, ...
    # The expressions are comma-separated, as in over_clause and window_defn;
    # the earlier `one_or_more(expr)` accepted expressions placed back to back
    # with no separator.
    return PARTITION + BY + comma_list(expr)
@rule
def order_by_expr():
    # ORDER BY expr, ...
    # The expressions are comma-separated, consistent with the other ORDER BY
    # rules in this grammar; the earlier `one_or_more(expr)` accepted
    # expressions placed back to back with no separator.
    return ORDER + BY + comma_list(expr)
@rule
def order_by_expr_asc_desc():
    # ORDER BY followed by expressions that may each carry a direction.
    return seq(ORDER, BY, expr_asc_desc)
@rule
def expr_asc_desc():
    # expr [ASC | DESC], ...
    direction = opt(asc_desc)
    return comma_list(expr, direction)
# TODO BOTH OF THESE HAVE TO BE REWORKED TO FOLLOW THE SPEC
@rule
def initial_select():
    # Currently just select_stmt (see the TODO above about the spec).
    aliased = select_stmt
    return aliased
@rule
def recursive_select():
    # Currently just select_stmt; used as the recursive arm of recursive_cte.
    aliased = select_stmt
    return aliased
@rule
def unary_operator():
    # Prefix operators accepted by expr.
    return alt(MINUS, PLUS, TILDE, NOT)
@rule
def error_message():
    # The message argument of RAISE(...): a string literal.
    message = STRING_LITERAL
    return message
@rule
def module_argument():  # TODO check what exactly is permitted here
    # One argument of CREATE VIRTUAL TABLE ... USING module(...).
    return alt(expr, column_def)
@rule
def column_alias():
    # Alias of a result column: identifier or string literal.
    return alt(IDENTIFIER, STRING_LITERAL)
@rule
def keyword():
    # Alternation over keyword terminals. The `name` rule includes this rule
    # as one of its alternatives, so every keyword listed here can also be
    # used wherever a name is expected.
    # NOTE(review): not every keyword terminal defined in this file is listed
    # (for example ANY, CHARACTER, DO, NOTHING, RETURNING are absent) --
    # confirm whether the omissions are deliberate.
    return alt(
        ABORT,
        ACTION,
        ADD,
        AFTER,
        ALL,
        ALTER,
        ANALYZE,
        AND,
        AS,
        ASC,
        ATTACH,
        AUTOINCREMENT,
        BEFORE,
        BEGIN,
        BETWEEN,
        BY,
        CASCADE,
        CASE,
        CAST,
        CHECK,
        COLLATE,
        COLUMN,
        COMMIT,
        CONFLICT,
        CONSTRAINT,
        CREATE,
        CROSS,
        CURRENT_DATE,
        CURRENT_TIME,
        CURRENT_TIMESTAMP,
        DATABASE,
        DEFAULT,
        DEFERRABLE,
        DEFERRED,
        DELETE,
        DESC,
        DETACH,
        DISTINCT,
        DROP,
        EACH,
        ELSE,
        END,
        ESCAPE,
        EXCEPT,
        EXCLUSIVE,
        EXISTS,
        EXPLAIN,
        FAIL,
        FOR,
        FOREIGN,
        FROM,
        FULL,
        GLOB,
        GROUP,
        HAVING,
        IF,
        IGNORE,
        IMMEDIATE,
        IN,
        INDEX,
        INDEXED,
        INITIALLY,
        INNER,
        INSERT,
        INSTEAD,
        INTERSECT,
        INTO,
        IS,
        ISNULL,
        JOIN,
        KEY,
        LEFT,
        LIKE,
        LIMIT,
        MATCH,
        NATURAL,
        NO,
        NOT,
        NOTNULL,
        NULL,
        OF,
        OFFSET,
        ON,
        OR,
        ORDER,
        OUTER,
        PLAN,
        PRAGMA,
        PRIMARY,
        QUERY,
        RAISE,
        RECURSIVE,
        REFERENCES,
        REGEXP,
        REINDEX,
        RELEASE,
        RENAME,
        REPLACE,
        RESTRICT,
        RIGHT,
        ROLLBACK,
        ROW,
        ROWS,
        SAVEPOINT,
        SELECT,
        SET,
        TABLE,
        TEMP,
        TEMPORARY,
        THEN,
        TO,
        TRANSACTION,
        TRIGGER,
        UNION,
        UNIQUE,
        UPDATE,
        USING,
        VACUUM,
        VALUES,
        VIEW,
        VIRTUAL,
        WHEN,
        WHERE,
        WITH,
        WITHOUT,
        FIRST_VALUE,
        OVER,
        PARTITION,
        RANGE,
        PRECEDING,
        UNBOUNDED,
        CURRENT,
        FOLLOWING,
        CUME_DIST,
        DENSE_RANK,
        LAG,
        LAST_VALUE,
        LEAD,
        NTH_VALUE,
        NTILE,
        PERCENT_RANK,
        RANK,
        ROW_NUMBER,
        GENERATED,
        ALWAYS,
        STORED,
        TRUE,
        FALSE,
        WINDOW,
        NULLS,
        FIRST,
        LAST,
        FILTER,
        GROUPS,
        EXCLUDE,
    )
# TODO: check all names below
@rule
def name():
    # An identifier, any keyword, a string literal, or a parenthesised name.
    parenthesised = LPAREN + name + RPAREN
    return alt(IDENTIFIER, keyword, STRING_LITERAL, parenthesised)
# The grammar object: starts at sql_list and treats blanks, comments and line
# breaks as trivia.
#
# The precedence table previously covered only OR, AND, NOT, +/- and * /.
# It now also lists `%` next to `*` and `/`, and adds the remaining symbolic
# binary-operator tiers used by `expr`, placed according to the precedence
# comment above `expr`.
# NOTE(review): the pre-existing entries are ordered loosest-first, and the
# new tiers follow that order -- confirm this matches how Grammar reads the
# list. Word operators (IS, IN, LIKE, GLOB, MATCH, REGEXP) are deliberately
# left out because they also occur outside binary expressions.
SQL = Grammar(
    start=sql_list,
    precedence=[
        (Assoc.LEFT, [OR]),
        (Assoc.LEFT, [AND]),
        (Assoc.LEFT, [NOT]),
        (Assoc.LEFT, [EQUAL, EQUALEQUAL, NOT_EQ]),
        (Assoc.LEFT, [LT, LT_EQ, GT, GT_EQ]),
        (Assoc.LEFT, [LT2, GT2, AMPERSAND, PIPE]),
        (Assoc.LEFT, [PLUS, MINUS]),
        (Assoc.LEFT, [STAR, SLASH, PERCENT]),
        (Assoc.LEFT, [PIPE2]),
        # TODO: Unary minus
    ],
    trivia=[BLANKS, COMMENT, LINE_BREAK],
    name="SQL",
)
def emit_yacc(path: str, grammar: Grammar):
    """Write `grammar` to `path` as a yacc-style grammar file.

    The output consists of `%token` declarations for every non-trivia
    terminal (sorted by name, wrapped once a line reaches 73 characters),
    one `%left` / `%right` / `%nonassoc` line per precedence entry, a
    `%start` line, and one `name: symbols;` line per production between two
    `%%` markers.
    """
    output = []

    declared = sorted(t.name for t in grammar.terminals())
    hidden = {t.name for t in grammar.trivia_terminals()}

    # Pack token names onto %token lines, starting a new line once the
    # current one has grown to 73 characters or more.
    pending = ""
    for token in declared:
        if token in hidden:
            continue
        pending = f"{pending} {token}" if pending else token
        if len(pending) >= 73:
            output.append(f"%token {pending}")
            pending = ""
    if pending:
        output.append(f"%token {pending}")
    output.append("")

    precedence = grammar.precedence()
    if len(precedence) > 0:
        for assoc, members in precedence:
            if assoc == Assoc.LEFT:
                directive = "%left "
            elif assoc == Assoc.RIGHT:
                directive = "%right "
            elif assoc == Assoc.NONE:
                directive = "%nonassoc"
            else:
                typing.assert_never(assoc)
            names = " ".join(member.name for member in members)
            output.append(f"{directive} {names}")
        output.append("")

    output += [f"%start {grammar.start.name}", "", "%%"]

    for non_terminal in grammar.non_terminals():
        for production in non_terminal.body:
            symbols = " ".join(symbol.name for symbol in production)
            output.append(f"{non_terminal.name}: {symbols};")

    output += ["", "%%"]

    with open(path, "w", encoding="utf8") as handle:
        handle.write("".join(f"{entry}\n" for entry in output))
def emit_lex(path: str, grammar: "Grammar"):
    """Write a lex-style token specification for `grammar` to `path`.

    The file starts with a `%%` line, followed by one `<pattern> <action>`
    line per terminal returned by `grammar.terminals()`. Trivia terminals
    get the action `;`; every other terminal gets its own name in double
    quotes.

    Terminals whose pattern is a plain string are emitted with regex
    metacharacters backslash-escaped, so that e.g. `.`, `*`, `(` and `||`
    match themselves instead of acting as regex operators. Regex patterns
    are rendered by walking the regex tree.

    Raises:
        Exception: if a regex node of an unsupported kind is encountered.
    """
    import json  # Local import: this block is the only user of `json`.

    # Characters that need a backslash to stand for themselves outside a
    # bracket expression.
    pattern_meta = frozenset('\\.^$*+?()[]{}|/"<>% ')
    # Characters that need a backslash inside a bracket expression (the
    # backslash itself is already doubled by the JSON escaping below).
    set_meta = frozenset("]^-")

    def to_js_string(s: str) -> str:
        result = json.dumps(s)[1:-1]
        # JSON escapes double-quotes but we don't need to in our context.
        result = result.replace('\\"', '"')
        return result

    def escape_literal(text: str) -> str:
        """Escape `text` so that it matches itself when used as a pattern."""
        pieces = []
        for ch in text:
            if ch in pattern_meta:
                pieces.append("\\" + ch)
            else:
                pieces.append(to_js_string(ch))
        return "".join(pieces)

    def escape_in_set(ch: str) -> str:
        """Escape one character for use inside `[...]`."""
        if ch in set_meta:
            return "\\" + ch
        return to_js_string(ch)

    def flatten(node, kind) -> list:
        """Return the leaves of a left/right tree of `kind` nodes, in order."""
        leaves = []
        stack = [node]
        while stack:
            part = stack.pop()
            if isinstance(part, kind):
                # Push right first so that the left child is visited first.
                stack.append(part.right)
                stack.append(part.left)
            else:
                leaves.append(part)
        return leaves

    def to_lex_regex(re: "parser.Re") -> str:
        if isinstance(re, parser.ReSeq):
            leaves = flatten(re, parser.ReSeq)
            s = "".join([to_lex_regex(p) for p in leaves])
            if len(leaves) > 1:
                s = f"({s})"
            return s

        elif isinstance(re, parser.ReAlt):
            leaves = flatten(re, parser.ReAlt)
            s = "|".join([to_lex_regex(p) for p in leaves])
            if len(leaves) > 1:
                s = f"({s})"
            return s

        elif isinstance(re, parser.ReQuestion):
            s = to_lex_regex(re.child)
            return f"({s})?"

        elif isinstance(re, parser.RePlus):
            s = to_lex_regex(re.child)
            return f"({s})+"

        elif isinstance(re, parser.ReStar):
            s = to_lex_regex(re.child)
            return f"({s})*"

        elif isinstance(re, parser.ReSet):
            # A set spanning every code point is just "any character".
            if (
                len(re.values) == 1
                and re.values[0].lower == 0
                and re.values[0].upper == parser.UNICODE_MAX_CP
            ):
                return "."

            inverted = re.inversion
            if inverted:
                re = re.invert()

            # A non-inverted set holding exactly one character is better
            # written as an (escaped) literal than as a bracket expression.
            if not inverted and len(re.values) == 1 and len(re.values[0]) == 1:
                return escape_literal(chr(re.values[0].lower))

            parts = []
            for value in re.values:
                if len(value) == 1:
                    parts.append(escape_in_set(chr(value.lower)))
                else:
                    # `upper` is one past the last code point of the range.
                    parts.append(
                        "{}-{}".format(
                            escape_in_set(chr(value.lower)),
                            escape_in_set(chr(value.upper - 1)),
                        )
                    )

            s = "".join(parts)
            if inverted:
                s = "^" + s
            if len(s) > 1:
                s = f"[{s}]"
            return s

        raise Exception(f"Regex node {re} not supported for lex")

    lines = ["%%"]
    trivia = {t.name for t in grammar.trivia_terminals()}
    for terminal in grammar.terminals():
        if isinstance(terminal.pattern, str):
            pattern = escape_literal(terminal.pattern)
        else:
            pattern = to_lex_regex(terminal.pattern)

        action = ";" if terminal.name in trivia else f'"{terminal.name}"'
        lines.append(f"{pattern} {action}")

    with open(path, "w", encoding="utf8") as file:
        file.writelines([f"{l}\n" for l in lines])
if __name__ == "__main__":
    # Disabled profiling harness: when enabled it would run SQL.build_table()
    # under cProfile and dump the stats to sql.pprof.
    # import cProfile
    # print("Starting...")
    # with cProfile.Profile() as pr:
    #     try:
    #         SQL.build_table()
    #     finally:
    #         pr.dump_stats("sql.pprof")
    # print("Wrote output to sql.pprof")

    # Running the module as a script writes the yacc and lex renderings of
    # the grammar into the current working directory.
    emit_yacc("sql.y", SQL)
    emit_lex("sql.l", SQL)