From 1aa85cc295f32e253a9803501f8ab1a4a4dd10e1 Mon Sep 17 00:00:00 2001 From: John Doty Date: Sat, 8 Feb 2025 08:08:15 -0800 Subject: [PATCH] Fix the examples in the dingus --- dingus/dingus.js | 486 +++++++++++++++++++++++-------------------- examples/sqlsorta.py | 122 +++++++++++ 2 files changed, 385 insertions(+), 223 deletions(-) create mode 100644 examples/sqlsorta.py diff --git a/dingus/dingus.js b/dingus/dingus.js index 4e28088..0754d60 100644 --- a/dingus/dingus.js +++ b/dingus/dingus.js @@ -338,109 +338,125 @@ const EXAMPLES = [ # A silly little SQL grammar. Incomplete, but you get it, right? from parser import * -class SQLGrammar(Grammar): - start = "query" - trivia = ["BLANKS"] - precedence = [ - ] +@rule +def query(): + return select_clause + opt(from_clause) - @rule - def query(self): - return self.select_clause + opt(self.from_clause) - @rule - def select_clause(self): - return self.SELECT + self.select_column_list +@rule +def select_clause(): + return SELECT + select_column_list - @rule(transparent=True) - def select_column_list(self): - return alt( - self.column_spec, - self.select_column_list + self.COMMA + self.column_spec, - ) - @rule - def column_spec(self): - return alt( - self.STAR, - self.expression + opt(self.alias), - ) - - @rule - def alias(self): - return self.AS + self.NAME - - @rule - def from_clause(self): - return self.FROM + self.table_list - - @rule(transparent=True) - def table_list(self): - return ( - self.table_clause | - (self.table_list + self.COMMA + self.table_clause) - ) - - @rule - def table_clause(self): - return alt( - self.table_expression + opt(self.alias), - self.join_clause, - ) - - @rule - def table_expression(self): - return alt( - self.NAME, - self.LPAREN + self.query + self.RPAREN, - ) - - @rule - def join_clause(self): - return self.join_type + self.table_expression + self.ON + self.expression - - @rule - def join_type(self): - return opt(alt( - opt(alt(self.LEFT, self.RIGHT)) + self.OUTER, - self.INNER, - self.CROSS, - )) + self.JOIN - - @rule - def expression(self): - return self.NAME - - BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus()) - - # TODO: Case insensitivity? I don't know if I care- this grammar - # tool is more about new languages than parsing existing ones, - # and this SQL grammar is just a demo. Do people want case - # ignoring lexers? - SELECT = Terminal("select") - AS = Terminal("as") - COMMA = Terminal(",") - STAR = Terminal("*") - FROM = Terminal("from") - WHERE = Terminal("where") - LPAREN = Terminal("(") - RPAREN = Terminal(")") - - LEFT = Terminal("left") - RIGHT = Terminal("right") - OUTER = Terminal("outer") - INNER = Terminal("inner") - CROSS = Terminal("cross") - JOIN = Terminal("join") - ON = Terminal("on") - - NAME = Terminal( - Re.seq( - Re.set(("a", "z"), ("A", "Z"), "_"), - Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), - ), +@rule(transparent=True) +def select_column_list(): + return alt( + column_spec, + select_column_list + COMMA + column_spec, ) + + +@rule +def column_spec(): + return alt( + STAR, + expression + opt(alias), + ) + + +@rule +def alias(): + return AS + NAME + + +@rule +def from_clause(): + return FROM + table_list + + +@rule(transparent=True) +def table_list(): + return table_clause | (table_list + COMMA + table_clause) + + +@rule +def table_clause(): + return alt( + table_expression + opt(alias), + join_clause, + ) + + +@rule +def table_expression(): + return alt( + NAME, + LPAREN + query + RPAREN, + ) + + +@rule +def join_clause(): + return join_type + table_expression + ON + expression + + +@rule +def join_type(): + return ( + opt( + alt( + opt(alt(LEFT, RIGHT)) + OUTER, + INNER, + CROSS, + ) + ) + + JOIN + ) + + +@rule +def expression(): + return NAME + + +BLANKS = Terminal("BLANKS", Re.set(" ", "\\t", "\\r", "\\n").plus()) + +# TODO: Case insensitivity? I don't know if I care- this grammar +# tool is more about new languages than parsing existing ones, +# and this SQL grammar is just a demo. Do people want case +# ignoring lexers? +SELECT = Terminal("SELECT", "select") +AS = Terminal("AS", "as") +COMMA = Terminal("COMMA", ",") +STAR = Terminal("STAR", "*") +FROM = Terminal("FROM", "from") +WHERE = Terminal("WHERE", "where") +LPAREN = Terminal("LPAREN", "(") +RPAREN = Terminal("RPAREN", ")") + +LEFT = Terminal("LEFT", "left") +RIGHT = Terminal("RIGHT", "right") +OUTER = Terminal("OUTER", "outer") +INNER = Terminal("INNER", "inner") +CROSS = Terminal("CROSS", "cross") +JOIN = Terminal("JOIN", "join") +ON = Terminal("ON", "on") + +NAME = Terminal( + "NAME", + Re.seq( + Re.set(("a", "z"), ("A", "Z"), "_"), + Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), + ), +) + +SQL = Grammar( + start=query, + precedence=[], + trivia=[BLANKS], + name="SQL", +) `, "input": ` select @@ -461,134 +477,158 @@ from # https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html from parser import * -class LGrammar(Grammar): - start = "File" - trivia = ["BLANKS"] +@rule +def File(): + # TODO: Make lists easier + return _functions + + +@rule +def _functions(): + return Function | (_functions + Function) + + +@rule +def Function(): + return FN + NAME + ParamList + opt(ARROW + TypeExpr) + Block + + +@rule +def ParamList(): + return LPAREN + opt(_parameters) + RPAREN + + +@rule +def _parameters(): + # NOTE: The ungrammar in the reference does not talk about commas required between parameters + # so this massages it to make them required. Commas are in the list not the param, which + # is more awkward for processing but not terminally so. + return (Param + opt(COMMA)) | (Param + COMMA + _parameters) + + +@rule +def Param(): + return NAME + COLON + TypeExpr + + +@rule +def TypeExpr(): + return NAME + + +@rule +def Block(): + return LCURLY + opt(_statements) + RCURLY + + +@rule +def _statements(): + return Stmt | _statements + Stmt + + +@rule +def Stmt(): + return StmtExpr | StmtLet | StmtReturn + + +@rule +def StmtExpr(): + return Expr + SEMICOLON + + +@rule +def StmtLet(): + return LET + NAME + EQUAL + Expr + SEMICOLON + + +@rule +def StmtReturn(): + return RETURN + Expr + SEMICOLON + + +@rule(error_name="expression") +def Expr(): + return ExprLiteral | ExprName | ExprParen | ExprBinary | ExprCall + + +@rule +def ExprLiteral(): + return INT | TRUE | FALSE + + +@rule +def ExprName(): + return NAME + + +@rule +def ExprParen(): + return LPAREN + Expr + RPAREN + + +@rule +def ExprBinary(): + return Expr + (PLUS | MINUS | STAR | SLASH) + Expr + + +@rule +def ExprCall(): + return Expr + ArgList + + +@rule +def ArgList(): + return LPAREN + opt(_arg_star) + RPAREN + + +@rule +def _arg_star(): + # Again, a deviation from the original. See _parameters. + return (Expr + opt(COMMA)) | (Expr + COMMA + _arg_star) + + +BLANKS = Terminal("BLANKS", Re.set(" ", "\\t", "\\r", "\\n").plus()) + +TRUE = Terminal("TRUE", "true") +FALSE = Terminal("FALSE", "false") +INT = Terminal("INT", Re.set(("0", "9")).plus()) +FN = Terminal("FN", "fn") +ARROW = Terminal("ARROW", "->") +COMMA = Terminal("COMMA", ",") +LPAREN = Terminal("LPAREN", "(") +RPAREN = Terminal("RPAREN", ")") +LCURLY = Terminal("LCURLY", "{") +RCURLY = Terminal("RCURLY", "}") +COLON = Terminal("COLON", ":") +SEMICOLON = Terminal("SEMICOLON", ";") +LET = Terminal("LET", "let") +EQUAL = Terminal("EQUAL", "=") +RETURN = Terminal("RETURN", "return") +PLUS = Terminal("PLUS", "+") +MINUS = Terminal("MINUS", "-") +STAR = Terminal("STAR", "*") +SLASH = Terminal("SLASH", "/") + +NAME = Terminal( + "NAME", + Re.seq( + Re.set(("a", "z"), ("A", "Z"), "_"), + Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), + ), +) + +LGrammar = Grammar( + name="L", + start=File, + trivia=[BLANKS], # Need a little bit of disambiguation for the symbol involved. - precedence = [ - (Assoc.LEFT, ["PLUS", "MINUS"]), - (Assoc.LEFT, ["STAR", "SLASH"]), - (Assoc.LEFT, ["LPAREN"]), - ] - - @rule - def File(self): - # TODO: Make lists easier - return self._functions - - @rule - def _functions(self): - return self.Function | (self._functions + self.Function) - - @rule - def Function(self): - return self.FN + self.NAME + self.ParamList + opt(self.ARROW + self.TypeExpr) + self.Block - - @rule - def ParamList(self): - return self.LPAREN + opt(self._parameters) + self.RPAREN - - @rule - def _parameters(self): - # NOTE: The ungrammar in the reference does not talk about commas required between parameters - # so this massages it to make them required. Commas are in the list not the param, which - # is more awkward for processing but not terminally so. - return (self.Param + opt(self.COMMA)) | (self.Param + self.COMMA + self._parameters) - - @rule - def Param(self): - return self.NAME + self.COLON + self.TypeExpr - - @rule - def TypeExpr(self): - return self.NAME - - @rule - def Block(self): - return self.LCURLY + opt(self._statements) + self.RCURLY - - @rule - def _statements(self): - return self.Stmt | self._statements + self.Stmt - - @rule - def Stmt(self): - return self.StmtExpr | self.StmtLet | self.StmtReturn - - @rule - def StmtExpr(self): - return self.Expr + self.SEMICOLON - - @rule - def StmtLet(self): - return self.LET + self.NAME + self.EQUAL + self.Expr + self.SEMICOLON - - @rule - def StmtReturn(self): - return self.RETURN + self.Expr + self.SEMICOLON - - @rule(error_name="expression") - def Expr(self): - return self.ExprLiteral | self.ExprName | self.ExprParen | self.ExprBinary | self.ExprCall - - @rule - def ExprLiteral(self): - return self.INT | self.TRUE | self.FALSE - - @rule - def ExprName(self): - return self.NAME - - @rule - def ExprParen(self): - return self.LPAREN + self.Expr + self.RPAREN - - @rule - def ExprBinary(self): - return self.Expr + (self.PLUS | self.MINUS | self.STAR | self.SLASH) + self.Expr - - @rule - def ExprCall(self): - return self.Expr + self.ArgList - - @rule - def ArgList(self): - return self.LPAREN + opt(self._arg_star) + self.RPAREN - - @rule - def _arg_star(self): - # Again, a deviation from the original. See _parameters. - return (self.Expr + opt(self.COMMA)) | (self.Expr + self.COMMA + self._arg_star) - - BLANKS = Terminal(Re.set(" ", "\\t", "\\r", "\\n").plus()) - - TRUE = Terminal("true") - FALSE = Terminal("false") - INT = Terminal(Re.set(("0", "9")).plus()) - FN = Terminal("fn") - ARROW = Terminal("->") - COMMA = Terminal(",") - LPAREN = Terminal("(") - RPAREN = Terminal(")") - LCURLY = Terminal("{") - RCURLY = Terminal("}") - COLON = Terminal(":") - SEMICOLON = Terminal(";") - LET = Terminal("let") - EQUAL = Terminal("=") - RETURN = Terminal("return") - PLUS = Terminal("+") - MINUS = Terminal("-") - STAR = Terminal("*") - SLASH = Terminal("/") - - NAME = Terminal( - Re.seq( - Re.set(("a", "z"), ("A", "Z"), "_"), - Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), - ), - ) + precedence=[ + (Assoc.LEFT, [PLUS, MINUS]), + (Assoc.LEFT, [STAR, SLASH]), + (Assoc.LEFT, [LPAREN]), + ], +) `, "input": ` fn dingus(x:f64, y:f64) -> f64 { diff --git a/examples/sqlsorta.py b/examples/sqlsorta.py new file mode 100644 index 0000000..261b7a7 --- /dev/null +++ b/examples/sqlsorta.py @@ -0,0 +1,122 @@ +# A silly little SQL grammar. Incomplete, but you get it, right? +from parser import * + + +@rule +def query(): + return select_clause + opt(from_clause) + + +@rule +def select_clause(): + return SELECT + select_column_list + + +@rule(transparent=True) +def select_column_list(): + return alt( + column_spec, + select_column_list + COMMA + column_spec, + ) + + +@rule +def column_spec(): + return alt( + STAR, + expression + opt(alias), + ) + + +@rule +def alias(): + return AS + NAME + + +@rule +def from_clause(): + return FROM + table_list + + +@rule(transparent=True) +def table_list(): + return table_clause | (table_list + COMMA + table_clause) + + +@rule +def table_clause(): + return alt( + table_expression + opt(alias), + join_clause, + ) + + +@rule +def table_expression(): + return alt( + NAME, + LPAREN + query + RPAREN, + ) + + +@rule +def join_clause(): + return join_type + table_expression + ON + expression + + +@rule +def join_type(): + return ( + opt( + alt( + opt(alt(LEFT, RIGHT)) + OUTER, + INNER, + CROSS, + ) + ) + + JOIN + ) + + +@rule +def expression(): + return NAME + + +BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus()) + +# TODO: Case insensitivity? I don't know if I care- this grammar +# tool is more about new languages than parsing existing ones, +# and this SQL grammar is just a demo. Do people want case +# ignoring lexers? +SELECT = Terminal("SELECT", "select") +AS = Terminal("AS", "as") +COMMA = Terminal("COMMA", ",") +STAR = Terminal("STAR", "*") +FROM = Terminal("FROM", "from") +WHERE = Terminal("WHERE", "where") +LPAREN = Terminal("LPAREN", "(") +RPAREN = Terminal("RPAREN", ")") + +LEFT = Terminal("LEFT", "left") +RIGHT = Terminal("RIGHT", "right") +OUTER = Terminal("OUTER", "outer") +INNER = Terminal("INNER", "inner") +CROSS = Terminal("CROSS", "cross") +JOIN = Terminal("JOIN", "join") +ON = Terminal("ON", "on") + +NAME = Terminal( + "NAME", + Re.seq( + Re.set(("a", "z"), ("A", "Z"), "_"), + Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(), + ), +) + +SQL = Grammar( + start=query, + precedence=[], + trivia=[BLANKS], + name="SQL", +)