Fix the examples in the dingus

This commit is contained in:
John Doty 2025-02-08 08:08:15 -08:00
parent d70f9ab958
commit 1aa85cc295
2 changed files with 385 additions and 223 deletions

View file

@ -338,109 +338,125 @@ const EXAMPLES = [
# A silly little SQL grammar. Incomplete, but you get it, right?
from parser import *
class SQLGrammar(Grammar):
start = "query"
trivia = ["BLANKS"]
precedence = [
]
@rule
def query():
return select_clause + opt(from_clause)
@rule
def query(self):
return self.select_clause + opt(self.from_clause)
@rule
def select_clause(self):
return self.SELECT + self.select_column_list
@rule
def select_clause():
return SELECT + select_column_list
@rule(transparent=True)
def select_column_list(self):
return alt(
self.column_spec,
self.select_column_list + self.COMMA + self.column_spec,
)
@rule
def column_spec(self):
return alt(
self.STAR,
self.expression + opt(self.alias),
)
@rule
def alias(self):
return self.AS + self.NAME
@rule
def from_clause(self):
return self.FROM + self.table_list
@rule(transparent=True)
def table_list(self):
return (
self.table_clause |
(self.table_list + self.COMMA + self.table_clause)
)
@rule
def table_clause(self):
return alt(
self.table_expression + opt(self.alias),
self.join_clause,
)
@rule
def table_expression(self):
return alt(
self.NAME,
self.LPAREN + self.query + self.RPAREN,
)
@rule
def join_clause(self):
return self.join_type + self.table_expression + self.ON + self.expression
@rule
def join_type(self):
return opt(alt(
opt(alt(self.LEFT, self.RIGHT)) + self.OUTER,
self.INNER,
self.CROSS,
)) + self.JOIN
@rule
def expression(self):
return self.NAME
BLANKS = Terminal(Re.set(" ", "\t", "\r", "\n").plus())
# TODO: Case insensitivity? I don't know if I care- this grammar
# tool is more about new languages than parsing existing ones,
# and this SQL grammar is just a demo. Do people want case
# ignoring lexers?
SELECT = Terminal("select")
AS = Terminal("as")
COMMA = Terminal(",")
STAR = Terminal("*")
FROM = Terminal("from")
WHERE = Terminal("where")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
LEFT = Terminal("left")
RIGHT = Terminal("right")
OUTER = Terminal("outer")
INNER = Terminal("inner")
CROSS = Terminal("cross")
JOIN = Terminal("join")
ON = Terminal("on")
NAME = Terminal(
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
@rule(transparent=True)
def select_column_list():
return alt(
column_spec,
select_column_list + COMMA + column_spec,
)
@rule
def column_spec():
return alt(
STAR,
expression + opt(alias),
)
@rule
def alias():
return AS + NAME
@rule
def from_clause():
return FROM + table_list
@rule(transparent=True)
def table_list():
return table_clause | (table_list + COMMA + table_clause)
@rule
def table_clause():
return alt(
table_expression + opt(alias),
join_clause,
)
@rule
def table_expression():
return alt(
NAME,
LPAREN + query + RPAREN,
)
@rule
def join_clause():
return join_type + table_expression + ON + expression
@rule
def join_type():
return (
opt(
alt(
opt(alt(LEFT, RIGHT)) + OUTER,
INNER,
CROSS,
)
)
+ JOIN
)
@rule
def expression():
return NAME
BLANKS = Terminal("BLANKS", Re.set(" ", "\\t", "\\r", "\\n").plus())
# TODO: Case insensitivity? I don't know if I care- this grammar
# tool is more about new languages than parsing existing ones,
# and this SQL grammar is just a demo. Do people want case
# ignoring lexers?
SELECT = Terminal("SELECT", "select")
AS = Terminal("AS", "as")
COMMA = Terminal("COMMA", ",")
STAR = Terminal("STAR", "*")
FROM = Terminal("FROM", "from")
WHERE = Terminal("WHERE", "where")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
LEFT = Terminal("LEFT", "left")
RIGHT = Terminal("RIGHT", "right")
OUTER = Terminal("OUTER", "outer")
INNER = Terminal("INNER", "inner")
CROSS = Terminal("CROSS", "cross")
JOIN = Terminal("JOIN", "join")
ON = Terminal("ON", "on")
NAME = Terminal(
"NAME",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
SQL = Grammar(
start=query,
precedence=[],
trivia=[BLANKS],
name="SQL",
)
`,
"input": `
select
@ -461,134 +477,158 @@ from
# https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
from parser import *
class LGrammar(Grammar):
start = "File"
trivia = ["BLANKS"]
@rule
def File():
# TODO: Make lists easier
return _functions
@rule
def _functions():
return Function | (_functions + Function)
@rule
def Function():
return FN + NAME + ParamList + opt(ARROW + TypeExpr) + Block
@rule
def ParamList():
return LPAREN + opt(_parameters) + RPAREN
@rule
def _parameters():
# NOTE: The ungrammar in the reference does not talk about commas required between parameters
# so this massages it to make them required. Commas are in the list not the param, which
# is more awkward for processing but not terminally so.
return (Param + opt(COMMA)) | (Param + COMMA + _parameters)
@rule
def Param():
return NAME + COLON + TypeExpr
@rule
def TypeExpr():
return NAME
@rule
def Block():
return LCURLY + opt(_statements) + RCURLY
@rule
def _statements():
return Stmt | _statements + Stmt
@rule
def Stmt():
return StmtExpr | StmtLet | StmtReturn
@rule
def StmtExpr():
return Expr + SEMICOLON
@rule
def StmtLet():
return LET + NAME + EQUAL + Expr + SEMICOLON
@rule
def StmtReturn():
return RETURN + Expr + SEMICOLON
@rule(error_name="expression")
def Expr():
return ExprLiteral | ExprName | ExprParen | ExprBinary | ExprCall
@rule
def ExprLiteral():
return INT | TRUE | FALSE
@rule
def ExprName():
return NAME
@rule
def ExprParen():
return LPAREN + Expr + RPAREN
@rule
def ExprBinary():
return Expr + (PLUS | MINUS | STAR | SLASH) + Expr
@rule
def ExprCall():
return Expr + ArgList
@rule
def ArgList():
return LPAREN + opt(_arg_star) + RPAREN
@rule
def _arg_star():
# Again, a deviation from the original. See _parameters.
return (Expr + opt(COMMA)) | (Expr + COMMA + _arg_star)
BLANKS = Terminal("BLANKS", Re.set(" ", "\\t", "\\r", "\\n").plus())
TRUE = Terminal("TRUE", "true")
FALSE = Terminal("FALSE", "false")
INT = Terminal("INT", Re.set(("0", "9")).plus())
FN = Terminal("FN", "fn")
ARROW = Terminal("ARROW", "->")
COMMA = Terminal("COMMA", ",")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
LCURLY = Terminal("LCURLY", "{")
RCURLY = Terminal("RCURLY", "}")
COLON = Terminal("COLON", ":")
SEMICOLON = Terminal("SEMICOLON", ";")
LET = Terminal("LET", "let")
EQUAL = Terminal("EQUAL", "=")
RETURN = Terminal("RETURN", "return")
PLUS = Terminal("PLUS", "+")
MINUS = Terminal("MINUS", "-")
STAR = Terminal("STAR", "*")
SLASH = Terminal("SLASH", "/")
NAME = Terminal(
"NAME",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
LGrammar = Grammar(
name="L",
start=File,
trivia=[BLANKS],
# Need a little bit of disambiguation for the symbol involved.
precedence = [
(Assoc.LEFT, ["PLUS", "MINUS"]),
(Assoc.LEFT, ["STAR", "SLASH"]),
(Assoc.LEFT, ["LPAREN"]),
]
@rule
def File(self):
# TODO: Make lists easier
return self._functions
@rule
def _functions(self):
return self.Function | (self._functions + self.Function)
@rule
def Function(self):
return self.FN + self.NAME + self.ParamList + opt(self.ARROW + self.TypeExpr) + self.Block
@rule
def ParamList(self):
return self.LPAREN + opt(self._parameters) + self.RPAREN
@rule
def _parameters(self):
# NOTE: The ungrammar in the reference does not talk about commas required between parameters
# so this massages it to make them required. Commas are in the list not the param, which
# is more awkward for processing but not terminally so.
return (self.Param + opt(self.COMMA)) | (self.Param + self.COMMA + self._parameters)
@rule
def Param(self):
return self.NAME + self.COLON + self.TypeExpr
@rule
def TypeExpr(self):
return self.NAME
@rule
def Block(self):
return self.LCURLY + opt(self._statements) + self.RCURLY
@rule
def _statements(self):
return self.Stmt | self._statements + self.Stmt
@rule
def Stmt(self):
return self.StmtExpr | self.StmtLet | self.StmtReturn
@rule
def StmtExpr(self):
return self.Expr + self.SEMICOLON
@rule
def StmtLet(self):
return self.LET + self.NAME + self.EQUAL + self.Expr + self.SEMICOLON
@rule
def StmtReturn(self):
return self.RETURN + self.Expr + self.SEMICOLON
@rule(error_name="expression")
def Expr(self):
return self.ExprLiteral | self.ExprName | self.ExprParen | self.ExprBinary | self.ExprCall
@rule
def ExprLiteral(self):
return self.INT | self.TRUE | self.FALSE
@rule
def ExprName(self):
return self.NAME
@rule
def ExprParen(self):
return self.LPAREN + self.Expr + self.RPAREN
@rule
def ExprBinary(self):
return self.Expr + (self.PLUS | self.MINUS | self.STAR | self.SLASH) + self.Expr
@rule
def ExprCall(self):
return self.Expr + self.ArgList
@rule
def ArgList(self):
return self.LPAREN + opt(self._arg_star) + self.RPAREN
@rule
def _arg_star(self):
# Again, a deviation from the original. See _parameters.
return (self.Expr + opt(self.COMMA)) | (self.Expr + self.COMMA + self._arg_star)
BLANKS = Terminal(Re.set(" ", "\\t", "\\r", "\\n").plus())
TRUE = Terminal("true")
FALSE = Terminal("false")
INT = Terminal(Re.set(("0", "9")).plus())
FN = Terminal("fn")
ARROW = Terminal("->")
COMMA = Terminal(",")
LPAREN = Terminal("(")
RPAREN = Terminal(")")
LCURLY = Terminal("{")
RCURLY = Terminal("}")
COLON = Terminal(":")
SEMICOLON = Terminal(";")
LET = Terminal("let")
EQUAL = Terminal("=")
RETURN = Terminal("return")
PLUS = Terminal("+")
MINUS = Terminal("-")
STAR = Terminal("*")
SLASH = Terminal("/")
NAME = Terminal(
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
precedence=[
(Assoc.LEFT, [PLUS, MINUS]),
(Assoc.LEFT, [STAR, SLASH]),
(Assoc.LEFT, [LPAREN]),
],
)
`,
"input": `
fn dingus(x:f64, y:f64) -> f64 {

122
examples/sqlsorta.py Normal file
View file

@ -0,0 +1,122 @@
# A silly little SQL grammar. Incomplete, but you get it, right?
from parser import *
@rule
def query():
return select_clause + opt(from_clause)
@rule
def select_clause():
return SELECT + select_column_list
@rule(transparent=True)
def select_column_list():
return alt(
column_spec,
select_column_list + COMMA + column_spec,
)
@rule
def column_spec():
return alt(
STAR,
expression + opt(alias),
)
@rule
def alias():
return AS + NAME
@rule
def from_clause():
return FROM + table_list
@rule(transparent=True)
def table_list():
return table_clause | (table_list + COMMA + table_clause)
@rule
def table_clause():
return alt(
table_expression + opt(alias),
join_clause,
)
@rule
def table_expression():
return alt(
NAME,
LPAREN + query + RPAREN,
)
@rule
def join_clause():
return join_type + table_expression + ON + expression
@rule
def join_type():
return (
opt(
alt(
opt(alt(LEFT, RIGHT)) + OUTER,
INNER,
CROSS,
)
)
+ JOIN
)
@rule
def expression():
return NAME
BLANKS = Terminal("BLANKS", Re.set(" ", "\t", "\r", "\n").plus())
# TODO: Case insensitivity? I don't know if I care- this grammar
# tool is more about new languages than parsing existing ones,
# and this SQL grammar is just a demo. Do people want case
# ignoring lexers?
SELECT = Terminal("SELECT", "select")
AS = Terminal("AS", "as")
COMMA = Terminal("COMMA", ",")
STAR = Terminal("STAR", "*")
FROM = Terminal("FROM", "from")
WHERE = Terminal("WHERE", "where")
LPAREN = Terminal("LPAREN", "(")
RPAREN = Terminal("RPAREN", ")")
LEFT = Terminal("LEFT", "left")
RIGHT = Terminal("RIGHT", "right")
OUTER = Terminal("OUTER", "outer")
INNER = Terminal("INNER", "inner")
CROSS = Terminal("CROSS", "cross")
JOIN = Terminal("JOIN", "join")
ON = Terminal("ON", "on")
NAME = Terminal(
"NAME",
Re.seq(
Re.set(("a", "z"), ("A", "Z"), "_"),
Re.set(("a", "z"), ("A", "Z"), ("0", "9"), "_").star(),
),
)
SQL = Grammar(
start=query,
precedence=[],
trivia=[BLANKS],
name="SQL",
)