We don't lose function declarations and whatnot, although we get lost with broken return types.
1147 lines
29 KiB
Rust
1147 lines
29 KiB
Rust
// NOTE: much of this parser structure derived from
|
|
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
|
|
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
|
use std::fmt::Write as _;
|
|
use std::{cell::Cell, num::NonZeroU32};
|
|
|
|
pub struct SyntaxTree<'a> {
|
|
trees: Vec<Tree<'a>>,
|
|
root: Option<TreeRef>,
|
|
}
|
|
|
|
impl<'a> SyntaxTree<'a> {
|
|
pub fn new() -> Self {
|
|
SyntaxTree {
|
|
trees: vec![],
|
|
root: None,
|
|
}
|
|
}
|
|
|
|
pub fn root(&self) -> Option<TreeRef> {
|
|
self.root
|
|
}
|
|
|
|
pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef {
|
|
assert!(t.parent.is_none());
|
|
let tr = TreeRef::from_index(self.trees.len());
|
|
|
|
t.start_pos = t
|
|
.children
|
|
.first()
|
|
.map(|c| c.start_position(&self))
|
|
.unwrap_or(0);
|
|
|
|
t.end_pos = t
|
|
.children
|
|
.last()
|
|
.map(|c| c.end_position(&self))
|
|
.unwrap_or(t.start_pos);
|
|
|
|
// NOTE: Because of the difficulty of holding multiple mutable
|
|
// references it's this is our best chance to patch up parent
|
|
// pointers.
|
|
for child in t.children.iter() {
|
|
if let Child::Tree(ct) = child {
|
|
self[*ct].parent = Some(tr);
|
|
}
|
|
}
|
|
|
|
self.trees.push(t);
|
|
tr
|
|
}
|
|
|
|
pub fn dump(&self, with_positions: bool) -> String {
|
|
let mut output = String::new();
|
|
if let Some(r) = self.root {
|
|
self[r].dump(self, with_positions, &mut output);
|
|
}
|
|
output
|
|
}
|
|
|
|
pub fn start_position(&self, t: TreeRef) -> usize {
|
|
self[t].start_pos
|
|
}
|
|
|
|
pub fn end_position(&self, t: TreeRef) -> usize {
|
|
self[t].end_pos
|
|
}
|
|
|
|
pub fn len(&self) -> usize {
|
|
self.trees.len()
|
|
}
|
|
|
|
pub fn trees(&self) -> impl Iterator<Item = TreeRef> {
|
|
(0..self.trees.len()).map(|i| TreeRef::from_index(i))
|
|
}
|
|
|
|
pub fn find_tree_at(&self, pos: usize) -> Option<TreeRef> {
|
|
let mut current = self.root?;
|
|
let mut tree = &self[current];
|
|
if pos < tree.start_pos || pos >= tree.end_pos {
|
|
return None;
|
|
}
|
|
|
|
loop {
|
|
let mut found = false;
|
|
for child in &tree.children {
|
|
if let Child::Tree(next) = child {
|
|
let next_tree = &self[*next];
|
|
if pos >= next_tree.start_pos && pos < next_tree.end_pos {
|
|
found = true;
|
|
current = *next;
|
|
tree = next_tree;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if !found {
|
|
return Some(current);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> std::ops::Index<TreeRef> for SyntaxTree<'a> {
|
|
type Output = Tree<'a>;
|
|
|
|
fn index(&self, index: TreeRef) -> &Self::Output {
|
|
&self.trees[index.index()]
|
|
}
|
|
}
|
|
|
|
impl<'a> std::ops::IndexMut<TreeRef> for SyntaxTree<'a> {
    /// Mutable counterpart of the `Index` lookup; panics if the handle is out
    /// of range for this tree.
    fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output {
        let slot = index.index();
        &mut self.trees[slot]
    }
}
|
|
|
|
/// The kind of grammar construct a [`Tree`] node represents.
#[derive(Debug, Eq, PartialEq)]
pub enum TreeKind {
    /// Placeholder kind given to a node when it is started, and the kind of
    /// the nodes the parser wraps around tokens it skips with an error.
    Error,

    Argument,
    ArgumentList,
    BinaryExpression,
    Block,
    CallExpression,
    ClassDecl,
    ConditionalExpression,
    ExpressionStatement,
    FieldDecl,
    FieldList,
    FieldValue,
    File,
    ForStatement,
    FunctionDecl,
    GroupingExpression,
    Identifier,
    IfStatement,
    LetStatement,
    ListConstructor,
    ListConstructorElement,
    LiteralExpression,
    MemberAccess,
    NewObjectExpression,
    ParamList,
    Parameter,
    ReturnStatement,
    ReturnType,
    SelfParameter,
    SelfReference,
    TypeExpression,
    TypeParameter,
    TypeParameterList,
    UnaryExpression,
}
|
|
|
|
pub struct Tree<'a> {
|
|
pub kind: TreeKind,
|
|
pub parent: Option<TreeRef>, // TODO: Do we actually need this?
|
|
pub start_pos: usize,
|
|
pub end_pos: usize,
|
|
pub children: Vec<Child<'a>>,
|
|
}
|
|
|
|
impl<'a> Tree<'a> {
|
|
pub fn nth_token(&self, index: usize) -> Option<&Token<'a>> {
|
|
self.children
|
|
.get(index)
|
|
.map(|c| match c {
|
|
Child::Token(t) => Some(t),
|
|
_ => None,
|
|
})
|
|
.flatten()
|
|
}
|
|
|
|
pub fn nth_tree(&self, index: usize) -> Option<TreeRef> {
|
|
self.children
|
|
.get(index)
|
|
.map(|c| match c {
|
|
Child::Tree(t) => Some(*t),
|
|
_ => None,
|
|
})
|
|
.flatten()
|
|
}
|
|
|
|
pub fn child_trees<'b>(&'b self) -> impl Iterator<Item = TreeRef> + 'b {
|
|
self.children.iter().filter_map(|c| match c {
|
|
Child::Tree(t) => Some(*t),
|
|
_ => None,
|
|
})
|
|
}
|
|
|
|
pub fn children_of_kind<'b>(
|
|
&'b self,
|
|
s: &'b SyntaxTree,
|
|
kind: TreeKind,
|
|
) -> impl Iterator<Item = TreeRef> + 'b {
|
|
self.child_trees()
|
|
.filter_map(move |t| if s[t].kind == kind { Some(t) } else { None })
|
|
}
|
|
|
|
pub fn child_of_kind(&self, s: &SyntaxTree, kind: TreeKind) -> Option<TreeRef> {
|
|
self.children_of_kind(&s, kind).next()
|
|
}
|
|
|
|
pub fn child_tree_of_kind<'b>(
|
|
&'b self,
|
|
s: &'b SyntaxTree<'a>,
|
|
kind: TreeKind,
|
|
) -> Option<&'b Tree<'a>> {
|
|
self.child_of_kind(s, kind).map(|t| &s[t])
|
|
}
|
|
|
|
pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) {
|
|
let _ = write!(output, "{:?}", self.kind);
|
|
if with_positions {
|
|
let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos);
|
|
}
|
|
let _ = write!(output, "\n");
|
|
for child in self.children.iter() {
|
|
child.dump_rec(2, tree, with_positions, output);
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> std::fmt::Debug for Tree<'a> {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
write!(f, "{:?} [{}-{})", self.kind, self.start_pos, self.end_pos)
|
|
}
|
|
}
|
|
|
|
/// A compact handle to a node stored in a `SyntaxTree`.
///
/// The handle stores the node's index plus one in a `NonZeroU32`, so that
/// `Option<TreeRef>` needs no extra space for the `None` case.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct TreeRef(NonZeroU32);

impl TreeRef {
    /// Builds a handle from a zero-based index.
    ///
    /// # Panics
    ///
    /// Panics if `index + 1` does not fit in a `u32`.
    pub fn from_index(index: usize) -> TreeRef {
        let one_based = u32::try_from(index + 1).unwrap();
        TreeRef(NonZeroU32::new(one_based).unwrap())
    }

    /// Returns the zero-based index this handle was built from.
    pub fn index(&self) -> usize {
        let one_based = usize::try_from(self.0.get()).unwrap();
        one_based - 1
    }
}
|
|
|
|
pub enum Child<'a> {
|
|
Token(Token<'a>),
|
|
Tree(TreeRef),
|
|
}
|
|
|
|
impl<'a> Child<'a> {
|
|
fn dump_rec(
|
|
&self,
|
|
indent: usize,
|
|
tree: &SyntaxTree<'a>,
|
|
with_positions: bool,
|
|
output: &mut String,
|
|
) {
|
|
for _ in 0..indent {
|
|
let _ = write!(output, " ");
|
|
}
|
|
match self {
|
|
Child::Token(t) => {
|
|
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str());
|
|
if with_positions {
|
|
let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len());
|
|
}
|
|
let _ = write!(output, "\n");
|
|
}
|
|
Child::Tree(t) => {
|
|
let t = &tree[*t];
|
|
let _ = write!(output, "{:?}", t.kind);
|
|
if with_positions {
|
|
let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos);
|
|
}
|
|
let _ = write!(output, "\n");
|
|
|
|
for child in t.children.iter() {
|
|
child.dump_rec(indent + 2, tree, with_positions, output);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize {
|
|
match &self {
|
|
Child::Token(t) => t.start,
|
|
Child::Tree(t) => syntax_tree[*t].start_pos,
|
|
}
|
|
}
|
|
|
|
pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize {
|
|
match &self {
|
|
Child::Token(t) => t.start + t.as_str().len(),
|
|
Child::Tree(t) => syntax_tree[*t].end_pos,
|
|
}
|
|
}
|
|
}
|
|
|
|
enum ParseEvent<'a> {
|
|
Start { kind: TreeKind },
|
|
End,
|
|
Advance { token: Token<'a> },
|
|
}
|
|
|
|
/// Marker for a node that has been opened but not yet closed.
struct MarkStarted {
    /// Index of the node's `Start` event in the parser's event list.
    index: usize,
}
|
|
|
|
/// Marker for a node that has been closed; it can be used to wrap the node in
/// a new parent after the fact.
struct MarkClosed {
    /// Index of the node's `Start` event in the parser's event list.
    index: usize,
}
|
|
|
|
struct CParser<'a> {
|
|
tokens: Tokens<'a>,
|
|
current: Token<'a>,
|
|
fuel: Cell<u32>,
|
|
events: Vec<ParseEvent<'a>>,
|
|
panic: bool,
|
|
}
|
|
|
|
impl<'a> CParser<'a> {
|
|
fn new(tokens: Tokens<'a>) -> Self {
|
|
let mut parser = CParser {
|
|
tokens,
|
|
current: Token::new(TokenKind::EOF, 0, ""),
|
|
fuel: Cell::new(256),
|
|
events: Vec::new(),
|
|
panic: false,
|
|
};
|
|
parser.current = parser.tokens.next();
|
|
parser.skip_ephemera();
|
|
parser
|
|
}
|
|
|
|
fn start(&mut self) -> MarkStarted {
|
|
let mark = MarkStarted {
|
|
index: self.events.len(),
|
|
};
|
|
self.events.push(ParseEvent::Start {
|
|
kind: TreeKind::Error,
|
|
});
|
|
mark
|
|
}
|
|
|
|
fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed {
|
|
self.events[mark.index] = ParseEvent::Start { kind };
|
|
self.events.push(ParseEvent::End);
|
|
MarkClosed { index: mark.index }
|
|
}
|
|
|
|
fn start_before(&mut self, mark: MarkClosed) -> MarkStarted {
|
|
// TODO: Point backwards and pointer chase in tree build?
|
|
let mark = MarkStarted { index: mark.index };
|
|
self.events.insert(
|
|
mark.index,
|
|
ParseEvent::Start {
|
|
kind: TreeKind::Error,
|
|
},
|
|
);
|
|
mark
|
|
}
|
|
|
|
fn advance(&mut self) {
|
|
assert!(!self.eof()); // Don't try to advance past EOF
|
|
self.fuel.set(256); // Consuming a token, reset stuck detector
|
|
self.events.push(ParseEvent::Advance {
|
|
token: self.current.clone(),
|
|
});
|
|
self.current = self.tokens.next();
|
|
self.skip_ephemera();
|
|
}
|
|
|
|
fn skip_ephemera(&mut self) {
|
|
while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment
|
|
{
|
|
self.current = self.tokens.next();
|
|
}
|
|
}
|
|
|
|
fn eof(&self) -> bool {
|
|
self.current.kind == TokenKind::EOF
|
|
}
|
|
|
|
fn peek(&self) -> TokenKind {
|
|
if self.fuel.get() == 0 {
|
|
panic!(
|
|
"parser is stuck at '{}' ({})!",
|
|
self.current, self.current.start
|
|
);
|
|
}
|
|
self.fuel.set(self.fuel.get() - 1);
|
|
self.current.kind
|
|
}
|
|
|
|
// fn trace(&self, msg: &str) {
|
|
// eprintln!("{}: {}: {}", self.current.start, self.current, msg);
|
|
// }
|
|
|
|
fn at_any(&self, kinds: &[TokenKind]) -> bool {
|
|
for kind in kinds {
|
|
if self.at(*kind) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
fn at(&self, kind: TokenKind) -> bool {
|
|
self.peek() == kind
|
|
}
|
|
|
|
fn eat(&mut self, kind: TokenKind) -> bool {
|
|
if self.at(kind) {
|
|
self.panic = false; // Check
|
|
self.advance();
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
fn expect<T>(&mut self, kind: TokenKind, error: T)
|
|
where
|
|
T: Into<String>,
|
|
{
|
|
if self.eat(kind) {
|
|
return;
|
|
}
|
|
self.error(error);
|
|
}
|
|
|
|
fn expect_start(&mut self, kind: TokenKind) {
|
|
assert!(self.eat(kind), "should have started with {kind:?}");
|
|
}
|
|
|
|
fn advance_with_error<T>(&mut self, error: T) -> MarkClosed
|
|
where
|
|
T: Into<String>,
|
|
{
|
|
let m = self.start();
|
|
self.error(error);
|
|
self.advance();
|
|
self.end(m, TreeKind::Error)
|
|
}
|
|
|
|
fn error<T>(&mut self, message: T)
|
|
where
|
|
T: Into<String>,
|
|
{
|
|
self.error_at(self.current.clone(), message)
|
|
}
|
|
|
|
fn error_at<T>(&mut self, token: Token<'a>, message: T)
|
|
where
|
|
T: Into<String>,
|
|
{
|
|
if self.panic {
|
|
return;
|
|
}
|
|
self.panic = true;
|
|
|
|
let message: String = message.into();
|
|
let mut final_message = "Error ".to_string();
|
|
|
|
if token.kind == TokenKind::EOF {
|
|
final_message.push_str("at end")
|
|
} else if token.kind != TokenKind::Error {
|
|
final_message.push_str("at '");
|
|
final_message.push_str(token.as_str());
|
|
final_message.push_str("'");
|
|
}
|
|
final_message.push_str(": ");
|
|
final_message.push_str(&message);
|
|
|
|
self.events.push(ParseEvent::Advance {
|
|
token: Token::error(token.start, final_message),
|
|
});
|
|
}
|
|
|
|
fn build_tree(self) -> (SyntaxTree<'a>, Lines) {
|
|
let mut events = self.events;
|
|
let mut stack = Vec::new();
|
|
|
|
let mut result = SyntaxTree::new();
|
|
|
|
// The first element in our events vector must be a start; the whole
|
|
// thing must be bracketed in a tree.
|
|
assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));
|
|
|
|
// The last element in our events vector must be an end, otherwise
|
|
// the parser has failed badly. We'll remove it here so that, after
|
|
// processing the entire array, the stack retains the tree that we
|
|
// start with the very first ::Start.
|
|
assert!(matches!(events.pop(), Some(ParseEvent::End)));
|
|
|
|
for event in events {
|
|
match event {
|
|
ParseEvent::Start { kind } => stack.push(Tree {
|
|
kind,
|
|
parent: None,
|
|
start_pos: 0,
|
|
end_pos: 0,
|
|
children: Vec::new(),
|
|
}),
|
|
|
|
ParseEvent::End => {
|
|
let t = result.add_tree(stack.pop().unwrap());
|
|
stack.last_mut().unwrap().children.push(Child::Tree(t));
|
|
}
|
|
|
|
ParseEvent::Advance { token } => {
|
|
stack.last_mut().unwrap().children.push(Child::Token(token));
|
|
}
|
|
}
|
|
}
|
|
|
|
assert!(stack.len() == 1, "Not all trees were ended!");
|
|
let root = result.add_tree(stack.pop().unwrap());
|
|
result.root = Some(root);
|
|
|
|
(result, self.tokens.lines())
|
|
}
|
|
}
|
|
|
|
pub fn parse(source: &str) -> (SyntaxTree, Lines) {
|
|
let tokens = Tokens::new(source);
|
|
let mut parser = CParser::new(tokens);
|
|
|
|
file(&mut parser);
|
|
|
|
parser.build_tree()
|
|
}
|
|
|
|
fn file(p: &mut CParser) {
|
|
let m = p.start();
|
|
while !p.eof() {
|
|
match p.peek() {
|
|
TokenKind::Class => class(p),
|
|
TokenKind::RightBrace => {
|
|
// An error parsing mismatched braces can leave me at an
|
|
// un-balanced right brace, which unfortunately will not be
|
|
// consumed by the statement below. (Statement currently
|
|
// falls through to expression_statement, which checks for
|
|
// the right-brace that a block would end with.)
|
|
p.advance_with_error("unbalanced '}'");
|
|
}
|
|
_ => {
|
|
if !statement(p) {
|
|
if p.at_any(STATEMENT_RECOVERY) {
|
|
break;
|
|
} else {
|
|
p.advance_with_error("expected statement");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
p.end(m, TreeKind::File);
|
|
}
|
|
|
|
fn function(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Fun);
|
|
p.expect(TokenKind::Identifier, "expected a function name");
|
|
if p.at(TokenKind::LeftParen) {
|
|
param_list(p);
|
|
}
|
|
if p.at(TokenKind::Arrow) {
|
|
return_type(p);
|
|
}
|
|
if p.at(TokenKind::LeftBrace) {
|
|
block(p);
|
|
}
|
|
|
|
p.end(m, TreeKind::FunctionDecl);
|
|
}
|
|
|
|
fn class(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Class);
|
|
p.expect(TokenKind::Identifier, "expected a class name");
|
|
if p.eat(TokenKind::LeftBrace) {
|
|
while !p.at(TokenKind::RightBrace) && !p.eof() {
|
|
if p.at(TokenKind::Identifier) {
|
|
field_decl(p);
|
|
} else if p.at(TokenKind::Fun) {
|
|
function(p);
|
|
} else {
|
|
p.advance_with_error("expected a field declaration");
|
|
}
|
|
}
|
|
}
|
|
p.expect(TokenKind::RightBrace, "expected a class to end with a '}'");
|
|
|
|
p.end(m, TreeKind::ClassDecl);
|
|
}
|
|
|
|
fn field_decl(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect(TokenKind::Identifier, "expected a field name");
|
|
if p.eat(TokenKind::Colon) {
|
|
type_expr(p);
|
|
}
|
|
p.expect(
|
|
TokenKind::Semicolon,
|
|
"expect a ';' after field declarations",
|
|
);
|
|
|
|
p.end(m, TreeKind::FieldDecl);
|
|
}
|
|
|
|
const PARAM_LIST_RECOVERY: &[TokenKind] = &[
|
|
TokenKind::Arrow,
|
|
TokenKind::LeftBrace,
|
|
TokenKind::Fun,
|
|
TokenKind::RightParen,
|
|
];
|
|
|
|
fn param_list(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::LeftParen);
|
|
while !p.at_any(PARAM_LIST_RECOVERY) && !p.eof() {
|
|
if p.at(TokenKind::Identifier) {
|
|
parameter(p);
|
|
} else if p.at(TokenKind::Selff) {
|
|
self_parameter(p);
|
|
} else {
|
|
p.advance_with_error("expected parameter");
|
|
}
|
|
}
|
|
p.expect(TokenKind::RightParen, "expect ')' to end a parameter list");
|
|
|
|
p.end(m, TreeKind::ParamList);
|
|
}
|
|
|
|
fn parameter(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Identifier);
|
|
if p.eat(TokenKind::Colon) {
|
|
type_expr(p);
|
|
}
|
|
if !p.at(TokenKind::RightParen) {
|
|
p.expect(TokenKind::Comma, "expected a comma between parameters");
|
|
}
|
|
|
|
p.end(m, TreeKind::Parameter);
|
|
}
|
|
|
|
fn self_parameter(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Selff);
|
|
if p.eat(TokenKind::Colon) {
|
|
p.error("self parameters cannot have explicit types");
|
|
type_expr(p);
|
|
}
|
|
if !p.at(TokenKind::RightParen) {
|
|
p.expect(TokenKind::Comma, "expected a comma between parameters");
|
|
}
|
|
|
|
p.end(m, TreeKind::SelfParameter);
|
|
}
|
|
|
|
fn return_type(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Arrow);
|
|
type_expr(p);
|
|
|
|
p.end(m, TreeKind::ReturnType);
|
|
}
|
|
|
|
fn type_expr(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
// TODO: Other kinds of type expressions probably!
|
|
p.expect(TokenKind::Identifier, "expected the identifier of a type");
|
|
|
|
if p.at(TokenKind::Less) {
|
|
type_parameter_list(p);
|
|
}
|
|
|
|
p.end(m, TreeKind::TypeExpression);
|
|
}
|
|
|
|
fn type_parameter_list(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Less);
|
|
while !p.at(TokenKind::Greater) && !p.eof() {
|
|
if p.at(TokenKind::Identifier) {
|
|
type_parameter(p);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
p.expect(TokenKind::Greater, "expected > to end type parameter list");
|
|
|
|
p.end(m, TreeKind::TypeParameterList);
|
|
}
|
|
|
|
fn type_parameter(p: &mut CParser) {
|
|
assert!(p.at(TokenKind::Identifier));
|
|
let m = p.start();
|
|
|
|
type_expr(p);
|
|
if !p.at(TokenKind::Greater) {
|
|
p.expect(TokenKind::Comma, "expect a comma between type parameters");
|
|
}
|
|
|
|
p.end(m, TreeKind::TypeParameter);
|
|
}
|
|
|
|
const STATEMENT_RECOVERY: &[TokenKind] = &[
|
|
TokenKind::RightBrace,
|
|
TokenKind::Fun,
|
|
TokenKind::LeftBrace,
|
|
TokenKind::Let,
|
|
TokenKind::Return,
|
|
TokenKind::For,
|
|
TokenKind::Class,
|
|
];
|
|
|
|
fn block(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::LeftBrace);
|
|
while !p.at(TokenKind::RightBrace) && !p.eof() {
|
|
if !statement(p) {
|
|
if p.at_any(STATEMENT_RECOVERY) {
|
|
break;
|
|
} else {
|
|
p.advance_with_error("expected statement");
|
|
}
|
|
}
|
|
}
|
|
p.expect(TokenKind::RightBrace, "expect '}' to end a block");
|
|
|
|
p.end(m, TreeKind::Block);
|
|
}
|
|
|
|
fn statement(p: &mut CParser) -> bool {
|
|
match p.peek() {
|
|
TokenKind::Fun => function(p),
|
|
TokenKind::LeftBrace => block(p),
|
|
TokenKind::Let => statement_let(p),
|
|
TokenKind::Return => statement_return(p),
|
|
TokenKind::For => statement_for(p),
|
|
|
|
// NOTE: Technically 'if' is an expression, but `if` doesn't
|
|
// require a semicolon at the end if it's all by itself.
|
|
TokenKind::If => statement_if(p),
|
|
|
|
_ => {
|
|
if p.at(TokenKind::Semicolon) || p.at_any(EXPRESSION_FIRST) {
|
|
statement_expression(p)
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
true
|
|
}
|
|
|
|
fn statement_if(p: &mut CParser) {
|
|
assert!(p.at(TokenKind::If));
|
|
let m = p.start();
|
|
|
|
conditional(p);
|
|
|
|
p.end(m, TreeKind::IfStatement);
|
|
}
|
|
|
|
fn statement_let(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Let);
|
|
p.expect(TokenKind::Identifier, "expected a name for the variable");
|
|
p.expect(TokenKind::Equal, "expected a '=' after the variable name");
|
|
if p.at_any(EXPRESSION_FIRST) {
|
|
expression(p);
|
|
}
|
|
if !p.at(TokenKind::RightBrace) {
|
|
p.expect(TokenKind::Semicolon, "expect ';' to end a let statement");
|
|
}
|
|
|
|
p.end(m, TreeKind::LetStatement);
|
|
}
|
|
|
|
fn statement_return(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::Return);
|
|
if p.at_any(EXPRESSION_FIRST) {
|
|
// TODO: Make expression optional if we're returning ()
|
|
expression(p);
|
|
}
|
|
if !p.at(TokenKind::RightBrace) {
|
|
p.expect(TokenKind::Semicolon, "expect ';' to end a return statement");
|
|
}
|
|
|
|
p.end(m, TreeKind::ReturnStatement);
|
|
}
|
|
|
|
fn statement_for(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::For);
|
|
p.expect(
|
|
TokenKind::Identifier,
|
|
"expected an identifier for the loop variable",
|
|
);
|
|
p.expect(TokenKind::In, "expect an 'in' after the loop variable");
|
|
if p.at_any(EXPRESSION_FIRST) {
|
|
expression(p);
|
|
}
|
|
if p.at(TokenKind::LeftBrace) {
|
|
block(p);
|
|
}
|
|
|
|
p.end(m, TreeKind::ForStatement);
|
|
}
|
|
|
|
fn statement_expression(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
if p.at_any(EXPRESSION_FIRST) {
|
|
expression(p);
|
|
}
|
|
if !p.at(TokenKind::RightBrace) {
|
|
p.expect(
|
|
TokenKind::Semicolon,
|
|
"expect ';' to end an expression statement",
|
|
);
|
|
}
|
|
|
|
p.end(m, TreeKind::ExpressionStatement);
|
|
}
|
|
|
|
const EXPRESSION_FIRST: &[TokenKind] = &[
|
|
TokenKind::Number,
|
|
TokenKind::String,
|
|
TokenKind::True,
|
|
TokenKind::False,
|
|
TokenKind::LeftParen,
|
|
TokenKind::Bang,
|
|
TokenKind::Minus,
|
|
TokenKind::If,
|
|
TokenKind::Identifier,
|
|
TokenKind::Selff,
|
|
TokenKind::LeftBracket,
|
|
TokenKind::New,
|
|
];
|
|
|
|
fn expression(p: &mut CParser) {
|
|
expression_with_power(p, 0)
|
|
}
|
|
|
|
/// Binding power used for the operand of a unary operator; it sits above
/// `*` and `/` and below `.` in the `infix_power` table.
const UNARY_POWER: u8 = 14;
|
|
|
|
fn infix_power(token: TokenKind) -> Option<(u8, u8)> {
|
|
// A dumb thing: the pair controls associativity.
|
|
//
|
|
// If lhs < rhs then it's left-associative, otherwise it's
|
|
// right-associative.
|
|
match token {
|
|
TokenKind::Equal => Some((1, 0)),
|
|
TokenKind::Or => Some((2, 3)),
|
|
TokenKind::And => Some((4, 5)),
|
|
TokenKind::EqualEqual | TokenKind::BangEqual => Some((6, 7)),
|
|
TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
|
|
Some((8, 9))
|
|
}
|
|
TokenKind::Plus | TokenKind::Minus => Some((10, 11)),
|
|
TokenKind::Star | TokenKind::Slash => Some((12, 13)),
|
|
//
|
|
// UNARY_POWER goes here.
|
|
//
|
|
TokenKind::Dot => Some((16, 17)),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn expression_with_power(p: &mut CParser, minimum_power: u8) {
|
|
let Some(mut expr) = prefix_expression(p) else {
|
|
return;
|
|
};
|
|
while p.at(TokenKind::LeftParen) {
|
|
let m = p.start_before(expr);
|
|
argument_list(p);
|
|
expr = p.end(m, TreeKind::CallExpression);
|
|
}
|
|
|
|
loop {
|
|
let token = p.peek();
|
|
let Some((lp, rp)) = infix_power(token) else {
|
|
break;
|
|
};
|
|
if lp < minimum_power {
|
|
break;
|
|
}
|
|
|
|
// TODO: I don't think this works for other "infix" types, but we'll
|
|
// see won't we.
|
|
let m = p.start_before(expr);
|
|
p.advance(); // Consume the operator
|
|
expression_with_power(p, rp);
|
|
expr = p.end(
|
|
m,
|
|
if token == TokenKind::Dot {
|
|
TreeKind::MemberAccess
|
|
} else {
|
|
TreeKind::BinaryExpression
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
fn argument_list(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::LeftParen);
|
|
while !p.at(TokenKind::RightParen) && !p.eof() {
|
|
if p.at_any(EXPRESSION_FIRST) {
|
|
argument(p);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
p.expect(
|
|
TokenKind::RightParen,
|
|
"expect an argument list to start with '('",
|
|
);
|
|
|
|
p.end(m, TreeKind::ArgumentList);
|
|
}
|
|
|
|
fn argument(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
expression(p);
|
|
if !p.at(TokenKind::RightParen) {
|
|
p.expect(TokenKind::Comma, "expect a ',' between arguments");
|
|
}
|
|
|
|
p.end(m, TreeKind::Argument);
|
|
}
|
|
|
|
fn prefix_expression(p: &mut CParser) -> Option<MarkClosed> {
|
|
let result = match p.peek() {
|
|
TokenKind::Number => literal(p),
|
|
TokenKind::String => literal(p),
|
|
TokenKind::True => literal(p),
|
|
TokenKind::False => literal(p),
|
|
|
|
TokenKind::LeftParen => grouping(p),
|
|
|
|
TokenKind::Bang => unary(p),
|
|
TokenKind::Minus => unary(p),
|
|
|
|
TokenKind::If => conditional(p),
|
|
|
|
TokenKind::Identifier => identifier(p),
|
|
TokenKind::Selff => self_reference(p),
|
|
|
|
TokenKind::LeftBracket => list_constructor(p),
|
|
|
|
TokenKind::New => object_constructor(p),
|
|
|
|
_ => {
|
|
assert!(!p.at_any(EXPRESSION_FIRST));
|
|
return None;
|
|
}
|
|
};
|
|
Some(result)
|
|
}
|
|
|
|
fn literal(p: &mut CParser) -> MarkClosed {
|
|
let m = p.start();
|
|
p.advance();
|
|
p.end(m, TreeKind::LiteralExpression)
|
|
}
|
|
|
|
fn grouping(p: &mut CParser) -> MarkClosed {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::LeftParen);
|
|
expression(p);
|
|
p.expect(TokenKind::RightParen, "unmatched parentheses in expression");
|
|
|
|
p.end(m, TreeKind::GroupingExpression)
|
|
}
|
|
|
|
fn unary(p: &mut CParser) -> MarkClosed {
|
|
let m = p.start();
|
|
|
|
p.advance(); // Past the operator
|
|
expression_with_power(p, UNARY_POWER);
|
|
|
|
p.end(m, TreeKind::UnaryExpression)
|
|
}
|
|
|
|
fn conditional(p: &mut CParser) -> MarkClosed {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::If);
|
|
expression(p);
|
|
if p.at(TokenKind::LeftBrace) {
|
|
block(p)
|
|
} else {
|
|
p.error("expected a block after `if`")
|
|
}
|
|
if p.eat(TokenKind::Else) {
|
|
if p.at(TokenKind::If) {
|
|
// Don't require another block, just jump right into the conditional.
|
|
conditional(p);
|
|
} else if p.at(TokenKind::LeftBrace) {
|
|
block(p);
|
|
} else {
|
|
p.error("expected a block after `else`")
|
|
}
|
|
}
|
|
|
|
p.end(m, TreeKind::ConditionalExpression)
|
|
}
|
|
|
|
fn identifier(p: &mut CParser) -> MarkClosed {
|
|
assert!(p.at(TokenKind::Identifier));
|
|
let m = p.start();
|
|
|
|
p.advance();
|
|
|
|
p.end(m, TreeKind::Identifier)
|
|
}
|
|
|
|
fn self_reference(p: &mut CParser) -> MarkClosed {
|
|
assert!(p.at(TokenKind::Selff));
|
|
let m = p.start();
|
|
|
|
p.advance();
|
|
|
|
p.end(m, TreeKind::SelfReference)
|
|
}
|
|
|
|
fn list_constructor(p: &mut CParser) -> MarkClosed {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::LeftBracket);
|
|
while !p.at(TokenKind::RightBracket) && !p.eof() {
|
|
if p.at_any(EXPRESSION_FIRST) {
|
|
list_constructor_element(p);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
p.expect(
|
|
TokenKind::RightBracket,
|
|
"expected a ] to end the list constructor",
|
|
);
|
|
|
|
p.end(m, TreeKind::ListConstructor)
|
|
}
|
|
|
|
fn list_constructor_element(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
expression(p);
|
|
if !p.at(TokenKind::RightBracket) {
|
|
p.expect(
|
|
TokenKind::Comma,
|
|
"expected a comma between list constructor elements",
|
|
);
|
|
}
|
|
|
|
p.end(m, TreeKind::ListConstructorElement);
|
|
}
|
|
|
|
fn object_constructor(p: &mut CParser) -> MarkClosed {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::New);
|
|
type_expr(p);
|
|
if p.at(TokenKind::LeftBrace) {
|
|
field_list(p);
|
|
} else {
|
|
p.error("expected a '{' to start the field list after the class type");
|
|
}
|
|
|
|
p.end(m, TreeKind::NewObjectExpression)
|
|
}
|
|
|
|
fn field_list(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect_start(TokenKind::LeftBrace);
|
|
while !p.at(TokenKind::RightBrace) && !p.eof() {
|
|
field_value(p);
|
|
}
|
|
p.expect(
|
|
TokenKind::RightBrace,
|
|
"expected the field list to end with '}'",
|
|
);
|
|
|
|
p.end(m, TreeKind::FieldList);
|
|
}
|
|
|
|
fn field_value(p: &mut CParser) {
|
|
let m = p.start();
|
|
|
|
p.expect(TokenKind::Identifier, "expected a field name");
|
|
if p.eat(TokenKind::Colon) {
|
|
expression(p);
|
|
}
|
|
if !p.at(TokenKind::RightBrace) {
|
|
p.expect(TokenKind::Comma, "expect a ',' between fields");
|
|
}
|
|
|
|
p.end(m, TreeKind::FieldValue);
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::mem::size_of;

    /// Pins the size of `Option<TreeRef>` to four bytes.
    #[test]
    fn tree_ref_size() {
        // What's the point of doing all that work if the tree ref isn't nice
        // and "small"? TreeRef is pervasive throughout the system: we use
        // them to key function definitions and the type checker and use them
        // to link classes to their definitions, etc. It's important that an
        // Option<TreeRef> be *extremely* cheap to manipulate.
        //
        // TODO: This optimization isn't as good as it might be because tokens are
        //       huge so Child is huge no matter what we do. If we retain
        //       tokens out of line then we can take full advantage of this.
        assert_eq!(4, size_of::<Option<TreeRef>>());
    }
}
|