// NOTE: much of this parser structure derived from // https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html use crate::tokens::{Lines, Token, TokenKind, Tokens}; use std::fmt::Write as _; use std::{cell::Cell, num::NonZeroU32}; pub struct SyntaxTree<'a> { trees: Vec>, root: Option, } impl<'a> SyntaxTree<'a> { pub fn new() -> Self { SyntaxTree { trees: vec![], root: None, } } pub fn root(&self) -> Option { self.root } pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef { assert!(t.parent.is_none()); let tr = TreeRef::from_index(self.trees.len()); t.start_pos = t .children .first() .map(|c| c.start_position(&self)) .unwrap_or(0); t.end_pos = t .children .last() .map(|c| c.end_position(&self)) .unwrap_or(t.start_pos); // NOTE: Because of the difficulty of holding multiple mutable // references it's this is our best chance to patch up parent // pointers. for child in t.children.iter() { if let Child::Tree(ct) = child { self[*ct].parent = Some(tr); } } self.trees.push(t); tr } pub fn dump(&self, with_positions: bool) -> String { let mut output = String::new(); if let Some(r) = self.root { self[r].dump(self, with_positions, &mut output); } output } pub fn start_position(&self, t: TreeRef) -> usize { self[t].start_pos } pub fn end_position(&self, t: TreeRef) -> usize { self[t].end_pos } pub fn len(&self) -> usize { self.trees.len() } pub fn trees(&self) -> impl Iterator { (0..self.trees.len()).map(|i| TreeRef::from_index(i)) } pub fn find_tree_at(&self, pos: usize) -> Option { let mut current = self.root?; let mut tree = &self[current]; if pos < tree.start_pos || pos >= tree.end_pos { return None; } loop { let mut found = false; for child in &tree.children { if let Child::Tree(next) = child { let next_tree = &self[*next]; if pos >= next_tree.start_pos && pos < next_tree.end_pos { found = true; current = *next; tree = next_tree; break; } } } if !found { return Some(current); } } } } impl<'a> std::ops::Index for SyntaxTree<'a> { type Output = Tree<'a>; fn index(&self, index: TreeRef) -> &Self::Output { &self.trees[index.index()] } } impl<'a> std::ops::IndexMut for SyntaxTree<'a> { fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output { &mut self.trees[index.index()] } } #[derive(Debug, Eq, PartialEq)] pub enum TreeKind { Error, AlternateType, Argument, ArgumentList, BinaryExpression, Block, CallExpression, ClassDecl, ConditionalExpression, ExpressionStatement, FieldDecl, FieldList, FieldValue, File, ForStatement, FunctionDecl, GroupingExpression, Identifier, IfStatement, IsExpression, IteratorVariable, LetStatement, ListConstructor, ListConstructorElement, LiteralExpression, MatchArm, MatchBody, MatchExpression, MemberAccess, NewObjectExpression, ParamList, Parameter, Pattern, ReturnStatement, ReturnType, SelfParameter, SelfReference, TypeExpression, TypeIdentifier, TypeParameter, TypeParameterList, UnaryExpression, VariableBinding, WhileStatement, WildcardPattern, } pub struct Tree<'a> { pub kind: TreeKind, pub parent: Option, // TODO: Do we actually need this? pub start_pos: usize, pub end_pos: usize, pub children: Vec>, } impl<'a> Tree<'a> { pub fn nth_token(&self, index: usize) -> Option<&Token<'a>> { self.children .get(index) .map(|c| match c { Child::Token(t) => Some(t), _ => None, }) .flatten() } pub fn nth_tree(&self, index: usize) -> Option { self.children .get(index) .map(|c| match c { Child::Tree(t) => Some(*t), _ => None, }) .flatten() } pub fn child_trees<'b>(&'b self) -> impl Iterator + 'b { self.children.iter().filter_map(|c| match c { Child::Tree(t) => Some(*t), _ => None, }) } pub fn children_of_kind<'b>( &'b self, s: &'b SyntaxTree, kind: TreeKind, ) -> impl Iterator + 'b { self.child_trees() .filter_map(move |t| if s[t].kind == kind { Some(t) } else { None }) } pub fn child_of_kind(&self, s: &SyntaxTree, kind: TreeKind) -> Option { self.children_of_kind(&s, kind).next() } pub fn child_tree_of_kind<'b>( &'b self, s: &'b SyntaxTree<'a>, kind: TreeKind, ) -> Option<&'b Tree<'a>> { self.child_of_kind(s, kind).map(|t| &s[t]) } pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) { let _ = write!(output, "{:?}", self.kind); if with_positions { let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos); } let _ = write!(output, "\n"); for child in self.children.iter() { child.dump_rec(2, tree, with_positions, output); } } } impl<'a> std::fmt::Debug for Tree<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:?} [{}-{})", self.kind, self.start_pos, self.end_pos) } } #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] pub struct TreeRef(NonZeroU32); impl TreeRef { pub fn from_index(index: usize) -> TreeRef { let index: u32 = (index + 1).try_into().unwrap(); TreeRef(NonZeroU32::new(index).unwrap()) } pub fn index(&self) -> usize { let index: usize = self.0.get().try_into().unwrap(); index - 1 } } pub enum Child<'a> { Token(Token<'a>), Tree(TreeRef), } impl<'a> Child<'a> { fn dump_rec( &self, indent: usize, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String, ) { for _ in 0..indent { let _ = write!(output, " "); } match self { Child::Token(t) => { let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str()); if with_positions { let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len()); } let _ = write!(output, "\n"); } Child::Tree(t) => { let t = &tree[*t]; let _ = write!(output, "{:?}", t.kind); if with_positions { let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos); } let _ = write!(output, "\n"); for child in t.children.iter() { child.dump_rec(indent + 2, tree, with_positions, output); } } } } pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize { match &self { Child::Token(t) => t.start, Child::Tree(t) => syntax_tree[*t].start_pos, } } pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize { match &self { Child::Token(t) => t.start + t.as_str().len(), Child::Tree(t) => syntax_tree[*t].end_pos, } } } enum ParseEvent<'a> { Start { kind: TreeKind }, End, Advance { token: Token<'a> }, } struct MarkStarted { index: usize, } struct MarkClosed { index: usize, } struct CParser<'a> { tokens: Tokens<'a>, current: Token<'a>, next: Token<'a>, fuel: Cell, events: Vec>, panic: bool, } impl<'a> CParser<'a> { fn new(tokens: Tokens<'a>) -> Self { let mut parser = CParser { tokens, current: Token::new(TokenKind::EOF, 0, ""), next: Token::new(TokenKind::EOF, 0, ""), fuel: Cell::new(256), events: Vec::new(), panic: false, }; // Getting started: put the first token into `next` and then fast // forward past ephemera to the first "real" token. parser.next_real_token(); // Put `next` into `current`. std::mem::swap(&mut parser.current, &mut parser.next); // Now set `next` to the *next* real token. parser.next_real_token(); parser } fn start(&mut self) -> MarkStarted { let mark = MarkStarted { index: self.events.len(), }; self.events.push(ParseEvent::Start { kind: TreeKind::Error, }); mark } fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed { self.events[mark.index] = ParseEvent::Start { kind }; self.events.push(ParseEvent::End); MarkClosed { index: mark.index } } fn start_before(&mut self, mark: MarkClosed) -> MarkStarted { // TODO: Point backwards and pointer chase in tree build? let mark = MarkStarted { index: mark.index }; self.events.insert( mark.index, ParseEvent::Start { kind: TreeKind::Error, }, ); mark } fn advance(&mut self) { assert!(!self.eof()); // Don't try to advance past EOF self.fuel.set(256); // Consuming a token, reset stuck detector self.events.push(ParseEvent::Advance { token: self.current.clone(), }); // Move next into current (and current into next but who cares, thanks rust.) std::mem::swap(&mut self.current, &mut self.next); self.next_real_token(); } fn next_real_token(&mut self) { self.next = self.tokens.next(); while self.next.kind == TokenKind::Whitespace || self.next.kind == TokenKind::Comment { self.next = self.tokens.next(); } } fn eof(&self) -> bool { self.current.kind == TokenKind::EOF } fn peek(&self) -> TokenKind { if self.fuel.get() == 0 { panic!( "parser is stuck at '{}' ({})!", self.current, self.current.start ); } self.fuel.set(self.fuel.get() - 1); self.current.kind } fn peek_next(&self) -> TokenKind { if self.fuel.get() == 0 { panic!( "parser is stuck at '{}' ({})!", self.current, self.current.start ); } self.fuel.set(self.fuel.get() - 1); self.next.kind } // fn trace(&self, msg: &str) { // eprintln!("{}: {}: {}", self.current.start, self.current, msg); // } fn at_any(&self, kinds: &[TokenKind]) -> bool { for kind in kinds { if self.at(*kind) { return true; } } return false; } fn at(&self, kind: TokenKind) -> bool { self.peek() == kind } fn eat(&mut self, kind: TokenKind) -> bool { if self.at(kind) { self.panic = false; // Check self.advance(); true } else { false } } fn expect(&mut self, kind: TokenKind, error: T) where T: Into, { if self.eat(kind) { return; } self.error(error); } fn expect_start(&mut self, kind: TokenKind) { assert!(self.eat(kind), "should have started with {kind:?}"); } fn advance_with_error(&mut self, error: T) -> MarkClosed where T: Into, { let m = self.start(); self.error(error); self.advance(); self.end(m, TreeKind::Error) } fn error(&mut self, message: T) where T: Into, { self.error_at(self.current.clone(), message) } fn error_at(&mut self, token: Token<'a>, message: T) where T: Into, { if self.panic { return; } self.panic = true; let message: String = message.into(); let mut final_message = "Error ".to_string(); if token.kind == TokenKind::EOF { final_message.push_str("at end") } else if token.kind != TokenKind::Error { final_message.push_str("at '"); final_message.push_str(token.as_str()); final_message.push_str("'"); } final_message.push_str(": "); final_message.push_str(&message); self.events.push(ParseEvent::Advance { token: Token::error(token.start, final_message), }); } fn build_tree(self) -> (SyntaxTree<'a>, Lines) { let mut events = self.events; let mut stack = Vec::new(); let mut result = SyntaxTree::new(); // The first element in our events vector must be a start; the whole // thing must be bracketed in a tree. assert!(matches!(events.get(0), Some(ParseEvent::Start { .. }))); // The last element in our events vector must be an end, otherwise // the parser has failed badly. We'll remove it here so that, after // processing the entire array, the stack retains the tree that we // start with the very first ::Start. assert!(matches!(events.pop(), Some(ParseEvent::End))); for event in events { match event { ParseEvent::Start { kind } => stack.push(Tree { kind, parent: None, start_pos: 0, end_pos: 0, children: Vec::new(), }), ParseEvent::End => { let t = result.add_tree(stack.pop().unwrap()); stack.last_mut().unwrap().children.push(Child::Tree(t)); } ParseEvent::Advance { token } => { stack.last_mut().unwrap().children.push(Child::Token(token)); } } } assert!(stack.len() == 1, "Not all trees were ended!"); let root = result.add_tree(stack.pop().unwrap()); result.root = Some(root); (result, self.tokens.lines()) } } pub fn parse(source: &str) -> (SyntaxTree, Lines) { let tokens = Tokens::new(source); let mut parser = CParser::new(tokens); file(&mut parser); parser.build_tree() } fn file(p: &mut CParser) { let m = p.start(); while !p.eof() { match p.peek() { TokenKind::Class => class(p), TokenKind::RightBrace => { // An error parsing mismatched braces can leave me at an // un-balanced right brace, which unfortunately will not be // consumed by the statement below. (Statement currently // falls through to expression_statement, which checks for // the right-brace that a block would end with.) p.advance_with_error("unbalanced '}'"); } _ => { if !statement(p) { if p.at_any(STATEMENT_RECOVERY) { break; } else { p.advance_with_error("expected statement"); } } } } } p.end(m, TreeKind::File); } fn function(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Fun); p.expect(TokenKind::Identifier, "expected a function name"); if p.at(TokenKind::LeftParen) { param_list(p); } if p.at(TokenKind::Arrow) { return_type(p); } if p.at(TokenKind::LeftBrace) { block(p); } p.end(m, TreeKind::FunctionDecl); } fn class(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Class); p.expect(TokenKind::Identifier, "expected a class name"); if p.eat(TokenKind::LeftBrace) { while !p.at(TokenKind::RightBrace) && !p.eof() { if p.at(TokenKind::Identifier) { field_decl(p); } else if p.at(TokenKind::Fun) { function(p); } else { p.advance_with_error("expected a field declaration"); } } } p.expect(TokenKind::RightBrace, "expected a class to end with a '}'"); p.end(m, TreeKind::ClassDecl); } fn field_decl(p: &mut CParser) { let m = p.start(); p.expect(TokenKind::Identifier, "expected a field name"); if p.eat(TokenKind::Colon) { type_expr(p); } p.expect( TokenKind::Semicolon, "expect a ';' after field declarations", ); p.end(m, TreeKind::FieldDecl); } const PARAM_LIST_RECOVERY: &[TokenKind] = &[ TokenKind::Arrow, TokenKind::LeftBrace, TokenKind::Fun, TokenKind::RightParen, ]; fn param_list(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::LeftParen); while !p.at_any(PARAM_LIST_RECOVERY) && !p.eof() { if p.at(TokenKind::Identifier) { parameter(p); } else if p.at(TokenKind::Selff) { self_parameter(p); } else { p.advance_with_error("expected parameter"); } } p.expect(TokenKind::RightParen, "expect ')' to end a parameter list"); p.end(m, TreeKind::ParamList); } fn parameter(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Identifier); if p.eat(TokenKind::Colon) { type_expr(p); } if !p.at(TokenKind::RightParen) { p.expect(TokenKind::Comma, "expected a comma between parameters"); } p.end(m, TreeKind::Parameter); } fn self_parameter(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Selff); if p.eat(TokenKind::Colon) { p.error("self parameters cannot have explicit types"); type_expr(p); } if !p.at(TokenKind::RightParen) { p.expect(TokenKind::Comma, "expected a comma between parameters"); } p.end(m, TreeKind::SelfParameter); } fn return_type(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Arrow); type_expr(p); if !p.at(TokenKind::LeftBrace) { p.error("expected a block after a return type"); while !p.at_any(STATEMENT_RECOVERY) && !p.eof() { p.advance(); } } p.end(m, TreeKind::ReturnType); } fn type_expr(p: &mut CParser) { let m = p.start(); alternate_type(p); p.end(m, TreeKind::TypeExpression); } fn alternate_type(p: &mut CParser) { let mut result = type_identifier(p); while p.eat(TokenKind::Or) { let m = p.start_before(result); type_identifier(p); result = p.end(m, TreeKind::AlternateType); } } fn type_identifier(p: &mut CParser) -> MarkClosed { let m = p.start(); p.expect(TokenKind::Identifier, "expected the identifier of a type"); if p.at(TokenKind::Less) { type_parameter_list(p); } p.end(m, TreeKind::TypeIdentifier) } fn type_parameter_list(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Less); while !p.at(TokenKind::Greater) && !p.eof() { if p.at(TokenKind::Identifier) { type_parameter(p); } else { break; } } p.expect(TokenKind::Greater, "expected > to end type parameter list"); p.end(m, TreeKind::TypeParameterList); } fn type_parameter(p: &mut CParser) { assert!(p.at(TokenKind::Identifier)); let m = p.start(); type_expr(p); if !p.at(TokenKind::Greater) { p.expect(TokenKind::Comma, "expect a comma between type parameters"); } p.end(m, TreeKind::TypeParameter); } const STATEMENT_RECOVERY: &[TokenKind] = &[ TokenKind::RightBrace, TokenKind::Fun, TokenKind::LeftBrace, TokenKind::Let, TokenKind::Return, TokenKind::For, TokenKind::Class, TokenKind::While, ]; fn block(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::LeftBrace); while !p.at(TokenKind::RightBrace) && !p.eof() { if !statement(p) { if p.at_any(STATEMENT_RECOVERY) { break; } else { p.advance_with_error("expected statement"); } } } p.expect(TokenKind::RightBrace, "expect '}' to end a block"); p.end(m, TreeKind::Block); } fn statement(p: &mut CParser) -> bool { match p.peek() { TokenKind::Fun => function(p), TokenKind::LeftBrace => block(p), TokenKind::Let => statement_let(p), TokenKind::Return => statement_return(p), TokenKind::For => statement_for(p), // NOTE: Technically 'if' is an expression, but `if` doesn't // require a semicolon at the end if it's all by itself. TokenKind::If => statement_if(p), TokenKind::While => statement_while(p), _ => { if p.at(TokenKind::Semicolon) || p.at_any(EXPRESSION_FIRST) { statement_expression(p) } else { return false; } } } true } fn statement_if(p: &mut CParser) { assert!(p.at(TokenKind::If)); let m = p.start(); conditional(p); p.end(m, TreeKind::IfStatement); } fn statement_while(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::While); if p.at_any(EXPRESSION_FIRST) { expression(p); } else { p.error("expected an expression for the loop condition"); } if p.at(TokenKind::LeftBrace) { block(p); } else { p.error("expected a block for the loop body"); } p.end(m, TreeKind::WhileStatement); } fn statement_let(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Let); p.expect(TokenKind::Identifier, "expected a name for the variable"); p.expect(TokenKind::Equal, "expected a '=' after the variable name"); if p.at_any(EXPRESSION_FIRST) { expression(p); } if !p.at(TokenKind::RightBrace) { p.expect(TokenKind::Semicolon, "expect ';' to end a let statement"); } p.end(m, TreeKind::LetStatement); } fn statement_return(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Return); if p.at_any(EXPRESSION_FIRST) { // TODO: Make expression optional if we're returning () expression(p); } if !p.at(TokenKind::RightBrace) { p.expect(TokenKind::Semicolon, "expect ';' to end a return statement"); } p.end(m, TreeKind::ReturnStatement); } fn statement_for(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::For); iterator_variable(p); p.expect(TokenKind::In, "expect an 'in' after the loop variable"); if p.at_any(EXPRESSION_FIRST) { expression(p); } if p.at(TokenKind::LeftBrace) { block(p); } p.end(m, TreeKind::ForStatement); } fn iterator_variable(p: &mut CParser) { let m = p.start(); p.expect( TokenKind::Identifier, "expected an identifier for the iterator variable", ); p.end(m, TreeKind::IteratorVariable); } fn statement_expression(p: &mut CParser) { let m = p.start(); if p.at_any(EXPRESSION_FIRST) { expression(p); } if !p.at(TokenKind::RightBrace) { p.expect( TokenKind::Semicolon, "expect ';' to end an expression statement", ); } p.end(m, TreeKind::ExpressionStatement); } const EXPRESSION_FIRST: &[TokenKind] = &[ TokenKind::Number, TokenKind::String, TokenKind::True, TokenKind::False, TokenKind::LeftParen, TokenKind::Bang, TokenKind::Minus, TokenKind::If, TokenKind::Identifier, TokenKind::Selff, TokenKind::LeftBracket, TokenKind::New, TokenKind::Match, ]; fn expression(p: &mut CParser) { expression_with_power(p, 0) } const UNARY_POWER: u8 = 16; fn infix_power(token: TokenKind) -> Option<(u8, u8)> { // A dumb thing: the pair controls associativity. // // If lhs < rhs then it's left-associative, otherwise it's // right-associative. match token { TokenKind::Equal => Some((1, 0)), TokenKind::Or => Some((2, 3)), TokenKind::Is => Some((4, 5)), TokenKind::And => Some((6, 7)), TokenKind::EqualEqual | TokenKind::BangEqual => Some((8, 9)), TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => { Some((10, 11)) } TokenKind::Plus | TokenKind::Minus => Some((12, 13)), TokenKind::Star | TokenKind::Slash => Some((14, 15)), // // UNARY_POWER goes here. // TokenKind::LeftParen => Some((18, 19)), TokenKind::Dot => Some((20, 21)), _ => None, } } fn expression_with_power(p: &mut CParser, minimum_power: u8) { let Some(mut expr) = prefix_expression(p) else { return; }; loop { let token = p.peek(); let Some((lp, rp)) = infix_power(token) else { break; }; if lp < minimum_power { break; } expr = match token { TokenKind::Dot => member_access(p, expr), TokenKind::Is => is_expression(p, expr, rp), TokenKind::LeftParen => call(p, expr), _ => binary_expression(p, expr, rp), }; } } fn member_access(p: &mut CParser, left: MarkClosed) -> MarkClosed { let m = p.start_before(left); p.advance(); // Consume the operator p.expect( TokenKind::Identifier, "expected an identifier after a '.' in member access", ); p.end(m, TreeKind::MemberAccess) } fn binary_expression(p: &mut CParser, left: MarkClosed, right_power: u8) -> MarkClosed { let m = p.start_before(left); p.advance(); // Consume the operator expression_with_power(p, right_power); p.end(m, TreeKind::BinaryExpression) } fn is_expression(p: &mut CParser, left: MarkClosed, right_power: u8) -> MarkClosed { let m = p.start_before(left); p.advance(); // Consume the operator pattern(p, right_power); p.end(m, TreeKind::IsExpression) } fn call(p: &mut CParser, left: MarkClosed) -> MarkClosed { let m = p.start_before(left); argument_list(p); p.end(m, TreeKind::CallExpression) } const PATTERN_START: &[TokenKind] = &[TokenKind::Identifier, TokenKind::Underscore]; fn pattern(p: &mut CParser, right_power: u8) { let m = p.start(); // patterns are very simple. if p.peek() == TokenKind::Identifier && p.peek_next() == TokenKind::Colon { variable_binding(p); } if p.peek() == TokenKind::Underscore { wildcard_pattern(p); } else { type_expr(p); } if p.eat(TokenKind::And) { expression_with_power(p, right_power); } p.end(m, TreeKind::Pattern); } fn variable_binding(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Identifier); p.expect_start(TokenKind::Colon); p.end(m, TreeKind::VariableBinding); } fn wildcard_pattern(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Underscore); p.end(m, TreeKind::WildcardPattern); } fn argument_list(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::LeftParen); while !p.at(TokenKind::RightParen) && !p.eof() { if p.at_any(EXPRESSION_FIRST) { argument(p); } else { break; } } p.expect( TokenKind::RightParen, "expect an argument list to start with '('", ); p.end(m, TreeKind::ArgumentList); } fn argument(p: &mut CParser) { let m = p.start(); expression(p); if !p.at(TokenKind::RightParen) { p.expect(TokenKind::Comma, "expect a ',' between arguments"); } p.end(m, TreeKind::Argument); } fn prefix_expression(p: &mut CParser) -> Option { let result = match p.peek() { TokenKind::Number => literal(p), TokenKind::String => literal(p), TokenKind::True => literal(p), TokenKind::False => literal(p), TokenKind::LeftParen => grouping(p), TokenKind::Bang => unary(p), TokenKind::Minus => unary(p), TokenKind::If => conditional(p), TokenKind::Identifier => identifier(p), TokenKind::Selff => self_reference(p), TokenKind::LeftBracket => list_constructor(p), TokenKind::New => object_constructor(p), TokenKind::Match => match_expression(p), _ => { assert!( !p.at_any(EXPRESSION_FIRST), "TokenKind::{:?} is in EXPRESSION_FIRST but not handled; is this a new kind of prefix?", p.peek() ); return None; } }; Some(result) } fn literal(p: &mut CParser) -> MarkClosed { let m = p.start(); p.advance(); p.end(m, TreeKind::LiteralExpression) } fn grouping(p: &mut CParser) -> MarkClosed { let m = p.start(); p.expect_start(TokenKind::LeftParen); expression(p); p.expect(TokenKind::RightParen, "unmatched parentheses in expression"); p.end(m, TreeKind::GroupingExpression) } fn unary(p: &mut CParser) -> MarkClosed { let m = p.start(); p.advance(); // Past the operator expression_with_power(p, UNARY_POWER); p.end(m, TreeKind::UnaryExpression) } fn conditional(p: &mut CParser) -> MarkClosed { let m = p.start(); p.expect_start(TokenKind::If); expression(p); if p.at(TokenKind::LeftBrace) { block(p) } else { p.error("expected a block after `if`") } if p.eat(TokenKind::Else) { if p.at(TokenKind::If) { // Don't require another block, just jump right into the conditional. conditional(p); } else if p.at(TokenKind::LeftBrace) { block(p); } else { p.error("expected a block after `else`") } } p.end(m, TreeKind::ConditionalExpression) } fn identifier(p: &mut CParser) -> MarkClosed { assert!(p.at(TokenKind::Identifier)); let m = p.start(); p.advance(); p.end(m, TreeKind::Identifier) } fn self_reference(p: &mut CParser) -> MarkClosed { assert!(p.at(TokenKind::Selff)); let m = p.start(); p.advance(); p.end(m, TreeKind::SelfReference) } fn list_constructor(p: &mut CParser) -> MarkClosed { let m = p.start(); p.expect_start(TokenKind::LeftBracket); while !p.at(TokenKind::RightBracket) && !p.eof() { if p.at_any(EXPRESSION_FIRST) { list_constructor_element(p); } else { break; } } p.expect( TokenKind::RightBracket, "expected a ] to end the list constructor", ); p.end(m, TreeKind::ListConstructor) } fn list_constructor_element(p: &mut CParser) { let m = p.start(); expression(p); if !p.at(TokenKind::RightBracket) { p.expect( TokenKind::Comma, "expected a comma between list constructor elements", ); } p.end(m, TreeKind::ListConstructorElement); } fn object_constructor(p: &mut CParser) -> MarkClosed { let m = p.start(); p.expect_start(TokenKind::New); type_identifier(p); if p.at(TokenKind::LeftBrace) { field_list(p); } else { p.error("expected a '{' to start the field list after the class type"); } p.end(m, TreeKind::NewObjectExpression) } fn field_list(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::LeftBrace); while !p.at(TokenKind::RightBrace) && !p.eof() { if p.at(TokenKind::Identifier) { field_value(p); } else { if p.at_any(STATEMENT_RECOVERY) { break; } p.advance_with_error("expected an identifier in a field list"); } } p.expect( TokenKind::RightBrace, "expected the field list to end with '}'", ); p.end(m, TreeKind::FieldList); } fn field_value(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::Identifier); if p.eat(TokenKind::Colon) { expression(p); } if !p.at(TokenKind::RightBrace) { p.expect(TokenKind::Comma, "expect a ',' between fields"); } p.end(m, TreeKind::FieldValue); } fn match_expression(p: &mut CParser) -> MarkClosed { let m = p.start(); p.expect_start(TokenKind::Match); expression(p); // ? if p.at(TokenKind::LeftBrace) { match_body(p); } else { p.error("expected a '{' to start the alternatives after `match`"); } p.end(m, TreeKind::MatchExpression) } fn match_body(p: &mut CParser) { let m = p.start(); p.expect_start(TokenKind::LeftBrace); while !p.at(TokenKind::RightBrace) && !p.eof() { if p.at_any(PATTERN_START) { // TODO: type_expr_first ? match_arm(p); } else { if p.at_any(STATEMENT_RECOVERY) { break; } p.advance_with_error("expected a type expression to start a match arm"); } } p.expect( TokenKind::RightBrace, "expected a '}' to end the alternatives in a match", ); p.end(m, TreeKind::MatchBody); } fn match_arm(p: &mut CParser) { let m = p.start(); pattern(p, 0); if p.eat(TokenKind::Arrow) { expression(p); } else { p.error("expected an arrow after the pattern in a match arm"); } if !p.at(TokenKind::RightBrace) { p.expect(TokenKind::Comma, "expected a comma between match arms"); } p.end(m, TreeKind::MatchArm); } #[cfg(test)] mod tests { use super::*; #[test] fn tree_ref_size() { // What's the point of doing all that work if the tree ref isn't nice // and "small"? TreeRef is pervasive throughout the system: we use // them to key function definitions and the type checker and use them // to link classes to their definitions, etc. It's important that an // Option be *extremely* cheap to manipulate. // // TODO: This optimization isn't as good as it might be because tokens are // huge so Child is huge no matter what we do. If we retain // tokens out of line then we can take full advantage of this. assert_eq!(4, std::mem::size_of::>()); } }