From 0b0b5d72d0c406230cb850bb4de822b88d5e00bd Mon Sep 17 00:00:00 2001 From: John Doty Date: Sat, 6 Apr 2024 17:39:33 -0700 Subject: [PATCH] [fine] Tokens are by reference, ephemera It's another jump, perhaps, but smaller arrays, and now we can track ephemera efficiently without bloating child trees. (We could also put ephemera inline with the child trees but then nth_token would be unwieldy, and it would lower our data density.) --- fine/src/compiler.rs | 100 ++++++++++++++++-------- fine/src/parser.rs | 128 +++++++++++++++++++++++++++--- fine/src/semantics.rs | 175 ++++++++++++++++++++++++++---------------- 3 files changed, 298 insertions(+), 105 deletions(-) diff --git a/fine/src/compiler.rs b/fine/src/compiler.rs index 09a095f8..bf55470d 100644 --- a/fine/src/compiler.rs +++ b/fine/src/compiler.rs @@ -2,11 +2,11 @@ use std::collections::HashMap; use std::rc::Rc; use crate::{ - parser::{Child, SyntaxTree, Tree, TreeKind, TreeRef}, + parser::{Child, SyntaxTree, TokenRef, Tree, TreeKind, TreeRef}, semantics::{ string_constant_to_string, Declaration, Location, ModuleId, Origin, Semantics, Type, }, - tokens::TokenKind, + tokens::{Token, TokenKind}, }; pub const EXTERN_BUILTIN_NOOP: usize = 0; @@ -163,6 +163,42 @@ struct Compiler<'a> { function: Function, } +impl<'a> std::ops::Index for Compiler<'a> { + type Output = Tree; + + #[inline] + fn index(&self, index: TreeRef) -> &Self::Output { + &self.syntax[index] + } +} + +impl<'a> std::ops::Index<&TreeRef> for Compiler<'a> { + type Output = Tree; + + #[inline] + fn index(&self, index: &TreeRef) -> &Self::Output { + &self.syntax[index] + } +} + +impl<'a> std::ops::Index for Compiler<'a> { + type Output = Token; + + #[inline] + fn index(&self, index: TokenRef) -> &Self::Output { + &self.syntax[index] + } +} + +impl<'a> std::ops::Index<&TokenRef> for Compiler<'a> { + type Output = Token; + + #[inline] + fn index(&self, index: &TokenRef) -> &Self::Output { + &self.syntax[index] + } +} + impl<'a> Compiler<'a> { fn add_string(&mut self, result: String) -> usize { let index = self.function.strings.len(); @@ -261,7 +297,7 @@ fn function_from_function_decl( tree: &Tree, ) -> Result { // TODO: If this is a method the name should be different. - let name = tree.nth_token(1).ok_or("no id")?.as_str(source); + let name = syntax[tree.nth_token(1).ok_or("no id")?].as_str(source); let param_list = tree .child_tree_of_kind(syntax, TreeKind::ParamList) @@ -271,8 +307,12 @@ fn function_from_function_decl( Ok(Function::new(name, param_count)) } -fn function_from_class_decl(source: &str, tree: &Tree) -> Result { - let name = tree.nth_token(1).ok_or("no name")?.as_str(source); +fn function_from_class_decl( + source: &str, + syntax: &SyntaxTree, + tree: &Tree, +) -> Result { + let name = syntax[tree.nth_token(1).ok_or("no name")?].as_str(source); // TODO: I think this is incorrect! let field_count = tree.children.len() - 2; @@ -306,7 +346,7 @@ pub fn compile_module(semantics: &Semantics) -> Rc { let tree = &semantics.tree()[t]; let function = match tree.kind { TreeKind::FunctionDecl => function_from_function_decl(&source, &syntax_tree, tree), - TreeKind::ClassDecl => function_from_class_decl(&source, tree), + TreeKind::ClassDecl => function_from_class_decl(&source, &syntax_tree, tree), _ => Err("don't know how to make a function of this"), }; @@ -335,7 +375,7 @@ pub fn compile_module(semantics: &Semantics) -> Rc { } fn file(c: &mut Compiler, t: TreeRef) { - let tree = &c.syntax[t]; + let tree = &c[t]; compiler_assert_eq!(c, t, tree.kind, TreeKind::File, "must be compiling a file"); let children: Vec<_> = tree.child_trees().collect(); @@ -381,7 +421,7 @@ fn compile_expression(c: &mut Compiler, t: TreeRef) { } fn compile_literal(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { - let tok = tr.nth_token(0).ok_or("no token")?; + let tok = &c[tr.nth_token(0).ok_or("no token")?]; match c.semantics.type_of(t) { Type::F64 => c.push(Instruction::PushFloat( tok.as_str(c.source).parse().unwrap(), @@ -410,7 +450,7 @@ fn compile_grouping(c: &mut Compiler, t: &Tree) -> CR { fn compile_unary_operator(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { compile_expression(c, tr.nth_tree(1).ok_or("no arg")?); - let tok = tr.nth_token(0).ok_or("no op")?; + let tok = &c[tr.nth_token(0).ok_or("no op")?]; match tok.kind { TokenKind::Minus => { c.push(Instruction::PushFloat(-1.0)); @@ -464,7 +504,7 @@ where } fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { - let op = tr.nth_token(1).ok_or("no op")?; + let op = &c[tr.nth_token(1).ok_or("no op")?]; match op.kind { TokenKind::Plus => compile_simple_binary_expression(c, tr, |c, t| match t { Type::F64 => Instruction::FloatAdd, @@ -583,7 +623,7 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { c.push(Instruction::Dup); let lvalue = tr.nth_tree(0).ok_or("no lvalue")?; - let ltree = &c.syntax[lvalue]; + let ltree = &c[lvalue]; #[allow(unused_assignments)] let mut environment = None; @@ -591,7 +631,7 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { let declaration = match ltree.kind { // TODO: Assign to list access TreeKind::Identifier => { - let id = ltree.nth_token(0).ok_or("no id")?.as_str(&c.source); + let id = c[ltree.nth_token(0).ok_or("no id")?].as_str(&c.source); environment = Some(c.semantics.environment_of(lvalue)); environment .as_ref() @@ -600,7 +640,7 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { .ok_or("cannot bind destination")? } TreeKind::MemberAccess => { - let id = ltree.nth_token(2).ok_or("no member")?.as_str(&c.source); + let id = c[ltree.nth_token(2).ok_or("no member")?].as_str(&c.source); let t = ltree.nth_tree(0).ok_or("no lhs exp")?; let typ = c.semantics.type_of(t); @@ -655,7 +695,7 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { } fn compile_identifier_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { - let ident = tree.nth_token(0).ok_or("no ident")?.as_str(&c.source); + let ident = c[tree.nth_token(0).ok_or("no ident")?].as_str(&c.source); let environment = c.semantics.environment_of(t); let declaration = environment.bind(ident).ok_or("not found")?; @@ -758,15 +798,15 @@ fn compile_pattern(c: &mut Compiler, t: TreeRef) -> CR { let type_expr = tree.child_tree_of_kind(&c.syntax, TreeKind::TypeExpression); - let and_index = tree.children.iter().position(|c| match c { - Child::Token(t) => t.kind == TokenKind::And, + let and_index = tree.children.iter().position(|child| match child { + Child::Token(t) => c[t].kind == TokenKind::And, _ => false, }); // If you have a binding, dup and store now, it is in scope. if let Some(binding) = tree.child_tree_of_kind(&c.syntax, TreeKind::VariableBinding) { if let Some(variable) = binding.nth_token(0) { - let id = variable.as_str(&c.source); + let id = c[variable].as_str(&c.source); let environment = c.semantics.environment_of(t); let Some(declaration) = environment.bind(id) else { ice!(c, t, "cannot bind pattern variable `{id}`"); @@ -845,7 +885,7 @@ fn compile_type_expr_eq(c: &mut Compiler, t: TreeRef) { } fn compile_type_identifier_eq(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { - let identifier = tree.nth_token(0).ok_or("no id")?.as_str(&c.source); + let identifier = c[tree.nth_token(0).ok_or("no id")?].as_str(&c.source); match identifier { "f64" => { c.push(Instruction::IsFloat); @@ -971,7 +1011,7 @@ fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> C let mut field_bindings = HashMap::new(); for field in field_list.children_of_kind(&c.syntax, TreeKind::FieldValue) { let f = &c.syntax[field]; - let name = f.nth_token(0).ok_or("no field name")?; + let name = &c.syntax[f.nth_token(0).ok_or("no field name")?]; field_bindings.insert(name.as_str(&c.source), field); } @@ -990,10 +1030,8 @@ fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> C let type_reference = tree .child_tree_of_kind(&c.syntax, TreeKind::TypeIdentifier) .ok_or("no type ref")?; - let identifier = type_reference - .nth_token(0) - .ok_or("no type id")? - .as_str(&c.source); + let identifier = type_reference.nth_token(0).ok_or("no type id")?; + let identifier = c[identifier].as_str(&c.source); let environment = c.semantics.environment_of(t); let declaration = environment.bind(identifier).ok_or("cannot bind type")?; match declaration.location { @@ -1008,7 +1046,7 @@ fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> C fn compile_field_value(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { if let Some(colon) = tree.nth_token(1) { - if colon.kind == TokenKind::Colon { + if c[colon].kind == TokenKind::Colon { compile_expression(c, tree.nth_tree(2).ok_or("no val")?); return OK; } @@ -1016,7 +1054,7 @@ fn compile_field_value(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { // Form 2: { x, ... } let environment = c.semantics.environment_of(t); - let id = tree.nth_token(0).ok_or("no id")?.as_str(&c.source); + let id = c[tree.nth_token(0).ok_or("no id")?].as_str(&c.source); let declaration = environment.bind(id).ok_or("cannot bind")?; compile_load_declaration(c, t, declaration) @@ -1030,7 +1068,7 @@ fn compile_member_access(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { compile_expression(c, lhs); let typ = c.semantics.type_of(lhs); - let ident = tree.nth_token(2).ok_or("no ident")?.as_str(&c.source); + let ident = c[tree.nth_token(2).ok_or("no ident")?].as_str(&c.source); let environment = c.semantics.member_environment(t, &typ); let declaration = environment.bind(ident).ok_or("cannot bind")?; @@ -1114,7 +1152,7 @@ fn compile_expression_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) if tree .nth_token(1) - .is_some_and(|t| t.kind == TokenKind::Semicolon) + .is_some_and(|t| c[t].kind == TokenKind::Semicolon) { c.push(Instruction::Discard); if gen_value { @@ -1134,7 +1172,7 @@ fn compile_let_statement(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: b compile_expression(c, tree.nth_tree(3).ok_or("no val")?); let environment = c.semantics.environment_of(t); let declaration = environment - .bind(tree.nth_token(1).ok_or("no id")?.as_str(&c.source)) + .bind(c[tree.nth_token(1).ok_or("no id")?].as_str(&c.source)) .ok_or("cannot bind")?; // TODO: ASSERT LOCAL DECLARATION? @@ -1198,7 +1236,7 @@ fn compile_function(c: &mut Compiler, t: TreeRef) -> CR { c.push(Instruction::LoadArgument(count - 1 - i)); } - let name = tree.nth_token(1).ok_or("no name")?.as_str(&c.source); + let name = c[tree.nth_token(1).ok_or("no name")?].as_str(&c.source); let name_index = c.add_string(name.to_string()); c.push(Instruction::PushString(name_index)); c.push(Instruction::PushInt(t.index().try_into().unwrap())); @@ -1250,8 +1288,8 @@ fn compile_return_statement(c: &mut Compiler, tree: &Tree) -> CR { fn compile_for_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR { // Figure out the variable. let vt = tree.nth_tree(1).ok_or("no var")?; - let var = &c.syntax[vt]; - let id = var.nth_token(0).ok_or("no id")?.as_str(&c.source); + let var = &c[vt]; + let id = c[var.nth_token(0).ok_or("no id")?].as_str(&c.source); let body = tree.nth_tree(4).ok_or("no body")?; let env = c.semantics.environment_of(body); diff --git a/fine/src/parser.rs b/fine/src/parser.rs index 5a22dabb..557421e5 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -7,6 +7,9 @@ use std::{cell::Cell, num::NonZeroU32}; pub struct SyntaxTree { trees: Vec, + tokens: Vec, + ephemera: Vec>, + trailing_ephemera: Vec, root: Option, } @@ -14,6 +17,9 @@ impl SyntaxTree { pub fn new() -> Self { SyntaxTree { trees: vec![], + tokens: vec![], + ephemera: vec![], + trailing_ephemera: vec![], root: None, } } @@ -22,6 +28,13 @@ impl SyntaxTree { self.root } + pub fn add_token(&mut self, t: Token, ephemera: Vec) -> TokenRef { + let tr = TokenRef::from_index(self.tokens.len()); + self.tokens.push(t); + self.ephemera.push(ephemera); + tr + } + pub fn add_tree(&mut self, mut t: Tree) -> TreeRef { assert!(t.parent.is_none()); let tr = TreeRef::from_index(self.trees.len()); @@ -52,6 +65,14 @@ impl SyntaxTree { tr } + pub fn ephemera_before(&self, token: TokenRef) -> &[Token] { + &self.ephemera[token.index()] + } + + pub fn trailing_ephemera(&self) -> &[Token] { + &self.trailing_ephemera + } + pub fn dump(&self, source: &str, with_positions: bool) -> String { let mut output = String::new(); if let Some(r) = self.root { @@ -107,17 +128,53 @@ impl SyntaxTree { impl std::ops::Index for SyntaxTree { type Output = Tree; + #[inline] fn index(&self, index: TreeRef) -> &Self::Output { &self.trees[index.index()] } } +impl std::ops::Index<&TreeRef> for SyntaxTree { + type Output = Tree; + + #[inline] + fn index(&self, index: &TreeRef) -> &Self::Output { + &self.trees[index.index()] + } +} + impl std::ops::IndexMut for SyntaxTree { + #[inline] fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output { &mut self.trees[index.index()] } } +impl std::ops::Index for SyntaxTree { + type Output = Token; + + #[inline] + fn index(&self, index: TokenRef) -> &Self::Output { + &self.tokens[index.index()] + } +} + +impl std::ops::Index<&TokenRef> for SyntaxTree { + type Output = Token; + + #[inline] + fn index(&self, index: &TokenRef) -> &Self::Output { + &self.tokens[index.index()] + } +} + +// impl std::ops::IndexMut for SyntaxTree { +// #[inline] +// fn index_mut(&mut self, index: TokenRef) -> &mut Self::Output { +// &mut self.tokens[index.index()] +// } +// } + #[derive(Debug, Eq, PartialEq)] pub enum TreeKind { Error, @@ -182,11 +239,11 @@ pub struct Tree { } impl Tree { - pub fn nth_token(&self, index: usize) -> Option<&Token> { + pub fn nth_token(&self, index: usize) -> Option { self.children .get(index) .map(|c| match c { - Child::Token(t) => Some(t), + Child::Token(t) => Some(*t), _ => None, }) .flatten() @@ -259,8 +316,23 @@ impl TreeRef { } } +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] +pub struct TokenRef(NonZeroU32); + +impl TokenRef { + pub fn from_index(index: usize) -> TokenRef { + let index: u32 = (index + 1).try_into().unwrap(); + TokenRef(NonZeroU32::new(index).unwrap()) + } + + pub fn index(&self) -> usize { + let index: usize = self.0.get().try_into().unwrap(); + index - 1 + } +} + pub enum Child { - Token(Token), + Token(TokenRef), Tree(TreeRef), } @@ -278,6 +350,7 @@ impl Child { } match self { Child::Token(t) => { + let t = &tree[*t]; let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str(source)); if with_positions { let _ = write!(output, " [{}, {})", t.start(), t.end()); @@ -301,14 +374,14 @@ impl Child { pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize { match &self { - Child::Token(t) => t.start(), + Child::Token(t) => syntax_tree[*t].start(), Child::Tree(t) => syntax_tree[*t].start_pos, } } pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize { match &self { - Child::Token(t) => t.end(), + Child::Token(t) => syntax_tree[*t].end(), Child::Tree(t) => syntax_tree[*t].end_pos, } } @@ -317,7 +390,7 @@ impl Child { enum ParseEvent { Start { kind: TreeKind }, End, - Advance { token: Token }, + Advance { token: Token, ephemera: Vec }, } struct MarkStarted { @@ -330,7 +403,9 @@ struct MarkClosed { struct CParser<'a> { tokens: Tokens<'a>, + current_ephemera: Vec, // Ephemera between the previous and current token. current: Token, + next_ephemera: Vec, // Ephemera between the current and next token. next: Token, fuel: Cell, events: Vec, @@ -341,7 +416,9 @@ impl<'a> CParser<'a> { fn new(tokens: Tokens<'a>) -> Self { let mut parser = CParser { tokens, + current_ephemera: Vec::new(), current: Token::new(TokenKind::EOF, 0, 0), + next_ephemera: Vec::new(), next: Token::new(TokenKind::EOF, 0, 0), fuel: Cell::new(256), events: Vec::new(), @@ -354,6 +431,7 @@ impl<'a> CParser<'a> { // Put `next` into `current`. std::mem::swap(&mut parser.current, &mut parser.next); + std::mem::swap(&mut parser.current_ephemera, &mut parser.next_ephemera); // Now set `next` to the *next* real token. parser.next_real_token(); @@ -394,16 +472,20 @@ impl<'a> CParser<'a> { self.fuel.set(256); // Consuming a token, reset stuck detector self.events.push(ParseEvent::Advance { token: self.current.clone(), + ephemera: self.current_ephemera.drain(..).collect(), }); // Move next into current (and current into next but who cares, thanks rust.) std::mem::swap(&mut self.current, &mut self.next); + std::mem::swap(&mut self.current_ephemera, &mut self.next_ephemera); self.next_real_token(); } fn next_real_token(&mut self) { self.next = self.tokens.next(); while self.next.kind == TokenKind::Whitespace || self.next.kind == TokenKind::Comment { - self.next = self.tokens.next(); + let mut next = self.tokens.next(); + std::mem::swap(&mut next, &mut self.next); + self.next_ephemera.push(next); } } @@ -517,6 +599,7 @@ impl<'a> CParser<'a> { self.events.push(ParseEvent::Advance { token: Token::error(token.start(), token.end(), final_message), + ephemera: Vec::new(), }); } @@ -552,8 +635,9 @@ impl<'a> CParser<'a> { stack.last_mut().unwrap().children.push(Child::Tree(t)); } - ParseEvent::Advance { token } => { - stack.last_mut().unwrap().children.push(Child::Token(token)); + ParseEvent::Advance { token, ephemera } => { + let t = result.add_token(token, ephemera); + stack.last_mut().unwrap().children.push(Child::Token(t)); } } } @@ -561,6 +645,7 @@ impl<'a> CParser<'a> { assert!(stack.len() == 1, "Not all trees were ended!"); let root = result.add_tree(stack.pop().unwrap()); result.root = Some(root); + result.trailing_ephemera = self.current_ephemera; (Rc::new(result), Rc::new(self.tokens.lines())) } @@ -1415,4 +1500,29 @@ mod tests { // tokens out of line then we can take full advantage of this. assert_eq!(4, std::mem::size_of::>()); } + + #[test] + fn ephemera() { + let source = "// comment\n5;\n// another"; + let (tree, _) = parse(source); + assert_eq!(2, tree.tokens.len()); + + let tok = &tree.tokens[0]; + assert_eq!(tok.kind, TokenKind::Number); + + let ephemera = &tree.ephemera[0]; + assert_eq!(2, ephemera.len()); + assert_eq!(ephemera[0].kind, TokenKind::Comment); + assert_eq!(ephemera[0].as_str(source), "// comment"); + assert_eq!(ephemera[1].kind, TokenKind::Whitespace); + + let tok = &tree.tokens[1]; + assert_eq!(tok.kind, TokenKind::Semicolon); + + let ephemera = tree.trailing_ephemera(); + assert_eq!(2, ephemera.len()); + assert_eq!(ephemera[0].kind, TokenKind::Whitespace); + assert_eq!(ephemera[1].kind, TokenKind::Comment); + assert_eq!(ephemera[1].as_str(source), "// another"); + } } diff --git a/fine/src/semantics.rs b/fine/src/semantics.rs index 4180389b..27ba5047 100644 --- a/fine/src/semantics.rs +++ b/fine/src/semantics.rs @@ -1,5 +1,5 @@ use crate::{ - parser::{Child, SyntaxTree, Tree, TreeKind, TreeRef}, + parser::{Child, SyntaxTree, TokenRef, Tree, TreeKind, TreeRef}, tokens::{Lines, Token, TokenKind}, vm::StackValue, }; @@ -712,6 +712,42 @@ pub struct Semantics { classes: RefCell>>, } +impl std::ops::Index for Semantics { + type Output = Tree; + + #[inline] + fn index(&self, index: TreeRef) -> &Self::Output { + &self.syntax_tree[index] + } +} + +impl std::ops::Index<&TreeRef> for Semantics { + type Output = Tree; + + #[inline] + fn index(&self, index: &TreeRef) -> &Self::Output { + &self.syntax_tree[index] + } +} + +impl std::ops::Index for Semantics { + type Output = Token; + + #[inline] + fn index(&self, index: TokenRef) -> &Self::Output { + &self.syntax_tree[index] + } +} + +impl std::ops::Index<&TokenRef> for Semantics { + type Output = Token; + + #[inline] + fn index(&self, index: &TokenRef) -> &Self::Output { + &self.syntax_tree[index] + } +} + impl Semantics { pub fn new( mid: ModuleId, @@ -801,10 +837,10 @@ impl Semantics { self.syntax_tree .root() .map(|file| { - self.syntax_tree[file] + self[file] .children_of_kind(&self.syntax_tree, TreeKind::Import) .filter_map(|import| { - let tok = self.syntax_tree[import].nth_token(1)?; + let tok = &self[self[import].nth_token(1)?]; if tok.kind != TokenKind::String { None } else { @@ -862,17 +898,18 @@ impl Semantics { where T: ToString, { - let tree = &self.syntax_tree[tree]; + let tree = &self[tree]; self.report_error_span(tree.start_pos, tree.end_pos, error) } fn gather_errors(&mut self, tree: TreeRef) { let mut stack = vec![tree]; while let Some(tr) = stack.pop() { - let tree = &self.syntax_tree[tr]; + let tree = &self[tr]; for child in &tree.children { match child { Child::Token(t) => { + let t = &self[*t]; if t.kind == TokenKind::Error { self.report_error_span(t.start(), t.end(), t.as_str(&self.source)); } @@ -970,7 +1007,7 @@ impl Semantics { *state = Incremental::InProgress; } - let tree = &self.syntax_tree[t]; + let tree = &self[t]; // eprintln!(">>> environment_of => {tree:?}"); let parent = match self.logical_parents[t.index()] { @@ -1000,14 +1037,14 @@ impl Semantics { for child in tree.children.iter() { match child { Child::Tree(t) => { - let ct = &self.syntax_tree[*t]; + let ct = &self[*t]; if ct.kind == TreeKind::FunctionDecl { let Some(name) = ct.nth_token(1) else { continue; }; let existing = environment.declarations.insert( - name.as_str(&self.source).into(), + self[name].as_str(&self.source).into(), Declaration { location: Location::Function, index: self.function_index_of(*t), @@ -1021,7 +1058,7 @@ impl Semantics { ct, format!( "duplicate definition of function '{}'", - name.as_str(&self.source) + self[name].as_str(&self.source) ), ); } @@ -1053,12 +1090,13 @@ impl Semantics { // children of an export tree still go in the local // environment. loop { - let ct = &self.syntax_tree[t]; + let ct = &self[t]; match ct.kind { TreeKind::FunctionDecl => { let Some(name) = ct.nth_token(1) else { break None; }; + let name = &self[name]; if name.kind != TokenKind::Identifier { break None; } @@ -1076,6 +1114,7 @@ impl Semantics { let Some(name) = ct.nth_token(1) else { break None; }; + let name = &self[name]; if name.kind != TokenKind::Identifier { break None; } @@ -1093,6 +1132,7 @@ impl Semantics { let Some(name) = ct.nth_token(3) else { break None; }; + let name = &self[name]; if name.kind != TokenKind::Identifier { break None; } @@ -1117,6 +1157,7 @@ impl Semantics { TreeKind::ExportList => { for child in &ct.children { if let Child::Token(tok) = child { + let tok = &self[tok]; if tok.kind == TokenKind::Identifier { explicit_exports.push(tok); } @@ -1128,7 +1169,7 @@ impl Semantics { } }; - let ct = &self.syntax_tree[*t]; + let ct = &self[*t]; if let Some((what, name, declaration)) = binding { let existing = environment .declarations @@ -1180,7 +1221,7 @@ impl Semantics { }; let mut environment = Environment::new(self.mid, Some(parent), location); - environment.insert(name.as_str(&self.source), declaration); + environment.insert(self[name].as_str(&self.source), declaration); EnvironmentRef::new(environment) } @@ -1193,10 +1234,10 @@ impl Semantics { let Child::Tree(ct) = child else { continue; }; - let param = &self.syntax_tree[*ct]; + let param = &self[*ct]; match param.kind { TreeKind::SelfParameter => { - let param_name = param.nth_token(0).unwrap(); + let param_name = &self[param.nth_token(0).unwrap()]; if environment .insert(param_name.as_str(&self.source), *ct) .is_some() @@ -1216,6 +1257,7 @@ impl Semantics { let Some(param_name) = param.nth_token(0) else { continue; }; + let param_name = &self[param_name]; let param_str = param_name.as_str(&self.source); if environment.insert(param_str, *ct).is_some() { @@ -1237,13 +1279,13 @@ impl Semantics { return parent; }; - let iterator = &self.syntax_tree[it]; + let iterator = &self[it]; let Some(id) = iterator.nth_token(0) else { return parent; }; let mut environment = Environment::new(self.mid, Some(parent), Location::Local); - environment.insert(id.as_str(&self.source), it); + environment.insert(&self[id].as_str(&self.source), it); EnvironmentRef::new(environment) } @@ -1284,14 +1326,14 @@ impl Semantics { let Some(match_body) = tree.parent else { self.internal_compiler_error(Some(t), "no parent on match arm"); }; - let match_body = &self.syntax_tree[match_body]; + let match_body = &self[match_body]; if match_body.kind != TreeKind::MatchBody { self.internal_compiler_error(Some(t), "match arm parent not match body"); } let Some(match_expression) = match_body.parent else { self.internal_compiler_error(Some(t), "no parent on match body"); }; - let match_expression = &self.syntax_tree[match_expression]; + let match_expression = &self[match_expression]; if match_expression.kind != TreeKind::MatchExpression { self.internal_compiler_error(Some(t), "match body parent not match expression"); } @@ -1343,7 +1385,7 @@ impl Semantics { // TODO: This binding should be un-assignable! Don't assign to this! let mut env = Environment::new(self.mid, Some(parent), Location::Local); - env.insert(variable.as_str(&self.source), variable_decl); + env.insert(&self[variable].as_str(&self.source), variable_decl); EnvironmentRef::new(env) } @@ -1371,25 +1413,25 @@ impl Semantics { } // TODO: Right now there's only one way to make a class decl. :P - let tree = &self.syntax_tree[t]; + let tree = &self[t]; assert_eq!(tree.kind, TreeKind::ClassDecl); let name = tree .nth_token(1) - .map(|t| t.as_str(&self.source)) + .map(|t| self[t].as_str(&self.source)) .unwrap_or(""); // Fields let mut fields = Vec::new(); for field in tree.children_of_kind(&self.syntax_tree, TreeKind::FieldDecl) { - let f = &self.syntax_tree[field]; + let f = &self[field]; if let Some(field_name) = f.nth_token(0) { let field_type = f .nth_tree(2) .map(|t| self.type_of(t)) .unwrap_or_else(|| self.type_error_for(f)); fields.push(FieldDecl { - name: field_name.as_str(&self.source).into(), + name: self[field_name].as_str(&self.source).into(), declaration: field, field_type, }); @@ -1399,8 +1441,10 @@ impl Semantics { // Methods let mut methods = Vec::new(); for method in tree.children_of_kind(&self.syntax_tree, TreeKind::FunctionDecl) { - let m = &self.syntax_tree[method]; + let m = &self[method]; if let Some(method_name) = m.nth_token(1) { + let method_name = &self[method_name]; + // TODO: Check to see if it is actually a method, or if it is a static function. let decl_type = self.type_of(method); match decl_type { @@ -1582,7 +1626,7 @@ impl Semantics { *state = Incremental::InProgress; } - let tree = &self.syntax_tree[t]; + let tree = &self[t]; // eprintln!(">>> type_of => {tree:?}"); let result = match tree.kind { @@ -1641,7 +1685,7 @@ impl Semantics { fn type_of_unary(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::UnaryExpression); - let op = tree.nth_token(0)?; + let op = &self[tree.nth_token(0)?]; let expr = tree.nth_tree(1)?; let argument_type = self.type_of(expr); @@ -1682,7 +1726,7 @@ impl Semantics { assert_eq!(tree.kind, TreeKind::BinaryExpression); let left_tree = tree.nth_tree(0)?; let lhs = self.type_of(left_tree); - let op = tree.nth_token(1)?; + let op = &self[tree.nth_token(1)?]; let rhs = self.type_of(tree.nth_tree(2)?); match (op.kind, lhs, rhs) { @@ -1740,7 +1784,7 @@ impl Semantics { (_, _, Type::Error(e)) => Some(Type::Error(e)), // Assignments are fun. - (TokenKind::Equal, a, b) => self.type_of_assignment(left_tree, a, b, op), + (TokenKind::Equal, a, b) => self.type_of_assignment(left_tree, a, b, &op), // Missed the whole table, it must be an error. (_, left_type, right_type) => { @@ -1765,7 +1809,7 @@ impl Semantics { op: &Token, ) -> Option { // Ensure the left tree is an lvalue - let tree = &self.syntax_tree[left_tree]; + let tree = &self[left_tree]; #[allow(unused_assignments)] let mut environment = None; @@ -1773,7 +1817,7 @@ impl Semantics { let declaration = match tree.kind { // TODO: Assign to list access TreeKind::Identifier => { - let id = tree.nth_token(0)?.as_str(&self.source); + let id = self[tree.nth_token(0)?].as_str(&self.source); environment = Some(self.environment_of(left_tree)); match environment.as_ref().unwrap().bind(id) { Some(decl) => decl, @@ -1788,7 +1832,7 @@ impl Semantics { } } TreeKind::MemberAccess => { - let id = tree.nth_token(2)?.as_str(&self.source); + let id = self[tree.nth_token(2)?].as_str(&self.source); let typ = self.type_of(tree.nth_tree(0)?); environment = Some(self.member_environment(left_tree, &typ)); match environment.as_ref().unwrap().bind(id) { @@ -1873,7 +1917,7 @@ impl Semantics { assert_eq!(tree.kind, TreeKind::TypeIdentifier); // TODO: This will *clearly* need to get better. - let token = tree.nth_token(0)?.as_str(&self.source); + let token = self[tree.nth_token(0)?].as_str(&self.source); match token { "f64" => Some(Type::F64), "string" => Some(Type::String), @@ -1989,7 +2033,7 @@ impl Semantics { fn type_of_literal(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::LiteralExpression); - let tok = tree.nth_token(0)?; + let tok = &self[tree.nth_token(0)?]; let pig = match tok.kind { TokenKind::Number => Type::F64, TokenKind::String => Type::String, @@ -2015,7 +2059,7 @@ impl Semantics { let has_else = tree .nth_token(3) - .map(|t| t.kind == TokenKind::Else) + .map(|t| self[t].kind == TokenKind::Else) .unwrap_or(false); let else_type = if has_else { Some(self.type_of(tree.nth_tree(4)?)) @@ -2052,7 +2096,7 @@ impl Semantics { let f_ref = tree.nth_tree(0)?; let f = self.type_of(f_ref); - let arg_list = &self.syntax_tree[tree.nth_tree(1)?]; + let arg_list = &self[tree.nth_tree(1)?]; let arg_types: Vec<_> = arg_list .children .iter() @@ -2161,7 +2205,7 @@ impl Semantics { let lhs = tree.nth_tree(0)?; let typ = self.type_of(lhs); let env = self.member_environment(lhs, &typ); - let id = tree.nth_token(2)?; + let id = &self[tree.nth_token(2)?]; if id.kind != TokenKind::Identifier { return Some(self.type_error_for(tree)); } @@ -2202,7 +2246,7 @@ impl Semantics { let Some(root) = other.syntax_tree.root() else { self.internal_compiler_error(Some(t), "Other syntax tree has no root"); }; - let rt = &other.syntax_tree[root]; + let rt = &other[root]; assert_eq!(rt.kind, TreeKind::File); let mut result = Environment::new(self.mid, None, Location::Module); @@ -2227,7 +2271,7 @@ impl Semantics { assert_eq!(tree.kind, TreeKind::ExpressionStatement); let last_is_semicolon = tree .nth_token(tree.children.len() - 1) - .map(|t| t.kind == TokenKind::Semicolon) + .map(|t| self[t].kind == TokenKind::Semicolon) .unwrap_or(false); let expression_type = tree.nth_tree(0).map(|t| self.type_of(t)); @@ -2249,7 +2293,7 @@ impl Semantics { fn type_of_identifier(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::Identifier); - let id = tree.nth_token(0)?.as_str(&self.source); + let id = self[tree.nth_token(0)?].as_str(&self.source); let environment = self.environment_of(t); if let Some(declaration) = environment.bind(id) { let typ = self.type_of_declaration(declaration); @@ -2303,13 +2347,13 @@ impl Semantics { fn type_of_self_parameter(&self, tree: &Tree) -> Option { let pl = tree.parent?; - let param_list = &self.syntax_tree[pl]; + let param_list = &self[pl]; let fd = param_list.parent?; - let function_decl = &self.syntax_tree[fd]; + let function_decl = &self[fd]; let cd = function_decl.parent?; - let class_decl = &self.syntax_tree[cd]; + let class_decl = &self[cd]; if class_decl.kind != TreeKind::ClassDecl { let error = self.report_error_tree(tree, "self parameter only allowed in methods"); @@ -2322,7 +2366,7 @@ impl Semantics { fn type_of_self_reference(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::SelfReference); - let id = tree.nth_token(0)?.as_str(&self.source); + let id = self[tree.nth_token(0)?].as_str(&self.source); let environment = self.environment_of(t); if let Some(declaration) = environment.bind(id) { return Some(self.type_of_declaration(declaration)); @@ -2349,7 +2393,7 @@ impl Semantics { let mut parameter_types = Vec::new(); for p in param_list.child_trees() { let p_type = Box::new(self.type_of(p)); - if self.syntax_tree[p].kind == TreeKind::SelfParameter { + if self[p].kind == TreeKind::SelfParameter { self_type = Some(p_type); } else { parameter_types.push(p_type); @@ -2382,7 +2426,7 @@ impl Semantics { fn type_of_iterator_variable(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::IteratorVariable); - let parent = &self.syntax_tree[tree.parent?]; + let parent = &self[tree.parent?]; assert_eq!(parent.kind, TreeKind::ForStatement); let enumerable = parent.nth_tree(3)?; @@ -2443,7 +2487,7 @@ impl Semantics { // The details of a class are computed lazily, but this is enough of // a belly-button. - let name = tree.nth_token(1)?; + let name = &self[tree.nth_token(1)?]; // NOTE: There's a kind of a weird design decision here, which is to // return an instance type instead of a class type. This is @@ -2478,7 +2522,7 @@ impl Semantics { assert_eq!(tree.kind, TreeKind::FieldValue); if let Some(colon) = tree.nth_token(1) { - if colon.kind == TokenKind::Colon { + if self[colon].kind == TokenKind::Colon { // Form 1: { x: e, ... } return Some(self.type_of(tree.nth_tree(2)?)); } @@ -2486,7 +2530,7 @@ impl Semantics { // Form 2: { x, ... } let environment = self.environment_of(t); - let id = tree.nth_token(0)?.as_str(&self.source); + let id = self[tree.nth_token(0)?].as_str(&self.source); let declaration = match environment.bind(id) { Some(d) => d, None => { @@ -2593,7 +2637,7 @@ impl Semantics { } fn type_of_import(&self, tree: &Tree) -> Option { - let tok = tree.nth_token(1)?; + let tok = &self[tree.nth_token(1)?]; if tok.kind != TokenKind::String { return Some(self.type_error_for(tree)); } @@ -2618,10 +2662,10 @@ impl Semantics { fn constant_eval(&self, t: TreeRef) -> Option { // TODO: Make this cached, incremental, so the compiler can use it for optimizations. - let tree = &self.syntax_tree[t]; + let tree = &self[t]; match tree.kind { TreeKind::LiteralExpression => { - let tok = tree.nth_token(0)?; + let tok = &self[tree.nth_token(0)?]; match self.type_of(t) { Type::F64 => Some(StackValue::Float(tok.as_str(&self.source).parse().unwrap())), Type::Bool => Some(StackValue::Bool(tok.kind == TokenKind::True)), @@ -2635,7 +2679,7 @@ impl Semantics { TreeKind::IsExpression => { let pt = tree.nth_tree(2)?; - let pattern = &self.syntax_tree[pt]; + let pattern = &self[pt]; if pattern .child_of_kind(&self.syntax_tree, TreeKind::WildcardPattern) .is_some() @@ -2651,7 +2695,7 @@ impl Semantics { TreeKind::GroupingExpression => self.constant_eval(tree.nth_tree(1)?), TreeKind::UnaryExpression => { - let op = tree.nth_token(0)?.kind; + let op = self[tree.nth_token(0)?].kind; let val = self.constant_eval(tree.nth_tree(1)?)?; match (op, val) { @@ -2665,7 +2709,7 @@ impl Semantics { TreeKind::BinaryExpression => { let left = self.constant_eval(tree.nth_tree(0)?)?; let right = self.constant_eval(tree.nth_tree(2)?)?; - let op = tree.nth_token(1)?.kind; + let op = self[tree.nth_token(1)?].kind; match (op, left, right) { (TokenKind::Plus, StackValue::Float(a), StackValue::Float(b)) => { Some(StackValue::Float(a + b)) @@ -2787,11 +2831,11 @@ impl Semantics { } if let Some(tr) = tr { - eprintln!("This is about the tree: {:?}", &self.syntax_tree[tr]); + eprintln!("This is about the tree: {:?}", &self[tr]); eprintln!("The logical parent chain of the tree was:\n"); let mut current = Some(tr); while let Some(c) = current { - let t = &self.syntax_tree[c]; + let t = &self[c]; eprintln!(" {:?} [{}-{})", t.kind, t.start_pos, t.end_pos); current = self.logical_parents[c.index()]; } @@ -2824,7 +2868,7 @@ impl Semantics { pub fn check(s: &Semantics) { for t in s.syntax_tree.trees() { - let tree = &s.syntax_tree[t]; + let tree = &s[t]; match tree.kind { TreeKind::Error => {} // already reported TreeKind::File => {} @@ -2938,7 +2982,7 @@ fn check_function_decl(s: &Semantics, t: TreeRef, tree: &Tree) { // Just work very hard to get an appropriate error span. let (start, end) = return_type_tree .map(|t| { - let rtt = &s.syntax_tree[t]; + let rtt = &s[t]; (rtt.start_pos, rtt.end_pos) }) .unwrap_or_else(|| { @@ -2946,7 +2990,7 @@ fn check_function_decl(s: &Semantics, t: TreeRef, tree: &Tree) { let end_tok = tree .nth_token(1) .unwrap_or_else(|| tree.nth_token(0).unwrap()); - (start, end_tok.end()) + (start, s[end_tok].end()) }); s.report_error_span(start, end, format!("the body of this function yields a value of type '{body_type}', but callers expect this function to produce a '{return_type}'")); @@ -2958,6 +3002,7 @@ fn check_let(s: &Semantics, tree: &Tree) { let Some(name) = tree.nth_token(1) else { return; }; + let name = &s[name]; let Some(expr) = tree.nth_tree(3) else { return }; if let Type::Method(..) = s.type_of(expr) { @@ -2974,7 +3019,7 @@ fn check_return_statement(s: &Semantics, tree: &Tree) { let mut enclosing_function = tree.parent; while let Some(fp) = enclosing_function { - let fpt = &s.syntax_tree[fp]; + let fpt = &s[fp]; if fpt.kind == TreeKind::FunctionDecl { break; } @@ -3033,10 +3078,10 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) { let mut any_errors = false; let mut field_bindings = HashMap::new(); for field in field_list.children_of_kind(&s.syntax_tree, TreeKind::FieldValue) { - let f = &s.syntax_tree[field]; + let f = &s[field]; if let Some(name) = f.nth_token(0) { let field_type = s.type_of(field); - field_bindings.insert(name.as_str(&s.source), (field, field_type)); + field_bindings.insert(s[name].as_str(&s.source), (field, field_type)); } else { any_errors = true; } @@ -3085,11 +3130,11 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) { fn check_class_declaration(s: &Semantics, tree: &Tree) { let mut fields = HashMap::new(); for field in tree.children_of_kind(&s.syntax_tree, TreeKind::FieldDecl) { - let f = &s.syntax_tree[field]; + let f = &s[field]; let Some(name) = f.nth_token(0) else { continue; }; - let name = name.as_str(&s.source); + let name = s[name].as_str(&s.source); match fields.insert(name, field) { Some(_) => { s.report_error_tree(f, format!("duplicate definition of field '{name}'")); @@ -3102,7 +3147,7 @@ fn check_class_declaration(s: &Semantics, tree: &Tree) { fn check_pattern(s: &Semantics, tree: &Tree) { // If there's an AND then it must produce a boolean. let and_index = tree.children.iter().position(|c| match c { - Child::Token(t) => t.kind == TokenKind::And, + Child::Token(t) => s[t].kind == TokenKind::And, _ => false, }); if let Some(and_index) = and_index {