use crate::{ parser::{Child, SyntaxTree, TokenRef, Tree, TreeKind, TreeRef}, tokens::{Lines, Token, TokenKind}, vm::StackValue, }; use std::{ cell::{OnceCell, RefCell}, collections::HashMap, fmt, rc::{Rc, Weak}, }; // TODO: Unused variables? // TODO: Underscore for discard? // TODO: An error should have: // // - a start // - an end // - a focus // - descriptive messages // // that will have to wait for now #[derive(Clone, PartialEq, Eq)] pub struct Error { pub file: Rc, pub start: (usize, usize), pub end: (usize, usize), pub span: (usize, usize), pub message: String, } impl Error { pub fn new( file: Rc, start: (usize, usize), end: (usize, usize), span: (usize, usize), message: T, ) -> Self where T: ToString, { Error { file, start, end, span, message: message.to_string(), } } } impl fmt::Debug for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self}") } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{}:{}:{}: {}", self.file, self.start.0, self.start.1, self.message ) } } pub struct FieldDecl { pub name: Rc, pub field_type: Type, pub declaration: TreeRef, } pub struct MethodDecl { pub name: Rc, pub decl_type: Type, pub declaration: TreeRef, pub is_static: bool, } pub struct ClassDecl { pub name: Rc, pub fields: Vec, pub methods: Vec, pub decl_tree: TreeRef, pub env: EnvironmentRef, pub static_env: EnvironmentRef, } #[derive(Clone)] pub struct ClassRef(Rc); impl ClassRef { pub fn new(class: ClassDecl) -> Self { ClassRef(Rc::new(class)) } } impl std::ops::Deref for ClassRef { type Target = ClassDecl; fn deref(&self) -> &Self::Target { &self.0 } } #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct ModuleId(u64); impl From for ModuleId { fn from(value: u64) -> Self { ModuleId(value) } } impl fmt::Display for ModuleId { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "#{}", self.0) } } pub struct ModuleTable { modules: HashMap>, } impl ModuleTable { pub 
fn new() -> ModuleTable { ModuleTable { modules: HashMap::new(), } } pub fn set_module(&mut self, id: ModuleId, semantics: Weak) { self.modules.insert(id, semantics); } pub fn get_module(&self, id: &ModuleId) -> Option> { self.modules.get(id).map(|s| s.upgrade()).flatten() } pub fn iter(&self) -> std::collections::hash_map::Iter<'_, ModuleId, Weak> { self.modules.iter() } } #[derive(Clone)] pub enum Type { // Signals a type error. If you receive this then you know that an error // has already been reported; if you produce this be sure to also note // the error in the errors collection. Error(Rc), // Signals that the expression has a control-flow side-effect and that no // value will ever result from this expression. Usually this means // everything's fine. Unreachable, // The type of an assignment expression. Assignments look like // expressions but cannot be used in places that expect them (e.g., in // `if` conditions), and so this is how we signal that. (We can, however, // chain assignments, and so we flow the type of the assignment through.) Assignment(Box), // An potentially-bound type variable. // We need to ... like ... unify these things if possible. TypeVariable(TreeRef), Nothing, // TODO: Numeric literals should be implicitly convertable, unlike other // types. Maybe just "numeric literal" type? F64, I64, String, Bool, Function(Vec>, Box), // A method is like a function except that it takes a self parameter. // This is how we signal the difference. We do *not* count them as the // same. Method(Box, Vec>, Box), List(Box), // A class is the static type of a class; when the class is referred to // by name it has this type. (Distinct from an instance!) Class(ModuleId, TreeRef, Rc), // An object is the type of an allocated object instance. Details of its // class need to be fetched explicitly from the semantics via the // TreeRef and `Semantics::class_of`; they are computed lazily. Object(ModuleId, TreeRef, Rc), // An alternate is one or another type. 
Alternate(Box<[Type]>), // A module of some kind. What module? Module(Rc, ModuleId), } impl Type { pub fn is_error(&self) -> bool { match self { Type::Error(..) => true, _ => false, } } fn discriminant_number(&self) -> i8 { match self { Type::Error(..) => 0, Type::Unreachable => 1, Type::Assignment(..) => 2, Type::TypeVariable(..) => 3, Type::Nothing => 4, Type::F64 => 5, Type::I64 => 6, Type::String => 7, Type::Bool => 8, Type::Function(..) => 9, Type::Method(..) => 10, Type::List(..) => 11, Type::Class(..) => 12, Type::Object(..) => 13, Type::Alternate(..) => 14, Type::Module(..) => 15, } } } impl fmt::Debug for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{self}") } } impl fmt::Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use Type::*; match self { Error(e) => write!(f, "<< INTERNAL ERROR ({e}) >>"), Unreachable => write!(f, "<< UNREACHABLE >>"), Assignment(_) => write!(f, "assignment"), Nothing => write!(f, "nothing"), F64 => write!(f, "f64"), I64 => write!(f, "i64"), String => write!(f, "string"), Bool => write!(f, "bool"), Function(args, ret) => { write!(f, "fun (")?; let mut first = true; for arg in args.iter() { if !first { write!(f, ", ")?; } write!(f, "{arg}")?; first = false; } write!(f, ") -> {ret}") } Method(self_type, args, ret) => { write!(f, "method of {self_type} (")?; let mut first = true; for arg in args.iter() { if !first { write!(f, ", ")?; } write!(f, "{arg}")?; first = false; } write!(f, ") -> {ret}") } TypeVariable(_) => { // TODO: Better names for type variable write!(f, "$_") } List(t) => write!(f, "list<{t}>"), Object(_, _, name) => write!(f, "{} instance", name), Class(_, _, name) => write!(f, "class {}", name), Alternate(ts) => { let mut first = true; for t in ts.iter() { if !first { write!(f, " or ")?; } write!(f, "{t}")?; first = false; } Ok(()) } Module(name, _) => write!(f, "module {}", name), } } } impl std::cmp::PartialEq for Type { fn eq(&self, other: &Self) -> bool 
{ self.cmp(other).is_eq() } } impl std::cmp::Eq for Type {} impl std::cmp::PartialOrd for Type { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl std::cmp::Ord for Type { fn cmp(&self, other: &Self) -> std::cmp::Ordering { use std::cmp::Ordering; let li = self.discriminant_number(); let ri = other.discriminant_number(); if li < ri { Ordering::Less } else if li > ri { Ordering::Greater } else { match (self, other) { (Type::Assignment(x), Type::Assignment(y)) => x.cmp(y), // NOTE: This is wrong! Type variables *cannot* be compared // like this without binding information. (Type::TypeVariable(x), Type::TypeVariable(y)) => x.index().cmp(&y.index()), (Type::Function(la, lr), Type::Function(ra, rr)) => { let lv = (la, lr); let rv = (ra, rr); lv.cmp(&rv) } (Type::Method(lo, la, lr), Type::Method(ro, ra, rr)) => { let lv = (lo, la, lr); let rv = (ro, ra, rr); lv.cmp(&rv) } (Type::List(x), Type::List(y)) => x.cmp(y), (Type::Class(lm, lt, _), Type::Class(rm, rt, _)) => { (lm.0, lt.index()).cmp(&(rm.0, rt.index())) } (Type::Object(lm, lt, _), Type::Object(rm, rt, _)) => { (lm.0, lt.index()).cmp(&(rm.0, rt.index())) } (Type::Alternate(ll), Type::Alternate(rr)) => ll.cmp(rr), _ => Ordering::Equal, } } } } // NOTE: I tried to actually embed the coordinate inside the location but // that doesn't work well for other things we want to express, so we // leave it alone. A data modeling maximalist might make *two* enums // (with and without a coordinate) but... that's a lot of complexity // for very little gain. Maybe we can come back to it when things this // design is a little more stable. 
#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Location { Argument, // An argument to a function Local, // A local in an frame Slot, // A slot in an object Module, // A global in a module Function, // A function in a module ExternalFunction, // An external function (module unrelated) Class, // A class in a module Import, // An import in a module (index unrelated) } // TODO: Is `usize` what we want? Do we want e.g. dyn trait for invoke? #[derive(Clone, Debug)] pub struct ExternalFunctionId(usize); impl ExternalFunctionId { pub fn id(&self) -> usize { self.0 } } #[derive(Clone, Debug)] pub enum Origin { Source(TreeRef), External(Type), } #[derive(Clone, Debug)] pub struct Declaration { pub location: Location, pub index: usize, pub module: ModuleId, pub origin: Origin, pub exported: bool, } impl Declaration { pub fn is_exported(&self) -> bool { self.exported } pub fn tree(&self) -> Option { match self.origin { Origin::Source(t) => Some(t), _ => None, } } pub fn set_exported(&mut self) { self.exported = true } } pub struct Environment { pub parent: Option, pub module: ModuleId, pub location: Location, pub next_index: usize, pub declarations: HashMap, Declaration>, pub error: Option>, } impl Environment { pub fn new(module: ModuleId, parent: Option, location: Location) -> Self { let parent_location = parent .as_ref() .map(|p| p.location) .unwrap_or(Location::Module); let base = parent.as_ref().map(|p| p.next_index).unwrap_or(0); let next_index = match (parent_location, location) { (_, Location::Argument) => 0, (_, Location::Slot) => 0, (Location::Local, Location::Local) => base, (_, Location::Local) => 0, (Location::Module, Location::Module) => base, _ => panic!("{location:?} is not suitable as a default location"), }; Environment { parent, module, location, next_index, declarations: HashMap::new(), error: None, } } pub fn is_error(&self) -> bool { self.error.is_some() } pub fn error(why: Rc) -> EnvironmentRef { EnvironmentRef::new(Environment { parent: None, 
module: ModuleId(0), location: Location::Local, next_index: 0, declarations: HashMap::new(), error: Some(why), }) } pub fn insert(&mut self, token: &str, t: TreeRef) -> Option { self.insert_name(token.into(), t) } pub fn insert_name(&mut self, name: Box, t: TreeRef) -> Option { let result = self.declarations.insert( name, Declaration { location: self.location, index: self.next_index, module: self.module, origin: Origin::Source(t), exported: false, }, ); self.next_index += 1; result } pub fn bind(&self, token: &str) -> Option<&Declaration> { if let Some(decl) = self.declarations.get(token) { return Some(decl); } let mut current = &self.parent; while let Some(env) = current { if let Some(decl) = env.declarations.get(token) { return Some(decl); } current = &env.parent; } None } } #[derive(Clone)] pub struct EnvironmentRef(Rc); impl EnvironmentRef { pub fn new(environment: Environment) -> Self { EnvironmentRef(Rc::new(environment)) } } impl std::ops::Deref for EnvironmentRef { type Target = Environment; fn deref(&self) -> &Self::Target { &self.0 } } fn set_logical_parents( parents: &mut Vec>, syntax_tree: &SyntaxTree, t: TreeRef, parent: Option, ) { parents[t.index()] = parent.clone(); let tree = &syntax_tree[t]; // eprintln!("SET PARENT {parent:?} => CHILD {tree:?} ({t:?})"); match tree.kind { TreeKind::Block | TreeKind::File => { // In a block (or at the top level), each child actually points // to the previous child as the logical parent, so that variable // declarations that occur as part of statements in the block are // available to statements later in the block. let mut parent = Some(t); for child in &tree.children { match child { Child::Token(_) => (), Child::Tree(ct) => { set_logical_parents(parents, syntax_tree, *ct, parent); parent = Some(*ct); } } } } TreeKind::LetStatement => { // In a let statement, the logical parent of the children is // actually the logical parent of the let statement, so that the // variable doesn't have itself in scope. 
:P for child in &tree.children { match child { Child::Token(_) => (), Child::Tree(ct) => set_logical_parents(parents, syntax_tree, *ct, parent), } } } TreeKind::FunctionDecl => { // In a function declaration, the logical parent of the body is // the parameter list. let param_list = tree.child_of_kind(syntax_tree, TreeKind::ParamList); let body = tree.child_of_kind(syntax_tree, TreeKind::Block); for child in &tree.children { match child { Child::Token(_) => (), Child::Tree(ct) => { if Some(*ct) == body { set_logical_parents(parents, syntax_tree, *ct, param_list); } else { set_logical_parents(parents, syntax_tree, *ct, Some(t)); } } } } } TreeKind::ForStatement => { let body = tree.child_of_kind(syntax_tree, TreeKind::Block); for child in &tree.children { match child { Child::Token(_) => (), Child::Tree(ct) => { if Some(*ct) == body { set_logical_parents(parents, syntax_tree, *ct, Some(t)); } else { // If it's not the body then it must be the // iterable and the iterable doesn't have the // loop variable in scope. set_logical_parents(parents, syntax_tree, *ct, parent); } } } } } TreeKind::ConditionalExpression => { // Special case! The parent of the `then` clause is the // condition, so any variable bound by the condition is valid in // the `then` clause. The `else` clause and the condition itself // do not have the bindings in scope, obviously. let body_parent = if let Some(is_condition) = tree.nth_tree(1) { Some(is_condition) } else { Some(t) }; let then_body = tree.nth_tree(2); for child in &tree.children { match child { Child::Token(_) => (), Child::Tree(ct) => { if Some(*ct) == then_body { set_logical_parents(parents, syntax_tree, *ct, body_parent); } else { set_logical_parents(parents, syntax_tree, *ct, Some(t)); } } } } } TreeKind::WhileStatement => { // Just like `if`, bindings in the condition are valid in the body. 
let body_parent = if let Some(is_condition) = tree.nth_tree(1) { Some(is_condition) } else { Some(t) }; let then_body = tree.nth_tree(2); for child in &tree.children { match child { Child::Token(_) => (), Child::Tree(ct) => { if Some(*ct) == then_body { set_logical_parents(parents, syntax_tree, *ct, body_parent); } else { set_logical_parents(parents, syntax_tree, *ct, Some(t)); } } } } } _ => { // By default, the parent for each child is current tree. for child in &tree.children { match child { Child::Token(_) => (), Child::Tree(ct) => set_logical_parents(parents, syntax_tree, *ct, Some(t)), } } } } } // Process escapes and convert a string constant in source to a runtime String value. pub fn string_constant_to_string(s: &str) -> String { let mut result = String::new(); if s.len() <= 2 { return result; } let mut input = s[1..s.len() - 1].chars(); while let Some(ch) = input.next() { if ch == '\\' { if let Some(ch) = input.next() { match ch { 'n' => result.push('\n'), 'r' => result.push('\r'), 't' => result.push('\t'), _ => result.push(ch), } } else { result.push(ch) } } else { result.push(ch) } } result } #[derive(Clone, Copy, Debug)] enum Incremental { None, InProgress, Complete(T), } pub struct Semantics { mid: ModuleId, file: Rc, source: Rc, syntax_tree: Rc, lines: Rc, module_table: RefCell>, import_map: OnceCell>, // Instead of physical parents, this is the set of *logical* parents. // This is what is used for binding. logical_parents: Vec>, function_count: usize, function_indices: Vec>, // TODO: State should be externalized instead of this refcell nonsense. 
errors: RefCell>>, types: RefCell>>, environments: RefCell>>, root_environment: EnvironmentRef, classes: RefCell>>, } impl std::ops::Index for Semantics { type Output = Tree; #[inline] fn index(&self, index: TreeRef) -> &Self::Output { &self.syntax_tree[index] } } impl std::ops::Index<&TreeRef> for Semantics { type Output = Tree; #[inline] fn index(&self, index: &TreeRef) -> &Self::Output { &self.syntax_tree[index] } } impl std::ops::Index for Semantics { type Output = Token; #[inline] fn index(&self, index: TokenRef) -> &Self::Output { &self.syntax_tree[index] } } impl std::ops::Index<&TokenRef> for Semantics { type Output = Token; #[inline] fn index(&self, index: &TokenRef) -> &Self::Output { &self.syntax_tree[index] } } impl Semantics { pub fn new( mid: ModuleId, file: Rc, source: Rc, tree: Rc, lines: Rc, ) -> Self { let mut logical_parents = vec![None; tree.len()]; if let Some(root) = tree.root() { set_logical_parents(&mut logical_parents, &tree, root, None); } let root_environment = Environment::new(mid, None, Location::Module); let mut function_count = 0; let mut function_indices = vec![None; tree.len()]; for t in tree.trees() { let tree = &tree[t]; match tree.kind { TreeKind::FunctionDecl | TreeKind::ClassDecl => { function_indices[t.index()] = Some(function_count); function_count += 1; } _ => {} } } let mut semantics = Semantics { mid, file, source, syntax_tree: tree.clone(), lines, module_table: RefCell::new(Rc::new(ModuleTable::new())), import_map: OnceCell::new(), logical_parents, function_count, function_indices, errors: RefCell::new(vec![]), types: RefCell::new(vec![Incremental::None; tree.len()]), environments: RefCell::new(vec![Incremental::None; tree.len()]), root_environment: EnvironmentRef::new(root_environment), classes: RefCell::new(vec![Incremental::None; tree.len()]), }; // NOTE: We ensure all the known errors are reported before we move // on to answering any other questions. We're going to work as // hard as we can from a partial tree. 
if let Some(tr) = semantics.syntax_tree.root() { semantics.gather_errors(tr); } semantics } pub fn set_imports(&self, imports: HashMap) { self.import_map.set(imports).expect("imports already set"); } pub fn set_module_table(&self, table: Rc) { self.module_table.replace(table); } pub fn import_ids(&self) -> Vec { let import_map = self.import_map.get().unwrap(); import_map.values().map(|id| *id).collect() } pub fn import_by_id(&self, mid: ModuleId) -> Option> { self.module_table.borrow().get_module(&mid) } pub fn source(&self) -> Rc { self.source.clone() } pub fn tree(&self) -> Rc { self.syntax_tree.clone() } pub fn lines(&self) -> Rc { self.lines.clone() } pub fn imports(&self) -> Vec { self.syntax_tree .root() .map(|file| { self[file] .children_of_kind(&self.syntax_tree, TreeKind::Import) .filter_map(|import| { let tok = &self[self[import].nth_token(1)?]; if tok.kind != TokenKind::String { None } else { Some(string_constant_to_string(tok.as_str(&self.source))) } }) .collect() }) .unwrap_or(Vec::new()) } pub fn snapshot_errors(&self) -> Vec> { let mut result = (*self.errors.borrow()).clone(); result.sort_by_key(|a| a.span.0); result } pub fn logical_parent(&self, tr: TreeRef) -> Option { if tr.index() < self.logical_parents.len() { self.logical_parents[tr.index()] } else { None } } pub fn mid(&self) -> ModuleId { self.mid } fn report_error_span(&self, start_pos: usize, end_pos: usize, error: T) -> Rc where T: ToString, { let start = self.lines.position(start_pos); let end = self.lines.position(end_pos); let error = Rc::new(Error::new( self.file.clone(), start, end, (start_pos, end_pos), error.to_string(), )); self.errors.borrow_mut().push(error.clone()); error } fn report_error_tree(&self, tree: &Tree, error: T) -> Rc where T: ToString, { self.report_error_span(tree.start_pos, tree.end_pos, error) } fn report_error_tree_ref(&self, tree: TreeRef, error: T) -> Rc where T: ToString, { let tree = &self[tree]; self.report_error_span(tree.start_pos, tree.end_pos, error) } 
fn gather_errors(&mut self, tree: TreeRef) { let mut stack = vec![tree]; while let Some(tr) = stack.pop() { let tree = &self[tr]; for child in &tree.children { match child { Child::Token(t) => { let t = &self[*t]; if t.kind == TokenKind::Error { self.report_error_span(t.start(), t.end(), t.as_str(&self.source)); } } Child::Tree(t) => stack.push(*t), } } } } // TODO: Here we're just looking for *an* error, not the most specific // error. fn find_error(&self, tree: &Tree) -> Option> { let mut result = (*self.errors.borrow()).clone(); result.sort_by_key(|a| a.span.0); let mut error = None; for candidate in result.into_iter() { let (candiate_start, candidate_end) = candidate.span; if candidate_end < tree.start_pos { continue; } if candiate_start > tree.end_pos { break; } // End is after our point, Start is before our point, we are // inside. This error at least affects us somehow. error = Some(candidate); } error } fn type_error_for(&self, tree: &Tree) -> Type { let Some(error) = self.find_error(&tree) else { self.internal_compiler_error( Some(tree.self_ref), "Unable to find a diagnostic that encompasses the tree generating an error type", ); }; Type::Error(error) } fn environment_error_for(&self, tree: &Tree) -> EnvironmentRef { let Some(error) = self.find_error(&tree) else { self.internal_compiler_error( Some(tree.self_ref), "Unable to find a diagnostic that encompasses the tree generating an error environment", ); }; Environment::error(error) } pub fn function_count(&self) -> usize { self.function_count } pub fn get_function_index(&self, t: TreeRef) -> Option { let index = t.index(); if index >= self.function_indices.len() { None } else { self.function_indices[t.index()] } } pub fn function_index_of(&self, t: TreeRef) -> usize { let Some(index) = self.function_indices[t.index()] else { self.internal_compiler_error(Some(t), "Why didn't I get a function index for this?"); }; index } pub fn environment_of(&self, t: TreeRef) -> EnvironmentRef { { // I want to make sure 
that this borrow is dropped after this block. let mut borrow = self.environments.borrow_mut(); let state = &mut borrow[t.index()]; match state { Incremental::None => (), Incremental::Complete(e) => return e.clone(), Incremental::InProgress => { // NOTE: Set the state so the ICE doesn't loop on itself. *state = Incremental::Complete(self.root_environment.clone()); drop(borrow); //eprintln!("environment_of circular => {t:?}"); self.internal_compiler_error(Some(t), "circular environment dependency"); } } *state = Incremental::InProgress; } let tree = &self[t]; // eprintln!(">>> environment_of => {tree:?}"); let parent = match self.logical_parents[t.index()] { Some(t) => self.environment_of(t), None => self.root_environment.clone(), }; let result = match tree.kind { TreeKind::Block => self.environment_of_block(parent, tree), TreeKind::File => self.environment_of_file(parent, tree), TreeKind::ForStatement => self.environment_of_for(parent, tree), TreeKind::IsExpression => self.environment_of_is_expression(parent, tree), TreeKind::LetStatement => self.environment_of_let(parent, tree), TreeKind::MatchArm => self.environment_of_match_arm(parent, t, tree), TreeKind::ParamList => self.environment_of_paramlist(parent, tree), _ => parent, }; self.environments.borrow_mut()[t.index()] = Incremental::Complete(result.clone()); // eprintln!("<<< environment_of => {tree:?}"); result } fn environment_of_block(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef { let mut environment = Environment::new(self.mid, Some(parent), Location::Local); for child in tree.children.iter() { match child { Child::Tree(t) => { let ct = &self[*t]; if ct.kind == TreeKind::FunctionDecl { let Some(name) = ct.nth_token(1) else { continue; }; let existing = environment.declarations.insert( self[name].as_str(&self.source).into(), Declaration { location: Location::Function, index: self.function_index_of(*t), module: self.mid, origin: Origin::Source(*t), exported: false, }, ); if existing.is_some() 
{ self.report_error_tree( ct, format!( "duplicate definition of function '{}'", self[name].as_str(&self.source) ), ); } } } _ => {} } } EnvironmentRef::new(environment) } fn environment_of_file(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef { let mut environment = Environment::new(self.mid, Some(parent), Location::Module); let mut explicit_exports = Vec::new(); for child in tree.children.iter() { let Child::Tree(t) = child else { continue; }; let binding = { // Redeclare t to be mutable (and a copy) let mut t = *t; let mut exported = false; // Loop here in order to dereference TreeKind::Export; // children of an export tree still go in the local // environment. loop { let ct = &self[t]; match ct.kind { TreeKind::FunctionDecl => { let Some(name) = ct.nth_token(1) else { break None; }; let name = &self[name]; if name.kind != TokenKind::Identifier { break None; } let declaration = Declaration { location: Location::Function, index: self.function_index_of(t), module: self.mid, origin: Origin::Source(t), exported, }; break Some(("function", name, declaration)); } TreeKind::ClassDecl => { let Some(name) = ct.nth_token(1) else { break None; }; let name = &self[name]; if name.kind != TokenKind::Identifier { break None; } let declaration = Declaration { location: Location::Class, index: self.function_index_of(t), module: self.mid, origin: Origin::Source(t), exported, }; break Some(("class", name, declaration)); } TreeKind::Import => { let Some(name) = ct.nth_token(3) else { break None; }; let name = &self[name]; if name.kind != TokenKind::Identifier { break None; } let declaration = Declaration { location: Location::Import, index: 0, module: self.mid, origin: Origin::Source(t), exported, }; break Some(("import", name, declaration)); } TreeKind::Export => { let Some(inner) = ct.nth_tree(1) else { break None; }; t = inner; exported = true; continue; } TreeKind::ExportList => { for child in &ct.children { if let Child::Token(tok) = child { let tok = &self[tok]; 
if tok.kind == TokenKind::Identifier { explicit_exports.push(tok); } } } } _ => break None, } } }; let ct = &self[*t]; if let Some((what, name, declaration)) = binding { let existing = environment .declarations .insert(name.as_str(&self.source).into(), declaration); if existing.is_some() { self.report_error_tree( ct, format!( "duplicate definition of {what} '{}'", name.as_str(&self.source) ), ); } } } for tok in explicit_exports { environment .declarations .get_mut(tok.as_str(&self.source)) // NOTE: If not present, we report the error elsewhere. .map(|decl| decl.set_exported()); } EnvironmentRef::new(environment) } fn environment_of_let(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef { let Some(name) = tree.nth_token(1) else { return parent; // Error is already reported, don't clobber parent bindings. }; let Some(declaration) = tree.nth_tree(3) else { return parent; // Error is already reported, don't clobber parent bindings. }; let location = match parent.location { Location::Local => Location::Local, Location::Module => Location::Module, Location::Argument => Location::Local, Location::Slot => Location::Local, _ => { let message = format!( "Unsuitable environment location for a let: {:?}", parent.location ); self.internal_compiler_error(Some(tree.self_ref), &message); } }; let mut environment = Environment::new(self.mid, Some(parent), location); environment.insert(self[name].as_str(&self.source), declaration); EnvironmentRef::new(environment) } fn environment_of_paramlist(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef { assert!(tree.kind == TreeKind::ParamList); let mut environment = Environment::new(self.mid, Some(parent), Location::Argument); for (i, child) in tree.children.iter().enumerate() { let Child::Tree(ct) = child else { continue; }; let param = &self[*ct]; match param.kind { TreeKind::SelfParameter => { let param_name = &self[param.nth_token(0).unwrap()]; if environment .insert(param_name.as_str(&self.source), *ct) 
.is_some() { self.report_error_tree( param, format!("duplicate definition of self parameter"), ); } else if i != 1 { self.report_error_tree( param, "self parameter must be the first parameter} in the list", ); } } TreeKind::Parameter => { let Some(param_name) = param.nth_token(0) else { continue; }; let param_name = &self[param_name]; let param_str = param_name.as_str(&self.source); if environment.insert(param_str, *ct).is_some() { self.report_error_tree( param, format!("duplicate definition of parameter '{param_str}'"), ); } } _ => (), } } EnvironmentRef::new(environment) } fn environment_of_for(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef { let Some(it) = tree.nth_tree(1) else { return parent; }; let iterator = &self[it]; let Some(id) = iterator.nth_token(0) else { return parent; }; let mut environment = Environment::new(self.mid, Some(parent), Location::Local); environment.insert(&self[id].as_str(&self.source), it); EnvironmentRef::new(environment) } fn environment_of_is_expression(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef { assert_eq!(tree.kind, TreeKind::IsExpression); // The environment of an `is` expression is the environment produced by the pattern. let Some(pattern) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::Pattern) else { // Should really have a pattern in there; otherwise there was a // parse error, don't make more trouble. return self.environment_error_for(tree); }; // The left hand side of the `is` expression is used for wildcard types. let Some(lhs) = tree.nth_tree(0) else { return self.environment_error_for(tree); }; self.environment_of_pattern(parent, pattern, lhs) } fn environment_of_match_arm( &self, parent: EnvironmentRef, t: TreeRef, tree: &Tree, ) -> EnvironmentRef { assert_eq!(tree.kind, TreeKind::MatchArm); // The environment of a `match arm` expression is the environment produced by the pattern. 
let Some(pattern) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::Pattern) else { // Should really have a pattern in there; otherwise there was a // parse error, don't make more trouble. return self.environment_error_for(tree); }; // The expression in the match expression is the binding for the wildcard pattern. // If we are somewhere weird then... uh.... let Some(match_body) = tree.parent else { self.internal_compiler_error(Some(t), "no parent on match arm"); }; let match_body = &self[match_body]; if match_body.kind != TreeKind::MatchBody { self.internal_compiler_error(Some(t), "match arm parent not match body"); } let Some(match_expression) = match_body.parent else { self.internal_compiler_error(Some(t), "no parent on match body"); }; let match_expression = &self[match_expression]; if match_expression.kind != TreeKind::MatchExpression { self.internal_compiler_error(Some(t), "match body parent not match expression"); } // The expression is the first tree child of match expression. let Some(lhs) = tree.nth_tree(2) else { return self.environment_error_for(tree); }; self.environment_of_pattern(parent, pattern, lhs) } // NOTE: THIS IS CALLED DIRECTLY, NOT VIA `environment_of` TO AVOID CYCLES. fn environment_of_pattern( &self, parent: EnvironmentRef, tree: &Tree, value_expr: TreeRef, ) -> EnvironmentRef { assert_eq!(tree.kind, TreeKind::Pattern); let Some(binding) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::VariableBinding) else { // No binding, no new environment. return parent; }; let Some(variable) = binding.nth_token(0) else { return self.environment_error_for(binding); }; let is_wildcard = tree .child_of_kind(&self.syntax_tree, TreeKind::WildcardPattern) .is_some(); let variable_decl = if is_wildcard { // If the variable is bound to a wildcard then treat the value // expression as the declaration for the purpose of determining // type. 
value_expr } else { // Otherwise the binding is to the type expression which must // match for the variable to have a value. let Some(type_expr) = tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression) else { return self.environment_error_for(tree); }; type_expr }; // TODO: This binding should be un-assignable! Don't assign to this! let mut env = Environment::new(self.mid, Some(parent), Location::Local); env.insert(&self[variable].as_str(&self.source), variable_decl); EnvironmentRef::new(env) } pub fn class_of(&self, mid: ModuleId, t: TreeRef) -> ClassRef { if mid != self.mid { let Some(other_semantics) = self.import_by_id(mid) else { self.internal_compiler_error(Some(t), "Have a class we can't resolve"); }; return other_semantics.class_of(mid, t); } { // I want to make sure that this borrow is dropped after this block. let mut borrow = self.classes.borrow_mut(); let state = &mut borrow[t.index()]; match state { Incremental::None => (), Incremental::Complete(e) => return e.clone(), Incremental::InProgress => { drop(borrow); self.internal_compiler_error(Some(t), "circular class dependency"); } } *state = Incremental::InProgress; } // TODO: Right now there's only one way to make a class decl. 
:P let tree = &self[t]; assert_eq!(tree.kind, TreeKind::ClassDecl); let name = tree .nth_token(1) .map(|t| self[t].as_str(&self.source)) .unwrap_or(""); // Fields let mut fields = Vec::new(); for field in tree.children_of_kind(&self.syntax_tree, TreeKind::FieldDecl) { let f = &self[field]; if let Some(field_name) = f.nth_token(0) { let field_type = f .nth_tree(2) .map(|t| self.type_of(t)) .unwrap_or_else(|| self.type_error_for(f)); fields.push(FieldDecl { name: self[field_name].as_str(&self.source).into(), declaration: field, field_type, }); } } // Methods let mut methods = Vec::new(); for method in tree.children_of_kind(&self.syntax_tree, TreeKind::FunctionDecl) { let m = &self[method]; if let Some(method_name) = m.nth_token(1) { let method_name = &self[method_name]; // TODO: Check to see if it is actually a method, or if it is a static function. let decl_type = self.type_of(method); match decl_type { Type::Method(..) => { methods.push(MethodDecl { name: method_name.as_str(&self.source).into(), decl_type, declaration: method, is_static: false, }); } _ => { // TODO: Default to method or static? 
methods.push(MethodDecl { name: method_name.as_str(&self.source).into(), decl_type, declaration: method, is_static: true, }); } } } } // Build into an environment let mut env = Environment::new(self.mid, None, Location::Slot); let mut static_env = Environment::new(self.mid, None, Location::Slot); for (index, field) in fields.iter().enumerate() { env.declarations.insert( (&*field.name).into(), Declaration { location: Location::Slot, index, module: self.mid, origin: Origin::Source(field.declaration), exported: false, }, ); } for method in methods.iter() { let target = if method.is_static { &mut static_env.declarations } else { &mut env.declarations }; let existing = target.insert( (&*method.name).into(), Declaration { location: Location::Function, index: self.function_index_of(method.declaration), module: self.mid, origin: Origin::Source(method.declaration), exported: false, }, ); if existing.is_some() { self.report_error_tree_ref( method.declaration, format!("duplicate definition of method '{}'", method.name), ); } } let result = ClassRef::new(ClassDecl { name: name.into(), fields: fields.into(), methods: methods.into(), decl_tree: t, env: EnvironmentRef::new(env), static_env: EnvironmentRef::new(static_env), }); self.classes.borrow_mut()[t.index()] = Incremental::Complete(result.clone()); result } pub fn build_alternate(&self, left: &Type, right: &Type) -> Type { if left.is_error() { left.clone() } else if right.is_error() { right.clone() } else if self.can_convert(left, right) { right.clone() } else if self.can_convert(right, left) { left.clone() } else { let mut types: Vec = Vec::new(); if let Type::Alternate(ts) = left { types.extend_from_slice(&*ts); } else { types.push(left.clone()); } if let Type::Alternate(ts) = right { types.extend_from_slice(&*ts); } else { types.push(right.clone()); } types.sort(); types.dedup(); Type::Alternate(types.into()) } } pub fn can_convert(&self, from: &Type, to: &Type) -> bool { // TODO: This is wrong; we because of numeric 
literals etc. match (from, to) { (Type::F64, Type::F64) => true, (Type::String, Type::String) => true, (Type::Bool, Type::Bool) => true, (Type::Unreachable, Type::Unreachable) => true, (Type::Nothing, Type::Nothing) => true, (Type::List(from), Type::List(to)) => self.can_convert(from, to), (Type::Function(from_args, from_ret), Type::Function(to_args, to_ret)) => { from_args.len() == from_args.len() && self.can_convert(to_ret, from_ret) && from_args .iter() .zip(to_args.iter()) .all(|(from, to)| self.can_convert(from, to)) } (Type::Object(m_from, c_from, _), Type::Object(m_to, c_to, _)) => { // TODO: Structural comparisons. All that matters is that // c_to has a subset of fields and methods, and the // fields and methods are all compatible. // m_from == m_to && c_from == c_to } // Avoid introducing more errors (Type::Error(_), _) => true, (_, Type::Error(_)) => true, // Can... I... convert unreachable always? Is this sound? (Type::Unreachable, _) => true, // Alternates convert if either side can convert. (Type::Alternate(lts), Type::Alternate(_)) => { for lt in lts.iter() { if !self.can_convert(lt, to) { return false; } } true } (_, Type::Alternate(rts)) => { for rt in rts.iter() { if self.can_convert(from, rt) { return true; } } false } // TODO: Unification on type variables! 
:D (_, _) => false, } } pub fn type_of(&self, t: TreeRef) -> Type { { let state = &mut self.types.borrow_mut()[t.index()]; match state { Incremental::None => (), Incremental::Complete(existing) => return existing.clone(), Incremental::InProgress => { // eprintln!("type_of circular => {t:?}"); let error = self .report_error_tree_ref(t, "The type of this expression depends on itself"); let e_type = Type::Error(error); *state = Incremental::Complete(e_type.clone()); return e_type; } } *state = Incremental::InProgress; } let tree = &self[t]; // eprintln!(">>> type_of => {tree:?}"); let result = match tree.kind { TreeKind::Error => Some(self.type_error_for(tree)), TreeKind::AlternateType => self.type_of_alternate_type(tree), TreeKind::Argument => self.type_of_argument(tree), TreeKind::BinaryExpression => self.type_of_binary(tree), TreeKind::Block => self.type_of_block(tree), TreeKind::CallExpression => self.type_of_call(tree), TreeKind::ClassDecl => self.type_of_class_decl(t, tree), TreeKind::ConditionalExpression => self.type_of_conditional(tree), TreeKind::ExpressionStatement => self.type_of_expression_statement(tree), TreeKind::FieldDecl => self.type_of_field_decl(tree), TreeKind::FieldValue => self.type_of_field_value(t, tree), TreeKind::ForStatement => Some(Type::Nothing), TreeKind::FunctionDecl => self.type_of_function_decl(tree), TreeKind::GroupingExpression => self.type_of_grouping(tree), TreeKind::Identifier => self.type_of_identifier(t, tree), TreeKind::IfStatement => self.type_of_if_statement(tree), TreeKind::IsExpression => Some(Type::Bool), TreeKind::IteratorVariable => self.type_of_iterator_variable(tree), TreeKind::LetStatement => Some(Type::Nothing), TreeKind::ListConstructor => self.type_of_list_constructor(t, tree), TreeKind::ListConstructorElement => self.type_of_list_constructor_element(tree), TreeKind::LiteralExpression => self.type_of_literal(tree), TreeKind::MatchArm => self.type_of_match_arm(tree), TreeKind::MatchBody => 
self.type_of_match_body(tree), TreeKind::MatchExpression => self.type_of_match_expression(tree), TreeKind::MemberAccess => self.type_of_member_access(tree), TreeKind::NewObjectExpression => self.type_of_new_object_expression(tree), TreeKind::Parameter => self.type_of_parameter(tree), TreeKind::Pattern => self.type_of_pattern(tree), TreeKind::ReturnStatement => Some(Type::Unreachable), TreeKind::ReturnType => self.type_of_return_type(tree), TreeKind::SelfParameter => self.type_of_self_parameter(tree), TreeKind::SelfReference => self.type_of_self_reference(t, tree), TreeKind::TypeExpression => self.type_of_type_expr(tree), TreeKind::TypeIdentifier => self.type_of_type_identifier(t, tree), TreeKind::TypeParameter => self.type_of_type_parameter(tree), TreeKind::UnaryExpression => self.type_of_unary(tree), TreeKind::WhileStatement => self.type_of_while(tree), TreeKind::Import => self.type_of_import(tree), _ => self.internal_compiler_error(Some(t), "asking for a nonsense type"), }; // NOTE: These return `None` if they encounter some problem. let result = result.unwrap_or_else(|| self.type_error_for(tree)); self.types.borrow_mut()[t.index()] = Incremental::Complete(result.clone()); // eprintln!("<<< type_of => {tree:?}"); result } fn type_of_unary(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::UnaryExpression); let op = &self[tree.nth_token(0)?]; let expr = tree.nth_tree(1)?; let argument_type = self.type_of(expr); match (op.kind, argument_type) { (TokenKind::Plus, Type::F64) => Some(Type::F64), (TokenKind::Minus, Type::F64) => Some(Type::F64), (TokenKind::Bang, Type::Bool) => Some(Type::Bool), // This is dumb and should be punished, probably. (_, Type::Unreachable) => { let err = self.report_error_span( op.start(), op.end(), "cannot apply a unary operator to something that doesn't yield a value", ); Some(Type::Error(err)) } // Propagate existing errors without additional complaint. 
(_, Type::Error(e)) => Some(Type::Error(e)), (_, arg_type) => { let err = self.report_error_span( op.start(), op.end(), format!( "cannot apply unary operator '{}' to value of type {}", op.as_str(&self.source), arg_type ), ); Some(Type::Error(err)) } } } fn type_of_binary(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::BinaryExpression); let left_tree = tree.nth_tree(0)?; let lhs = self.type_of(left_tree); let op = &self[tree.nth_token(1)?]; let rhs = self.type_of(tree.nth_tree(2)?); match (op.kind, lhs, rhs) { ( TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash, Type::F64, Type::F64, ) => Some(Type::F64), (TokenKind::Plus, Type::String, Type::String) => Some(Type::String), (TokenKind::And | TokenKind::Or, Type::Bool, Type::Bool) => Some(Type::Bool), (TokenKind::EqualEqual, Type::F64, Type::F64) => Some(Type::Bool), (TokenKind::EqualEqual, Type::String, Type::String) => Some(Type::Bool), (TokenKind::EqualEqual, Type::Bool, Type::Bool) => Some(Type::Bool), (TokenKind::EqualEqual, Type::Nothing, Type::Nothing) => Some(Type::Bool), (TokenKind::Less, Type::F64, Type::F64) => Some(Type::Bool), (TokenKind::LessEqual, Type::F64, Type::F64) => Some(Type::Bool), (TokenKind::Greater, Type::F64, Type::F64) => Some(Type::Bool), (TokenKind::GreaterEqual, Type::F64, Type::F64) => Some(Type::Bool), (TokenKind::Less, Type::String, Type::String) => Some(Type::Bool), (TokenKind::LessEqual, Type::String, Type::String) => Some(Type::Bool), (TokenKind::Greater, Type::String, Type::String) => Some(Type::Bool), (TokenKind::GreaterEqual, Type::String, Type::String) => Some(Type::Bool), // This is dumb and should be punished, probably. 
(_, _, Type::Unreachable) => { let err = self.report_error_span( op.start(), op.end(), format!( "cannot apply '{}' to an argument that doesn't yield a value (on the right)", op.as_str(&self.source) ), ); Some(Type::Error(err)) } (_, Type::Unreachable, _) => { let err = self.report_error_span( op.start(), op.end(), format!( "cannot apply '{}' to an argument that doesn't yield a value (on the left)", op.as_str(&self.source) ), ); Some(Type::Error(err)) } // Propagate existing errors without additional complaint. (_, Type::Error(e), _) => Some(Type::Error(e)), (_, _, Type::Error(e)) => Some(Type::Error(e)), // Assignments are fun. (TokenKind::Equal, a, b) => self.type_of_assignment(left_tree, a, b, &op), // Missed the whole table, it must be an error. (_, left_type, right_type) => { let err =self.report_error_span( op.start(), op.end(), format!( "cannot apply binary operator '{}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)", op.as_str(&self.source) ), ); Some(Type::Error(err)) } } } fn type_of_assignment( &self, left_tree: TreeRef, left_type: Type, right_type: Type, op: &Token, ) -> Option { // Ensure the left tree is an lvalue let tree = &self[left_tree]; #[allow(unused_assignments)] let mut environment = None; let declaration = match tree.kind { // TODO: Assign to list access TreeKind::Identifier => { let id = self[tree.nth_token(0)?].as_str(&self.source); environment = Some(self.environment_of(left_tree)); match environment.as_ref().unwrap().bind(id) { Some(decl) => decl, None => { let error = if let Some(e) = &environment.as_ref().unwrap().error { e.clone() } else { self.report_error_tree(tree, format!("cannot find value {id} here")) }; return Some(Type::Error(error)); } } } TreeKind::MemberAccess => { let id = self[tree.nth_token(2)?].as_str(&self.source); let typ = self.type_of(tree.nth_tree(0)?); environment = Some(self.member_environment(left_tree, &typ)); match environment.as_ref().unwrap().bind(id) { Some(decl) => 
decl, None => { let error = if let Some(e) = &environment.as_ref().unwrap().error { e.clone() } else { self.report_error_tree(tree, format!("'{typ}' has no member {id}")) }; return Some(Type::Error(error)); } } } _ => { let error = self.report_error_tree_ref( left_tree, "cannot assign a value to this expression, it is not a place you can store things", ); return Some(Type::Error(error)); } }; match declaration.location { Location::Argument | Location::Slot | Location::Local | Location::Module => (), Location::ExternalFunction | Location::Function => { let error = self.report_error_tree_ref( left_tree, "cannot assign a new value to a function declaration", ); return Some(Type::Error(error)); } Location::Class => { let error = self.report_error_tree_ref( left_tree, "cannot assign a new value to a class declaration", ); return Some(Type::Error(error)); } Location::Import => { let error = self.report_error_tree_ref( left_tree, "cannot assign a new value to an imported module", ); return Some(Type::Error(error)); } } let _ = environment; let left_type = match left_type { Type::Assignment(x) => *x, t => t, }; let right_type = match right_type { Type::Assignment(x) => *x, t => t, }; if let Type::Error(e) = left_type { Some(Type::Error(e)) } else if let Type::Error(e) = right_type { Some(Type::Error(e)) } else if self.can_convert(&right_type, &left_type) { Some(Type::Assignment(Box::new(left_type))) } else { let error = self.report_error_span( op.start(), op.end(), format!("cannot assign a value of type '{right_type}' to type '{left_type}'"), ); Some(Type::Error(error)) } } fn type_of_type_expr(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::TypeExpression); Some(self.type_of(tree.nth_tree(0)?)) } fn type_of_type_identifier(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::TypeIdentifier); // TODO: This will *clearly* need to get better. 
let token = self[tree.nth_token(0)?].as_str(&self.source); match token { "f64" => Some(Type::F64), "string" => Some(Type::String), "bool" => Some(Type::Bool), "nothing" => Some(Type::Nothing), "list" => { let args = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::TypeParameterList)?; let mut arg_types: Vec<_> = args.child_trees().map(|t| self.type_of(t)).collect(); if arg_types.len() != 1 { let error = self.report_error_tree(tree, "list takes a single type argument"); Some(Type::Error(error)) } else { Some(Type::List(Box::new(arg_types.pop().unwrap()))) } } _ => { let environment = self.environment_of(t); match environment.bind(token) { Some(declaration) => { match declaration.location { Location::Class => Some(self.type_of_declaration(declaration)), Location::Argument | Location::Slot | Location::Local | Location::Module => { let error = self.report_error_tree( tree, format!("'{token}' is a variable and cannot be used as a type"), ); Some(Type::Error(error)) } Location::Function | Location::ExternalFunction => { let error = self.report_error_tree( tree, format!("'{token}' is a function and cannot be used as a type"), ); Some(Type::Error(error)) } Location::Import => { let error = self.report_error_tree( tree, format!("'{token}' is an imported module and cannot be used as a type"), ); Some(Type::Error(error)) } } } None => { let error = if let Some(e) = &environment.error { e.clone() } else { self.report_error_tree(tree, format!("Unrecognized type: '{token}'")) }; Some(Type::Error(error)) } } } } } fn type_of_type_parameter(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::TypeParameter); Some(self.type_of(tree.nth_tree(0)?)) } fn type_of_block(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::Block); if tree.children.len() < 2 { return None; } if tree.children.len() == 2 { // Empty blocks generate Nothing. return Some(Type::Nothing); } // The type of the block is the type of the last expression. 
// (But the last child is the closing brace probably?) let last_is_brace = tree.nth_token(tree.children.len() - 1).is_some(); let last_index = tree.children.len() - if last_is_brace { 2 } else { 1 }; let mut is_unreachable = false; for i in 1..last_index { // TODO: if `is_unreachable` here then we actually have // unreachable code here! We should warn about it // I guess. is_unreachable = matches!(self.type_of(tree.nth_tree(i)?), Type::Unreachable) || is_unreachable; } // NOTE: If for some reason the last statement is unsuitable for a // type then we consider the type of the block to be Nothing. // (And explicitly not Error, which is what returning None // would yield.) let last_type = self.type_of(tree.nth_tree(last_index)?); // If anything in this block generated an "Unreachable" then the // whole type of the block is "unreachable" no matter what. Some(if is_unreachable { Type::Unreachable } else { last_type }) } fn type_of_literal(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::LiteralExpression); let tok = &self[tree.nth_token(0)?]; let pig = match tok.kind { TokenKind::Number => Type::F64, TokenKind::String => Type::String, TokenKind::True | TokenKind::False => Type::Bool, _ => panic!( "the token {} doesn't have a type!", tok.as_str(&self.source) ), }; Some(pig) } fn type_of_grouping(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::GroupingExpression); tree.nth_tree(1).map(|t| self.type_of(t)) } fn type_of_conditional(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::ConditionalExpression); let then_type = self.type_of(tree.nth_tree(2)?); let has_else = tree .nth_token(3) .map(|t| self[t].kind == TokenKind::Else) .unwrap_or(false); let else_type = if has_else { Some(self.type_of(tree.nth_tree(4)?)) } else { None }; match (then_type, else_type) { (Type::Error(e), _) => Some(Type::Error(e)), (_, Some(Type::Error(e))) => Some(Type::Error(e)), (Type::Unreachable, None) => Some(Type::Nothing), (Type::Unreachable, 
Some(t)) => Some(t), (t, Some(Type::Unreachable)) => Some(t), (then_type, else_type) => { let else_type = else_type.unwrap_or(Type::Nothing); Some(self.build_alternate(&then_type, &else_type)) } } } fn type_of_call(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::CallExpression); // TODO: Move the vast majority of error checking out of this // function: once you know that the 0th tree (the function // expression) yields a function type, assume the type of the // call is the type of the function return. Don't bother // matching argument types &c; do that in an explicit // check_call_expression function below. let f_ref = tree.nth_tree(0)?; let f = self.type_of(f_ref); let arg_list = &self[tree.nth_tree(1)?]; let arg_types: Vec<_> = arg_list .children .iter() .filter_map(|c| match c { Child::Tree(t) => Some((*t, self.type_of(*t))), _ => None, }) .collect(); // Propagate type errors if there are any. let type_error = if let Type::Error(e) = &f { Some(e.clone()) } else { arg_types.iter().find_map(|(_, t)| match t { Type::Error(e) => Some(e.clone()), _ => None, }) }; if let Some(error) = type_error { return Some(Type::Error(error)); } match f { Type::Function(params, ret) => { let mut param_error = None; if params.len() != arg_types.len() { // TODO: Augment with function name if known let err = self .report_error_tree(tree, format!("expected {} parameters", params.len())); param_error = Some(err); } for (i, ((t, a), p)) in arg_types.iter().zip(params.iter()).enumerate() { // a here is the type of the argument expression; p is // the declared type of the parameter. 
if !self.can_convert(&a, p) { let err = self.report_error_tree_ref( *t, format!( "parameter {i} has an incompatible type: expected {} but got {}", p, a ), ); param_error = Some(err); } } if let Some(param_error) = param_error { return Some(Type::Error(param_error)); } Some(*ret.clone()) } Type::Method(_, params, ret) => { let mut param_error = None; // For the purposes of type checking ignore the self type. if params.len() != arg_types.len() { // TODO: Augment with function name if known let err = self .report_error_tree(tree, format!("expected {} parameters", params.len())); param_error = Some(err); } for (i, ((t, a), p)) in arg_types.iter().zip(params.iter()).enumerate() { // a here is the type of the argument expression; p is // the declared type of the parameter. if !self.can_convert(&a, p) { let err = self.report_error_tree_ref( *t, format!( "parameter {i} has an incompatible type: expected {} but got {}", p, a ), ); param_error = Some(err); } } if let Some(param_error) = param_error { return Some(Type::Error(param_error)); } Some(*ret.clone()) } _ => { let err = self .report_error_tree_ref(f_ref, format!("expected a function type, got: {f}")); Some(Type::Error(err)) } } } fn type_of_argument(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::Argument); let result = self.type_of(tree.nth_tree(0)?); Some(result) } fn type_of_member_access(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::MemberAccess); let lhs = tree.nth_tree(0)?; let typ = self.type_of(lhs); let env = self.member_environment(lhs, &typ); let id = &self[tree.nth_token(2)?]; if id.kind != TokenKind::Identifier { return Some(self.type_error_for(tree)); } let id_str = id.as_str(&self.source); let Some(declaration) = env.bind(id_str) else { let error = if let Some(e) = &env.error { e.clone() } else { self.report_error_span( id.start(), id.end(), format!("'{typ}' has no member {id_str}"), ) }; return Some(Type::Error(error)); }; Some(self.type_of_declaration(declaration)) } 
pub fn member_environment(&self, t: TreeRef, typ: &Type) -> EnvironmentRef { match &typ { Type::Object(mid, ct, _) => { let class = self.class_of(*mid, *ct); class.env.clone() } Type::Class(mid, ct, _) => { let class = self.class_of(*mid, *ct); class.static_env.clone() } Type::Module(_, import) => { // TODO: Cache this somehow, man. let Some(other) = self.import_by_id(*import) else { self.internal_compiler_error(Some(t), "Unable to bind module"); }; let Some(root) = other.syntax_tree.root() else { self.internal_compiler_error(Some(t), "Other syntax tree has no root"); }; let rt = &other[root]; assert_eq!(rt.kind, TreeKind::File); let mut result = Environment::new(self.mid, None, Location::Module); let other_env = other.environment_of(root); for (name, decl) in other_env.declarations.iter() { if decl.is_exported() { result.declarations.insert(name.clone(), decl.clone()); } } EnvironmentRef::new(result) } Type::Error(e) => return Environment::error(e.clone()), _ => { let error = self.report_error_tree_ref(t, format!("cannot access members of '{typ}'")); return Environment::error(error); } } } fn type_of_expression_statement(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::ExpressionStatement); let last_is_semicolon = tree .nth_token(tree.children.len() - 1) .map(|t| self[t].kind == TokenKind::Semicolon) .unwrap_or(false); let expression_type = tree.nth_tree(0).map(|t| self.type_of(t)); match expression_type { Some(Type::Unreachable) => Some(Type::Unreachable), _ => { // A semicolon at the end of an expression statement discards // the value, leaving us with nothing. (Even if the // expression otherwise generated a type error!) 
if last_is_semicolon { Some(Type::Nothing) } else { expression_type } } } } fn type_of_identifier(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::Identifier); let id = self[tree.nth_token(0)?].as_str(&self.source); let environment = self.environment_of(t); if let Some(declaration) = environment.bind(id) { let typ = self.type_of_declaration(declaration); // The one weirdsy here is that if this is an identifier that refers // directly to a class then this should be a *class* type not an // *object* type. let typ = if declaration.location == Location::Class { match typ { Type::Object(m, t, n) => Type::Class(m, t, n), _ => self.internal_compiler_error( Some(t), "This class declaration did not yield type object!", ), } } else { typ }; return Some(typ); } let error = if let Some(e) = &environment.error { e.clone() } else { self.report_error_tree(tree, format!("cannot find value {id} here")) }; Some(Type::Error(error)) } fn type_of_declaration(&self, declaration: &Declaration) -> Type { match &declaration.origin { Origin::External(t) => t.clone(), Origin::Source(t) => { if declaration.module == self.mid { self.type_of(*t) } else { let Some(other_semantics) = self.import_by_id(declaration.module) else { let message = format!( "Cannot find a module matching this decl's mid: {:?}", declaration.module ); self.internal_compiler_error(Some(*t), &message); }; other_semantics.type_of(*t) } } } } fn type_of_self_parameter(&self, tree: &Tree) -> Option { let pl = tree.parent?; let param_list = &self[pl]; let fd = param_list.parent?; let function_decl = &self[fd]; let cd = function_decl.parent?; let class_decl = &self[cd]; if class_decl.kind != TreeKind::ClassDecl { let error = self.report_error_tree(tree, "self parameter only allowed in methods"); Some(Type::Error(error)) } else { Some(self.type_of(cd)) } } fn type_of_self_reference(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::SelfReference); let id = 
self[tree.nth_token(0)?].as_str(&self.source); let environment = self.environment_of(t); if let Some(declaration) = environment.bind(id) { return Some(self.type_of_declaration(declaration)); } let error = if let Some(e) = &environment.error { e.clone() } else { self.report_error_tree(tree, "`self` is only valid in methods") }; Some(Type::Error(error)) } fn type_of_function_decl(&self, tree: &Tree) -> Option { let param_list = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::ParamList)?; // NOTE: The methodness here is determined by the presence of a self // parameter, even if that parameter is incorrect (e.g., this // declaration is not nested in a class, or it is not the first // parameter.) We could have also chosen to signal it by our // nesting but we want to extract the self parameter to a // distinguished place in the function type. let mut self_type = None; let mut parameter_types = Vec::new(); for p in param_list.child_trees() { let p_type = Box::new(self.type_of(p)); if self[p].kind == TreeKind::SelfParameter { self_type = Some(p_type); } else { parameter_types.push(p_type); } } let return_type = match tree.child_of_kind(&self.syntax_tree, TreeKind::ReturnType) { Some(t) => self.type_of(t), None => Type::Nothing, }; let return_type = Box::new(return_type); Some(match self_type { Some(self_type) => Type::Method(self_type, parameter_types, return_type), None => Type::Function(parameter_types, return_type), }) } fn type_of_parameter(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::Parameter); match tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression) { Some(t) => Some(self.type_of(t)), None => { let error = self.report_error_tree(tree, format!("the parameter is missing a type")); Some(Type::Error(error)) } } } fn type_of_iterator_variable(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::IteratorVariable); let parent = &self[tree.parent?]; assert_eq!(parent.kind, TreeKind::ForStatement); let enumerable = 
parent.nth_tree(3)?; let item_type = match self.type_of(enumerable) { Type::Error(e) => Type::Error(e), Type::List(x) => (&*x).clone(), _ => { let error = self.report_error_tree_ref(enumerable, "this expression is not enumerable"); Type::Error(error) } }; Some(item_type) } fn type_of_list_constructor(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::ListConstructor); let mut element_type = None; for ct in tree.child_trees() { let child_type = self.type_of(ct); element_type = match element_type { None => Some(child_type), Some(list_type) => { if list_type.is_error() { Some(child_type) } else if child_type.is_error() { Some(list_type) } else if self.can_convert(&child_type, &list_type) { Some(list_type) } else if self.can_convert(&list_type, &child_type) { Some(child_type) } else { // Even if there is some incompatibility we stick // with the list type, preferring to guess what was // intended rather than just error out. self.report_error_tree_ref(ct, format!("list element of type {child_type} is not compatible with the list type {list_type}")); Some(list_type) } } } } let element_type = element_type.unwrap_or_else(|| Type::TypeVariable(t)); Some(Type::List(Box::new(element_type))) } fn type_of_new_object_expression(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::NewObjectExpression); // NOTE: Matching fields is done in the check function, as is // reporting on the suitability of the type. Some(self.type_of(tree.nth_tree(1)?)) } fn type_of_class_decl(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::ClassDecl); // The details of a class are computed lazily, but this is enough of // a belly-button. let name = &self[tree.nth_token(1)?]; // NOTE: There's a kind of a weird design decision here, which is to // return an instance type instead of a class type. This is // because it turns out to be what you want most of the time: // variables should be object type, arguments should be object // type, etc. 
//
        //       There are only two places where you have to deal with this
        //       being weird: first, in the new object expression, and
        //       second, in static member access.
        //
        //       For new object expression it turns out to not matter much,
        //       you just have to be aware that the type identifier is an
        //       object type.
        //
        //       For the static member access, that one is just plain weird.
        //       But it's easier to handle converting object type -> class
        //       type there (in type_of_identifier) rather than flipping the
        //       default here and dealing with converting class type ->
        //       object type literally everywhere else.
        Some(Type::Object(self.mid, t, name.as_str(&self.source).into()))
    }

    /// A field declaration has the type of the tree at child 2.
    ///
    /// Panics if `tree` is not a `FieldDecl`.
    fn type_of_field_decl(&self, tree: &Tree) -> Option<Type> {
        assert_eq!(tree.kind, TreeKind::FieldDecl);

        // Type of a field declaration is the type of the type expression.
        Some(self.type_of(tree.nth_tree(2)?))
    }

    /// Computes the type of one field initializer in an object literal.
    ///
    /// With an explicit value (`x: e`) the result is the type of `e`. In the
    /// shorthand form (`x`) the name is looked up in the environment of `t`:
    /// a missing binding, a class or an imported module each produce a
    /// reported `Type::Error`; any other declaration contributes its own
    /// type.
    ///
    /// Panics if `tree` is not a `FieldValue`.
    fn type_of_field_value(&self, t: TreeRef, tree: &Tree) -> Option<Type> {
        assert_eq!(tree.kind, TreeKind::FieldValue);

        let has_explicit_value = tree
            .nth_token(1)
            .map_or(false, |colon| self[colon].kind == TokenKind::Colon);
        if has_explicit_value {
            // Form 1: { x: e, ... }
            return Some(self.type_of(tree.nth_tree(2)?));
        }

        // Form 2: { x, ... }
        let environment = self.environment_of(t);
        let id = self[tree.nth_token(0)?].as_str(&self.source);
        let Some(declaration) = environment.bind(id) else {
            // Prefer the error already attached to the environment over a
            // fresh "not found" report.
            let error = if let Some(e) = &environment.error {
                e.clone()
            } else {
                self.report_error_tree(tree, format!("cannot find value {id} here"))
            };
            return Some(Type::Error(error));
        };

        match declaration.location {
            Location::Argument
            | Location::Slot
            | Location::Local
            | Location::Module
            | Location::Function
            | Location::ExternalFunction => Some(self.type_of_declaration(declaration)),

            Location::Class => {
                let error = self.report_error_tree(
                    tree,
                    format!("'{id}' is a class, and cannot be the value of a field"),
                );
                Some(Type::Error(error))
            }

            Location::Import => {
                let error = self.report_error_tree(
                    tree,
                    format!("'{id}' is an imported module, and cannot be the value of a field"),
                );
                Some(Type::Error(error))
            }
        }
    }

    /// A list constructor element has the type of the tree at child 0.
    ///
    /// Panics if `tree` is not a `ListConstructorElement`.
    fn type_of_list_constructor_element(&self, tree: &Tree) -> Option<Type> {
        assert_eq!(tree.kind, TreeKind::ListConstructorElement);
        Some(self.type_of(tree.nth_tree(0)?))
    }

    /// A return type annotation has the type of the tree at child 1.
    ///
    /// Panics if `tree` is not a `ReturnType`.
    fn type_of_return_type(&self, tree: &Tree) -> Option<Type> {
        assert_eq!(tree.kind, TreeKind::ReturnType);
        Some(self.type_of(tree.nth_tree(1)?)) // type expression
    }

    /// An `if` statement has the type of the tree at child 0.
    fn type_of_if_statement(&self, tree: &Tree) -> Option<Type> {
        Some(self.type_of(tree.nth_tree(0)?))
    }

    /// An alternate type is built by `build_alternate` from the types of the
    /// trees at child 0 and child 2.
    fn type_of_alternate_type(&self, tree: &Tree) -> Option<Type> {
        // TODO: IDEA: nth_tree returns a bogus tree if not a tree, stop returning Option?
let left = self.type_of(tree.nth_tree(0)?);
        let right = self.type_of(tree.nth_tree(2)?);
        Some(self.build_alternate(&left, &right))
    }

    /// A match expression has the type of its `MatchBody` child.
    fn type_of_match_expression(&self, tree: &Tree) -> Option<Type> {
        Some(self.type_of(tree.child_of_kind(&self.syntax_tree, TreeKind::MatchBody)?))
    }

    /// Computes the common result type of all the arms of a match body.
    ///
    /// Starts from the first arm's type and widens it whenever a later arm's
    /// type is one the current result converts to. An arm that is not
    /// convertible in either direction is reported and does not change the
    /// result. A body with no arms is reported and typed as `Type::Error`.
    fn type_of_match_body(&self, tree: &Tree) -> Option<Type> {
        let arms: Vec<_> = tree
            .children_of_kind(&self.syntax_tree, TreeKind::MatchArm)
            .collect();

        let Some((&first, rest)) = arms.split_first() else {
            let error =
                self.report_error_tree(tree, "a match expression must have at least one arm");
            return Some(Type::Error(error));
        };

        let mut actual_type = self.type_of(first);
        for &arm in rest {
            let arm_type = self.type_of(arm);
            if !self.can_convert(&arm_type, &actual_type) {
                if self.can_convert(&actual_type, &arm_type) {
                    // New lowest-common-denominator type
                    actual_type = arm_type;
                } else {
                    self.report_error_tree_ref(arm, format!("this arm produces a value of type '{arm_type}' which is incompatible with the general result type {actual_type}"));
                }
            }
        }
        Some(actual_type)
    }

    /// A match arm has the type of the tree at child 2.
    fn type_of_match_arm(&self, tree: &Tree) -> Option<Type> {
        // The type of a match arm is the type of the expression to the right.
        Some(self.type_of(tree.nth_tree(2)?))
    }

    /// A `while` statement is `Type::Unreachable` when its condition
    /// (child 1) constant-folds to `true`, and `Type::Nothing` otherwise.
    fn type_of_while(&self, tree: &Tree) -> Option<Type> {
        match self.constant_eval(tree.nth_tree(1)?) {
            Some(StackValue::Bool(true)) => Some(Type::Unreachable),
            _ => Some(Type::Nothing),
        }
    }

    /// A pattern has the type of its `TypeExpression` child; returns `None`
    /// when the pattern has no such child.
    fn type_of_pattern(&self, tree: &Tree) -> Option<Type> {
        // We know that we have a type expression in here, that's what we're asking about.
        Some(self.type_of(tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression)?))
    }

    /// Resolves an import to `Type::Module` through the import map.
    ///
    /// The token at child 1 must be a string; otherwise the result comes
    /// from `type_error_for`. A name that is absent from the import map is
    /// reported. An uninitialized import map is an internal compiler error.
    fn type_of_import(&self, tree: &Tree) -> Option<Type> {
        let tok = &self[tree.nth_token(1)?];
        if tok.kind != TokenKind::String {
            return Some(self.type_error_for(tree));
        }

        // do we bind it here? it's not normalized....
let name = string_constant_to_string(tok.as_str(&self.source));
        let Some(import_map) = self.import_map.get() else {
            self.internal_compiler_error(None, "import map not initialized");
        };

        match import_map.get(&name) {
            Some(import) => Some(Type::Module(name.into(), *import)),
            None => {
                let error =
                    self.report_error_tree(tree, format!("unable to resolve module import {name}"));
                Some(Type::Error(error))
            }
        }
    }

    // TODO: Really want to TEST THIS also uh can we generate bytecode for functions and call it??
    /// Tries to evaluate the expression at `t` at compile time.
    ///
    /// Handles literals, `is` expressions, grouping, and unary and binary
    /// operators over constant operands. Returns `None` for anything it
    /// cannot fold, including a numeric literal whose text does not parse.
    fn constant_eval(&self, t: TreeRef) -> Option<StackValue> {
        // TODO: Make this cached, incremental, so the compiler can use it for optimizations.
        let tree = &self[t];
        match tree.kind {
            TreeKind::LiteralExpression => {
                let tok = &self[tree.nth_token(0)?];
                match self.type_of(t) {
                    // Treat an unparsable literal as "not a constant"
                    // rather than panicking inside the compiler.
                    Type::F64 => tok
                        .as_str(&self.source)
                        .parse()
                        .ok()
                        .map(StackValue::Float),
                    Type::Bool => Some(StackValue::Bool(tok.kind == TokenKind::True)),
                    Type::String => Some(StackValue::String(
                        string_constant_to_string(tok.as_str(&self.source)).into(),
                    )),
                    Type::Nothing => Some(StackValue::Nothing), // ?
_ => None,
                }
            }

            TreeKind::IsExpression => {
                let pt = tree.nth_tree(2)?;
                let pattern = &self[pt];

                // The structural part of the pattern always matches when it
                // is a wildcard, or when the subject's type converts to the
                // pattern's type.
                let always_matches = pattern
                    .child_of_kind(&self.syntax_tree, TreeKind::WildcardPattern)
                    .is_some()
                    || self.can_convert(&self.type_of(tree.nth_tree(0)?), &self.type_of(pt));
                if !always_matches {
                    return None;
                }

                // A pattern may also carry an `and <predicate>` guard (the
                // same shape check_pattern looks for). The expression is
                // only a constant if that guard is one too.
                let and_index = pattern.children.iter().position(|c| match c {
                    Child::Token(t) => self[t].kind == TokenKind::And,
                    _ => false,
                });
                match and_index {
                    None => Some(StackValue::Bool(true)),
                    Some(and_index) => {
                        match self.constant_eval(pattern.nth_tree(and_index + 1)?)? {
                            StackValue::Bool(b) => Some(StackValue::Bool(b)),
                            _ => None,
                        }
                    }
                }
            }

            TreeKind::GroupingExpression => self.constant_eval(tree.nth_tree(1)?),

            TreeKind::UnaryExpression => {
                let op = self[tree.nth_token(0)?].kind;
                let val = self.constant_eval(tree.nth_tree(1)?)?;

                match (op, val) {
                    (TokenKind::Plus, StackValue::Float(a)) => Some(StackValue::Float(a)),
                    (TokenKind::Minus, StackValue::Float(a)) => Some(StackValue::Float(-a)),
                    (TokenKind::Bang, StackValue::Bool(a)) => Some(StackValue::Bool(!a)),
                    _ => None,
                }
            }

            TreeKind::BinaryExpression => {
                // Both operands must fold before the operator is examined.
                let left = self.constant_eval(tree.nth_tree(0)?)?;
                let right = self.constant_eval(tree.nth_tree(2)?)?;
                let op = self[tree.nth_token(1)?].kind;
                match (op, left, right) {
                    (TokenKind::Plus, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Float(a + b))
                    }
                    (TokenKind::Minus, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Float(a - b))
                    }
                    (TokenKind::Star, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Float(a * b))
                    }
                    (TokenKind::Slash, StackValue::Float(a), StackValue::Float(b)) => {
                        // Division by zero is left unfolded.
                        if b != 0.0 {
                            Some(StackValue::Float(a / b))
                        } else {
                            None // TODO: Error
                        }
                    }
                    (TokenKind::Plus, StackValue::String(a), StackValue::String(b)) => {
                        let mut result = String::new();
                        result.push_str(&*a);
                        result.push_str(&*b);
                        Some(StackValue::String(result.into()))
                    }

                    (TokenKind::And, StackValue::Bool(a), StackValue::Bool(b)) => {
                        Some(StackValue::Bool(a && b))
                    }
                    (TokenKind::Or, StackValue::Bool(a), StackValue::Bool(b)) => {
                        Some(StackValue::Bool(a || b))
                    }

                    (TokenKind::EqualEqual, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Bool(a == b))
                    }
                    (TokenKind::EqualEqual, StackValue::String(a), StackValue::String(b)) => {
Some(StackValue::Bool(a == b))
                    }
                    (TokenKind::EqualEqual, StackValue::Bool(a), StackValue::Bool(b)) => {
                        Some(StackValue::Bool(a == b))
                    }
                    (TokenKind::EqualEqual, StackValue::Nothing, StackValue::Nothing) => {
                        Some(StackValue::Bool(true))
                    }

                    (TokenKind::Less, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Bool(a < b))
                    }
                    (TokenKind::LessEqual, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Bool(a <= b))
                    }
                    (TokenKind::Greater, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Bool(a > b))
                    }
                    (TokenKind::GreaterEqual, StackValue::Float(a), StackValue::Float(b)) => {
                        Some(StackValue::Bool(a >= b))
                    }

                    (TokenKind::Less, StackValue::String(a), StackValue::String(b)) => {
                        Some(StackValue::Bool(a < b))
                    }
                    (TokenKind::LessEqual, StackValue::String(a), StackValue::String(b)) => {
                        Some(StackValue::Bool(a <= b))
                    }
                    (TokenKind::Greater, StackValue::String(a), StackValue::String(b)) => {
                        Some(StackValue::Bool(a > b))
                    }
                    (TokenKind::GreaterEqual, StackValue::String(a), StackValue::String(b)) => {
                        Some(StackValue::Bool(a >= b))
                    }

                    _ => None,
                }
            }

            _ => None,
        }
    }

    /// Writes a diagnostic dump of this module's state to stderr.
    ///
    /// Prints the module id, the parsed syntax tree, the errors reported so
    /// far, the import map and the ids in the module table. When `tr` is
    /// given it also prints that tree, its chain of logical parents and
    /// every environment from the tree's own up through its parents.
    pub fn dump_compiler_state(&self, tr: Option<TreeRef>) {
        eprintln!("Module: {:?}", self.mid);
        eprintln!("Parsed the tree as:");
        eprintln!("\n{}", self.syntax_tree.dump(&self.source, true));

        {
            let errors = self.snapshot_errors();
            if errors.len() == 0 {
                eprintln!("There were no errors reported during checking.\n");
            } else {
                eprintln!(
                    "{} error{} reported during checking:",
                    errors.len(),
                    if errors.len() == 1 { "" } else { "s" }
                );
                for error in errors.iter() {
                    eprintln!(" Error: {error}");
                }
                eprintln!();
            }
        }

        {
            match self.import_map.get() {
                Some(m) => {
                    eprintln!("Import map:");
                    for (k, b) in m.iter() {
                        eprintln!(" {k} => {:?}", b);
                    }
                    eprintln!();
                }
                None => {
                    eprintln!("The import map is not set.\n")
                }
            }

            eprintln!("Module Table:");
            for (id, _sem) in self.module_table.borrow().iter() {
                eprintln!(" {:?} => ??", id);
            }
            eprintln!();
        }

        if let Some(tr) = tr {
            eprintln!("This is about the tree: {:?}", &self[tr]);
            eprintln!("The logical parent chain of the tree was:\n");
            let mut current = Some(tr);
            while let Some(c) = current {
                let t = &self[c];
                eprintln!(" {:?} [{}-{})", t.kind, t.start_pos, t.end_pos);
                current = self.logical_parents[c.index()];
            }

            eprintln!("\nThe environment of the tree was:");
            let mut environment = Some(self.environment_of(tr));
            while let Some(env) = environment {
                if let Some(error) = &env.error {
                    // End the line so the first declaration below does not
                    // run onto the error message.
                    eprintln!(" *** ERROR: {error}");
                }
                for (k, v) in env.declarations.iter() {
                    eprintln!(
                        " {k}: {:?} {} ({:?} {:?})",
                        v.location, v.index, v.module, v.origin
                    );
                }
                environment = env.parent.clone();
            }
            eprintln!();
        }
    }

    /// Reports an internal compiler error and never returns.
    ///
    /// Prints `message` and the full compiler state (focused on `tr` when
    /// given) to stderr, then panics with
    /// `INTERNAL COMPILER ERROR: {message}`.
    #[cold]
    #[track_caller]
    pub fn internal_compiler_error(&self, tr: Option<TreeRef>, message: &str) -> ! {
        eprintln!("Internal compiler error: {message}!");
        self.dump_compiler_state(tr);
        panic!("INTERNAL COMPILER ERROR: {message}")
    }
}

/// Runs the semantic checks over every tree in the module.
///
/// Each tree kind either dispatches to a dedicated `check_*` function, asks
/// for the tree's type or environment and discards the result, or is
/// skipped.
// NOTE(review): the discarded type_of / environment_of calls are presumably
// made for the errors they report as a side effect -- confirm.
pub fn check(s: &Semantics) {
    for t in s.syntax_tree.trees() {
        let tree = &s[t];
        match tree.kind {
            TreeKind::Error => {} // already reported
            TreeKind::File => {}
            TreeKind::FunctionDecl => check_function_decl(s, t, tree),
            TreeKind::ParamList => {
                let _ = s.environment_of(t);
            }
            TreeKind::Parameter => {
                let _ = s.type_of(t);
            }
            TreeKind::TypeExpression | TreeKind::AlternateType | TreeKind::TypeIdentifier => {
                let _ = s.type_of(t);
            }
            TreeKind::Block => {
                let _ = s.type_of(t);
            }
            TreeKind::LetStatement => check_let(s, tree),
            TreeKind::ReturnStatement => check_return_statement(s, tree),
            TreeKind::ExpressionStatement
            | TreeKind::LiteralExpression
            | TreeKind::GroupingExpression
            | TreeKind::UnaryExpression
            | TreeKind::BinaryExpression
            | TreeKind::MemberAccess => {
                let _ = s.type_of(t);
            }
            TreeKind::ConditionalExpression => check_conditional(s, tree),
            TreeKind::CallExpression => {
                let _ = s.type_of(t);
            }
            TreeKind::ArgumentList => {}
            TreeKind::Argument => {
                let _ = s.type_of(t);
            }
            TreeKind::IfStatement => {}
            TreeKind::Identifier => {
                let _ = s.type_of(t);
            }
            TreeKind::ReturnType => {}
            TreeKind::TypeParameter => {}
TreeKind::TypeParameterList => {} TreeKind::ListConstructor => { let _ = s.type_of(t); } TreeKind::ListConstructorElement => { let _ = s.type_of(t); } TreeKind::ForStatement => check_for_statement(s, t), TreeKind::IteratorVariable => {} TreeKind::ClassDecl => check_class_declaration(s, tree), TreeKind::FieldDecl => {} TreeKind::FieldList => {} TreeKind::NewObjectExpression => check_new_object_expression(s, tree), TreeKind::FieldValue => {} TreeKind::SelfParameter => {} TreeKind::SelfReference => {} TreeKind::IsExpression => {} TreeKind::VariableBinding => {} TreeKind::Pattern => check_pattern(s, tree), TreeKind::WildcardPattern => {} TreeKind::MatchArm => {} TreeKind::MatchBody => check_match_body(s, t, tree), TreeKind::MatchExpression => {} TreeKind::WhileStatement => check_while_statement(s, tree), TreeKind::Import => { let _ = s.type_of(t); } TreeKind::Export => {} TreeKind::ExportList => { // TODO: Check that each name in the list is in the environment } } } } fn check_conditional(s: &Semantics, tree: &Tree) { let Some(cond_tree) = tree.nth_tree(1) else { return; }; let cond_type = s.type_of(cond_tree); if !s.can_convert(&cond_type, &Type::Bool) { if !cond_type.is_error() { s.report_error_tree_ref( cond_tree, format!("this condition produces '{cond_type}', but must produce bool"), ); } } } fn check_function_decl(s: &Semantics, t: TreeRef, tree: &Tree) { assert_eq!(tree.kind, TreeKind::FunctionDecl); let _ = s.environment_of(t); let return_type_tree = tree.child_of_kind(&s.syntax_tree, TreeKind::ReturnType); let return_type = return_type_tree .map(|t| s.type_of(t)) .unwrap_or(Type::Nothing); if let Some(body) = tree.child_of_kind(&s.syntax_tree, TreeKind::Block) { let body_type = s.type_of(body); if !s.can_convert(&body_type, &return_type) { // Just work very hard to get an appropriate error span. 
let (start, end) = match return_type_tree {
                // Point at the declared return type when there is one...
                Some(rt) => {
                    let rtt = &s[rt];
                    (rtt.start_pos, rtt.end_pos)
                }
                // ...otherwise span from the start of the declaration to the
                // end of token 1, or of token 0 if there is no token 1.
                None => {
                    let end_tok = match tree.nth_token(1) {
                        Some(tok) => tok,
                        None => tree.nth_token(0).unwrap(),
                    };
                    (tree.start_pos, s[end_tok].end())
                }
            };
            s.report_error_span(start, end, format!("the body of this function yields a value of type '{body_type}', but callers expect this function to produce a '{return_type}'"));
        }
    }
}

/// Rejects `let` statements whose initializer has a method type, reporting
/// the error on the token at child 1.
// NOTE(review): child 1 is taken to be the variable name and child 3 the
// initializer expression -- confirm against the parser.
fn check_let(s: &Semantics, tree: &Tree) {
    let Some(name) = tree.nth_token(1) else {
        return;
    };
    let name = &s[name];

    let Some(expr) = tree.nth_tree(3) else {
        return;
    };

    if matches!(s.type_of(expr), Type::Method(..)) {
        s.report_error_span(
            name.start(),
            name.end(),
            "methods cannot be assigned to variables",
        );
    }
}

/// Checks a `return` statement against the nearest enclosing function.
///
/// Reports an error if there is no enclosing `FunctionDecl`, or if the
/// returned value (`Type::Nothing` when there is none) does not convert to
/// the function's return type. An enclosing declaration whose type is
/// neither a function, a method nor an error is an internal compiler error.
///
/// Panics if `tree` is not a `ReturnStatement`.
fn check_return_statement(s: &Semantics, tree: &Tree) {
    assert_eq!(tree.kind, TreeKind::ReturnStatement);

    // Walk up the parent chain to the closest function declaration.
    let enclosing_function = std::iter::successors(tree.parent, |&fp| s[fp].parent)
        .find(|&fp| s[fp].kind == TreeKind::FunctionDecl);
    let Some(enclosing_function) = enclosing_function else {
        s.report_error_tree(
            tree,
            "a return statement can only be used inside a function",
        );
        return;
    };

    match s.type_of(enclosing_function) {
        Type::Function(_, expected_type) | Type::Method(_, _, expected_type) => {
            let actual_type = tree
                .nth_tree(1)
                .map(|expr| s.type_of(expr))
                .unwrap_or(Type::Nothing);

            if !s.can_convert(&actual_type, &expected_type) {
                s.report_error_tree(tree, format!("callers of this function expect a value of type '{expected_type}' but this statement returns a value of type '{actual_type}'"));
            }
        }
        Type::Error(_) => (),
        _ => s.internal_compiler_error(
            Some(enclosing_function),
            "a return statement in here expected this to yield a function type",
        ),
    }

    // OK this one is a little bit messed up because it reaches *up*, sorry.
}

/// Forces the environment of a `for` statement to be computed.
fn check_for_statement(s: &Semantics, t: TreeRef) {
    let _ = s.environment_of(t);
}

/// Checks the field initializers of a `new` expression against the class
/// being constructed.
///
/// Reports initializers whose type does not convert to the declared field
/// type, fields with no initializer, and initializers that name no field
/// of the class. The last two are suppressed when any initializer has no
/// name token. A type expression that is neither an object type nor an
/// error is reported as not being a class type.
fn check_new_object_expression(s: &Semantics, tree: &Tree) {
    let Some(type_expression) = tree.nth_tree(1) else {
        return;
    };
    let Some(field_list) = tree.child_tree_of_kind(&s.syntax_tree, TreeKind::FieldList) else {
        return;
    };

    let class_type = s.type_of(type_expression);
    // TODO: Should yield a ClassType not an ObjectType?
    match &class_type {
        Type::Object(mid, c, _) => {
            // Get the class def from ... place.
            let class = s.class_of(*mid, *c);

            let mut any_errors = false;
            let mut field_bindings = HashMap::new();
            for field in field_list.children_of_kind(&s.syntax_tree, TreeKind::FieldValue) {
                let f = &s[field];
                if let Some(name) = f.nth_token(0) {
                    let field_type = s.type_of(field);
                    field_bindings.insert(s[name].as_str(&s.source), (field, field_type));
                } else {
                    any_errors = true;
                }
            }

            // Check individual bindings, consuming each one as it is matched
            // so that whatever is left afterwards names no field.
            for f in class.fields.iter() {
                if let Some((field_tree, expr_type)) = field_bindings.remove(&*f.name) {
                    if !s.can_convert(&expr_type, &f.field_type) {
                        s.report_error_tree_ref(
                            field_tree,
                            format!(
                                "field {} is of type {}, but this expression generates a {}",
                                f.name, f.field_type, expr_type,
                            ),
                        );
                    }
                } else if !any_errors {
                    s.report_error_tree(
                        tree,
                        format!("missing an initializer for field {}", f.name),
                    );
                }
            }

            if !any_errors {
                // Walk the initializers in source order rather than
                // iterating the map, whose order is arbitrary, so the
                // diagnostics come out in the same order on every run.
                for field in field_list.children_of_kind(&s.syntax_tree, TreeKind::FieldValue) {
                    let Some(name) = s[field].nth_token(0) else {
                        continue;
                    };
                    let name = s[name].as_str(&s.source);
                    if let Some((field_tree, _)) = field_bindings.remove(name) {
                        s.report_error_tree_ref(
                            field_tree,
                            format!("{} does not have a field named {}", class_type, name),
                        );
                    }
                }
            }
        }
        Type::Error(_) => (),
        ct => {
            s.report_error_tree_ref(
                type_expression,
                format!("expected this to be a class type, but it is {ct}"),
            );
        }
    }
}

/// Reports every field of a class declaration whose name repeats that of
/// an earlier field.
fn check_class_declaration(s: &Semantics, tree: &Tree) {
    let mut fields = HashMap::new();
    for field in tree.children_of_kind(&s.syntax_tree, TreeKind::FieldDecl) {
        let f = &s[field];
        let Some(name) = f.nth_token(0) else {
            continue;
        };
        let name = s[name].as_str(&s.source);
        match fields.insert(name, field) {
            Some(_) => {
s.report_error_tree(f, format!("duplicate definition of field '{name}'"));
            }
            None => {}
        }
    }
}

/// Checks that the predicate following an `and` token in a pattern, if
/// there is one, converts to `bool`. A predicate whose type is already an
/// error is not reported again.
fn check_pattern(s: &Semantics, tree: &Tree) {
    // If there's an AND then it must produce a boolean.
    let and_index = tree.children.iter().position(|c| match c {
        Child::Token(t) => s[t].kind == TokenKind::And,
        _ => false,
    });
    if let Some(and_index) = and_index {
        if let Some(pred) = tree.nth_tree(and_index + 1) {
            let predicate_type = s.type_of(pred);
            // Same policy as check_conditional: don't pile a second
            // diagnostic on top of an existing error.
            if !s.can_convert(&predicate_type, &Type::Bool) && !predicate_type.is_error() {
                // TODO: TEST
                s.report_error_tree_ref(
                    pred,
                    format!("this predicate produces '{predicate_type}', but must produce bool"),
                );
            }
        }
    }

    // TODO: Warn on constant match
}

/// Checks a match body by computing its type.
fn check_match_body(s: &Semantics, t: TreeRef, _tree: &Tree) {
    let _ = s.type_of(t); // Checks arm count and compatibility.

    // TODO: completeness checks
    // https://doc.rust-lang.org/nightly/nightly-rustc/rustc_pattern_analysis/usefulness/index.html

    // let arms: Vec<_> = tree
    //     .children_of_kind(&s.syntax_tree, TreeKind::MatchArm)
    //     .collect();

    // if arms.len() > 0 {
    //     for arm in &arms[1..] {
    //         // TODO: How do I know if it's complete?
    //         // TODO: How do I know if it's redundant?
    //     }
    // }
}

/// Checks that the condition of a `while` statement (the tree at child 1)
/// converts to `bool`. A condition whose type is already an error is not
/// reported again.
fn check_while_statement(s: &Semantics, tree: &Tree) {
    if let Some(expr) = tree.nth_tree(1) {
        let expr_type = s.type_of(expr);
        // Same policy as check_conditional: don't pile a second diagnostic
        // on top of an existing error.
        if !s.can_convert(&expr_type, &Type::Bool) && !expr_type.is_error() {
            s.report_error_tree_ref(
                expr,
                format!("this condition produces '{expr_type}', but must produce bool"),
            );
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::parse;

    /// An internal compiler error must panic with the expected message.
    #[test]
    #[should_panic(expected = "INTERNAL COMPILER ERROR: oh no")]
    pub fn ice() {
        let source: Rc<str> = "1+1".into();
        let (tree, lines) = parse(&source);
        let semantics = Semantics::new(ModuleId(0), "__test__".into(), source, tree.clone(), lines);
        semantics.internal_compiler_error(tree.root(), "oh no");
    }
}