use std::collections::HashMap; use std::rc::Rc; use crate::{ parser::{SyntaxTree, Tree, TreeKind, TreeRef}, semantics::{Declaration, Location, Semantics, Type}, tokens::TokenKind, }; // TODO: If I were cool this would by actual bytecode. // But I'm not cool. #[derive(Debug, Clone, Copy)] pub enum Instruction { Panic, BoolNot, Call(usize), CompareBool, CompareFloat, CompareString, Discard, Dup, FloatAdd, FloatDivide, FloatMultiply, FloatSubtract, Jump(usize), JumpFalse(usize), JumpTrue(usize), LoadArgument(usize), LoadExternFunction(usize), // NOTE: FUNKY, might want to indirect this index. LoadFunction(usize), LoadLocal(usize), LoadModule(usize), LoadSlot(usize), NewObject(usize), PushFalse, PushFloat(f64), PushNothing, PushString(usize), PushTrue, Return, StoreArgument(usize), StoreLocal(usize), StoreModule(usize), StringAdd, } pub enum Export { Function(usize), Global(usize), } pub struct Module { pub functions: Vec>, // Functions pub globals: usize, // The number of global variables pub exports: HashMap, // Exports by name pub init: usize, // The index of the initialization function } impl Module { pub fn new() -> Self { Module { functions: Vec::new(), globals: 0, exports: HashMap::new(), init: 0, } } pub fn functions(&self) -> &[Rc] { &self.functions } } // TODO: Debug information. pub struct Function { name: String, instructions: Vec, strings: Vec>, args: usize, // TODO: Probably type information too? locals: usize, // TODO: Same? 
} impl Function { pub fn new(name: &str, args: usize) -> Self { Function { name: name.to_string(), instructions: Vec::new(), strings: Vec::new(), args, locals: 0, } } pub fn name(&self) -> &str { &self.name } pub fn args(&self) -> usize { self.args } pub fn locals(&self) -> usize { self.locals } pub fn strings(&self) -> &[Rc] { &self.strings } pub fn instructions(&self) -> &[Instruction] { &self.instructions } } impl std::fmt::Debug for Function { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "fn {} ({} args, {} locals) ...", self.name, self.args, self.locals ) } } #[derive(Eq, PartialEq, Hash, Clone)] struct FunctionKey { tree: TreeRef, } struct Compiler<'a> { semantics: &'a Semantics<'a>, syntax: &'a SyntaxTree<'a>, function_bindings: HashMap, pending_functions: Vec<(FunctionKey, usize, Function)>, temp_functions: Vec>>, module: Module, function: Function, } impl<'a> Compiler<'a> { pub fn type_of(&self, t: TreeRef) -> Type { self.semantics.type_of(t) } fn add_string(&mut self, result: String) -> usize { let index = self.function.strings.len(); self.function.strings.push(result.into()); index } fn push(&mut self, inst: Instruction) -> usize { let index = self.function.instructions.len(); self.function.instructions.push(inst); index } fn patch(&mut self, i: usize, f: impl FnOnce(usize) -> Instruction) { let index = self.function.instructions.len(); self.function.instructions[i] = f(index); } } macro_rules! compiler_assert_eq { ($compiler:expr, $tr:expr, $ll:expr, $rr:expr $(,)?) => {{ let left = &$ll; let right = &$rr; if left != right { let semantics = $compiler.semantics; semantics.dump_compiler_state(Some($tr)); assert_eq!(left, right); } }}; ($compiler:expr, $tr:expr, $ll:expr, $rr:expr, $($t:tt)+) => {{ let left = &$ll; let right = &$rr; if left != right { let semantics = $compiler.semantics; semantics.dump_compiler_state(Some($tr)); assert_eq!(left, right, $($t)*); } }}; } macro_rules! 
compiler_assert { ($compiler:expr, $tr:expr, $cond:expr $(,)?) => {{ if !$cond { let semantics = $compiler.semantics; semantics.dump_compiler_state(Some($tr)); assert!($cond); } }}; ($compiler:expr, $tr:expr, $cond:expr, $($arg:tt)+) => {{ if !$cond { let semantics = $compiler.semantics; semantics.dump_compiler_state(Some($tr)); assert!($cond, $($arg)*); } }}; } macro_rules! ice { ($compiler: expr, $tr:expr, $($t:tt)+) => {{ let semantics = $compiler.semantics; semantics.dump_compiler_state(Some($tr)); panic!($($t)*) }} } macro_rules! inst_panic { ($($t:tt)+) => {{ // eprintln!($($t)*); Instruction::Panic }}; } // macro_rules! ice { // ($compiler:expr, $tr:expr, $($t:tt)*) => {{}}; // } pub fn compile(semantics: &Semantics) -> Rc { let mut compiler = Compiler { semantics, syntax: semantics.tree(), function_bindings: HashMap::new(), pending_functions: Vec::new(), temp_functions: Vec::new(), module: Module::new(), function: Function::new("<< module >>", 0), }; if let Some(t) = semantics.tree().root() { compiler.temp_functions.push(None); file(&mut compiler, t); compiler.temp_functions[0] = Some(Rc::new(compiler.function)); compiler.module.init = 0; } while let Some((fk, idx, func)) = compiler.pending_functions.pop() { if idx >= compiler.temp_functions.len() { compiler.temp_functions.resize(idx + 1, None); } compiler.function = func; compile_function(&mut compiler, fk.tree); compiler.temp_functions[idx] = Some(Rc::new(compiler.function)); } let mut module = compiler.module; for f in compiler.temp_functions { module.functions.push(f.unwrap()); } Rc::new(module) } fn file(c: &mut Compiler, t: TreeRef) { let tree = &c.syntax[t]; compiler_assert_eq!(c, t, tree.kind, TreeKind::File, "must be compiling a file"); let children: Vec<_> = tree.child_trees().collect(); if children.len() == 0 { c.push(Instruction::PushNothing); } else { for i in 0..children.len() - 1 { compile_statement(c, children[i], false); } compile_statement(c, *children.last().unwrap(), true); } 
c.push(Instruction::Return); } type CR = Option<()>; const OK: CR = CR::Some(()); fn compile_expression(c: &mut Compiler, t: TreeRef) { let tree = &c.syntax[t]; let cr = match tree.kind { TreeKind::Error => None, TreeKind::LiteralExpression => compile_literal(c, t, tree), TreeKind::GroupingExpression => compile_grouping(c, tree), TreeKind::UnaryExpression => compile_unary_operator(c, t, tree), TreeKind::ConditionalExpression => compile_condition_expression(c, tree), TreeKind::BinaryExpression => compile_binary_expression(c, t, tree), TreeKind::Identifier => compile_identifier_expression(c, t, tree), TreeKind::CallExpression => compile_call_expression(c, tree), TreeKind::Block => compile_block_expression(c, tree), TreeKind::Argument => compile_argument(c, tree), TreeKind::NewObjectExpression => compile_new_object_expression(c, t, tree), TreeKind::FieldValue => compile_field_value(c, t, tree), TreeKind::MemberAccess => compile_member_access(c, tree), TreeKind::SelfReference => compile_self_reference(c), _ => ice!(c, t, "{tree:?} is not an expression, cannot compile"), }; if matches!(cr, None) { c.push(inst_panic!("panic compiling expression {:?}", tree)); } } fn compile_literal(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { let tok = tr.nth_token(0)?; match c.type_of(t) { Type::F64 => c.push(Instruction::PushFloat(tok.as_str().parse().unwrap())), Type::Bool => c.push(if tok.kind == TokenKind::True { Instruction::PushTrue } else { Instruction::PushFalse }), Type::String => { let mut result = String::new(); let mut input = tok.as_str().chars(); while let Some(ch) = input.next() { if ch == '\\' { if let Some(ch) = input.next() { match ch { 'n' => result.push('\n'), 'r' => result.push('\r'), 't' => result.push('\t'), _ => result.push(ch), } } else { result.push(ch) } } else { result.push(ch) } } let index = c.add_string(result); c.push(Instruction::PushString(index)) } Type::Error => c.push(inst_panic!("compiling literal {:?}", tr)), _ => ice!(c, t, "unsupported 
literal type: {t:?}"), }; OK } fn compile_grouping(c: &mut Compiler, t: &Tree) -> CR { compile_expression(c, t.nth_tree(1)?); OK } fn compile_unary_operator(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { compile_expression(c, tr.nth_tree(1)?); let tok = tr.nth_token(0)?; match tok.kind { TokenKind::Minus => { c.push(Instruction::PushFloat(-1.0)); c.push(Instruction::FloatMultiply); } TokenKind::Bang => { c.push(Instruction::BoolNot); } _ => ice!(c, t, "unsupported unary operator"), } OK } fn compile_condition_expression(c: &mut Compiler, t: &Tree) -> CR { let condition = t.nth_tree(1)?; compile_expression(c, condition); let jump_else_index = c.push(Instruction::JumpFalse(0)); let then_branch = t.nth_tree(2)?; compile_expression(c, then_branch); if let Some(else_branch) = t.nth_tree(4) { let jump_end_index = c.push(Instruction::Jump(0)); c.patch(jump_else_index, |i| Instruction::JumpFalse(i)); compile_expression(c, else_branch); c.patch(jump_end_index, |i| Instruction::Jump(i)); } else { c.patch(jump_else_index, |i| Instruction::JumpFalse(i)); } OK } fn compile_simple_binary_expression(c: &mut Compiler, tr: &Tree, f: T) -> CR where T: FnOnce(&mut Compiler, &Type) -> Instruction, { compile_expression(c, tr.nth_tree(0)?); let arg_tree = tr.nth_tree(2)?; let arg_type = c.semantics.type_of(arg_tree); compile_expression(c, arg_tree); let inst = f(c, &arg_type); c.push(inst); OK } fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { match tr.nth_token(1)?.kind { TokenKind::Plus => compile_simple_binary_expression(c, tr, |_, t| match t { Type::F64 => Instruction::FloatAdd, Type::String => Instruction::StringAdd, _ => inst_panic!("panic adding {}", t), }), TokenKind::Minus => { compile_simple_binary_expression(c, tr, |_, _| Instruction::FloatSubtract) } TokenKind::Star => { compile_simple_binary_expression(c, tr, |_, _| Instruction::FloatMultiply) } TokenKind::Slash => { compile_simple_binary_expression(c, tr, |_, _| Instruction::FloatDivide) } 
TokenKind::And => { compile_expression(c, tr.nth_tree(0)?); let jump_false_index = c.push(Instruction::JumpFalse(0)); c.push(Instruction::PushTrue); let jump_end_index = c.push(Instruction::Jump(0)); c.patch(jump_false_index, |i| Instruction::JumpFalse(i)); compile_expression(c, tr.nth_tree(2)?); c.patch(jump_end_index, |i| Instruction::Jump(i)); OK } TokenKind::Or => { compile_expression(c, tr.nth_tree(0)?); let jump_true_index = c.push(Instruction::JumpTrue(0)); c.push(Instruction::PushTrue); let jump_end_index = c.push(Instruction::Jump(0)); c.patch(jump_true_index, |i| Instruction::JumpTrue(i)); compile_expression(c, tr.nth_tree(2)?); c.patch(jump_end_index, |i| Instruction::Jump(i)); OK } TokenKind::EqualEqual => { compile_simple_binary_expression(c, tr, |c, arg_type| { if c.semantics.type_compat(&arg_type, &Type::Nothing) { c.push(Instruction::Discard); c.push(Instruction::Discard); Instruction::PushTrue } else { match arg_type { Type::F64 => Instruction::CompareFloat, Type::String => Instruction::CompareString, Type::Bool => Instruction::CompareBool, // ? _ => inst_panic!("panic comparing {}", arg_type), } } }) } TokenKind::Equal => { compile_expression(c, tr.nth_tree(2)?); c.push(Instruction::Dup); let lvalue = tr.nth_tree(0)?; let ltree = &c.syntax[lvalue]; match ltree.kind { TreeKind::Identifier => { let ident = ltree.nth_token(0)?; let environment = c.semantics.environment_of(lvalue); let declaration = environment.bind(ident)?; let instruction = match declaration { Declaration::Variable { location, index, .. 
} => { let index = *index; match location { Location::Argument => { compiler_assert!(c, t, index < c.function.args); Instruction::StoreArgument(index) } Location::Local => { if index >= c.function.locals { c.function.locals = index + 1; } Instruction::StoreLocal(index) } Location::Module => { compiler_assert!(c, t, index < c.module.globals); Instruction::StoreModule(index) } Location::Slot => { ice!(c, t, "cannot have an identifier lvalue bind to a slot"); } } } Declaration::ExternFunction { .. } => inst_panic!("store ext"), Declaration::Function { .. } => inst_panic!("store func"), Declaration::Class { .. } => inst_panic!("store class"), }; c.push(instruction); } // TODO: Member // TODO: List element _ => ice!(c, t, "Unsupported lvalue type"), } OK } _ => ice!(c, t, "Unsupported binary expression"), } } fn compile_identifier_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> Option<()> { let ident = tree.nth_token(0)?; let environment = c.semantics.environment_of(t); let declaration = environment.bind(ident)?; compile_load_declaration(c, t, declaration) } fn compile_load_declaration(c: &mut Compiler, t: TreeRef, declaration: &Declaration) -> CR { let instruction = match declaration { Declaration::Variable { location, index, .. } => { let index = *index; match location { Location::Local => { if index >= c.function.locals { c.function.locals = index + 1; } Instruction::LoadLocal(index) } Location::Argument => { compiler_assert!(c, t, index < c.function.args); Instruction::LoadArgument(index) } Location::Module => { compiler_assert!(c, t, index < c.module.globals); Instruction::LoadModule(index) } Location::Slot => { // TODO: Assert slot is in field range? Instruction::LoadSlot(index) } } } Declaration::Function { declaration, .. 
} => { let key = FunctionKey { tree: *declaration }; let index = match c.function_bindings.get(&key) { Some(index) => *index, None => { let tree = &c.syntax[*declaration]; compiler_assert_eq!(c, t, tree.kind, TreeKind::FunctionDecl); compile_function_declaration(c, *declaration, tree, false)?; match c.function_bindings.get(&key) { Some(index) => *index, None => { ice!( c, t, "did not compile the function with key {:?}!", declaration ) } } } }; Instruction::LoadFunction(index) } Declaration::ExternFunction { id, .. } => Instruction::LoadExternFunction(id.id()), // Must be a static don't worry about it. Declaration::Class { .. } => return OK, }; c.push(instruction); OK } fn compile_call_expression(c: &mut Compiler, tree: &Tree) -> CR { let arg_list = tree.child_tree_of_kind(c.syntax, TreeKind::ArgumentList)?; let mut args: Vec<_> = arg_list.child_trees().collect(); let arg_count = args.len(); args.reverse(); for arg in args { compile_expression(c, arg); } let func = tree.nth_tree(0)?; let func_type = c.semantics.type_of(func); let arg_count = match func_type { // TODO: Consider being guided by syntax here? Type::Method(..) => arg_count + 1, _ => arg_count, }; compile_expression(c, func); c.push(Instruction::Call(arg_count)); OK } fn compile_block_expression(c: &mut Compiler, tree: &Tree) -> CR { if tree.children.len() == 2 { c.push(Instruction::PushNothing); return OK; } let last_is_brace = tree.nth_token(tree.children.len() - 1).is_some(); let last_index = tree.children.len() - if last_is_brace { 2 } else { 1 }; for i in 1..last_index { compile_statement(c, tree.nth_tree(i)?, false); } compile_statement(c, tree.nth_tree(last_index)?, true); OK } fn compile_argument(c: &mut Compiler, tree: &Tree) -> CR { compile_expression(c, tree.nth_tree(0)?); OK } fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { // We pass in the arguments.... by... field order? 
let Type::Object(ct, _) = c.semantics.type_of(t) else { c.push(inst_panic!("new obj not ob")); return OK; }; let class = c.semantics.class_of(ct); let field_list = tree.child_tree_of_kind(c.syntax, TreeKind::FieldList)?; let mut field_bindings = HashMap::new(); for field in field_list.children_of_kind(c.syntax, TreeKind::FieldValue) { let f = &c.syntax[field]; let name = f.nth_token(0)?; field_bindings.insert(name.as_str(), field); } // The fields come in this order and since arguments are backwards // (stack!) we compile them in reverse order. Missing fields panic, // obviously. for field in class.fields.iter().rev() { let binding = field_bindings.get(&*field.name)?; compile_expression(c, *binding); } // Fetch the correct constructor. let type_reference = tree.child_tree_of_kind(c.syntax, TreeKind::TypeExpression)?; let identifier = type_reference.nth_token(0)?; let environment = c.semantics.environment_of(t); match environment.bind(identifier)? { Declaration::Class { declaration, .. } => { let key = FunctionKey { tree: *declaration }; let index = match c.function_bindings.get(&key) { Some(index) => *index, None => { let tree = &c.syntax[*declaration]; compiler_assert_eq!(c, t, tree.kind, TreeKind::ClassDecl); compile_class_declaration(c, t, tree, false)?; *c.function_bindings .get(&key) .expect("did not compile the class constructor!") } }; c.push(Instruction::LoadFunction(index)); } _ => return None, } c.push(Instruction::Call(class.fields.len())); OK } fn compile_field_value(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { if let Some(colon) = tree.nth_token(1) { if colon.kind == TokenKind::Colon { compile_expression(c, tree.nth_tree(2)?); return OK; } } // Form 2: { x, ... 
} let environment = c.semantics.environment_of(t); let id = tree.nth_token(0)?; let declaration = environment.bind(id)?; compile_load_declaration(c, t, declaration) } fn compile_member_access(c: &mut Compiler, tree: &Tree) -> CR { // In member access; the lhs sets up the object and in theory the rhs // binds against it. ::shrug:: // compile_expression(c, tree.nth_tree(0)?); // NOTE: If this is a method call we still don't have to do anything // special here, since the load of the member function will *not* // consume the self pointer from the stack. compile_expression(c, tree.nth_tree(2)?); OK } fn compile_self_reference(c: &mut Compiler) -> CR { c.push(Instruction::LoadArgument(0)); OK } fn compile_statement(c: &mut Compiler, t: TreeRef, gen_value: bool) { let tree = &c.semantics.tree()[t]; let cr = match tree.kind { TreeKind::FunctionDecl => compile_function_declaration(c, t, tree, gen_value), TreeKind::ClassDecl => compile_class_declaration(c, t, tree, gen_value), TreeKind::LetStatement => compile_let_statement(c, t, tree, gen_value), TreeKind::ExpressionStatement => compile_expression_statement(c, tree, gen_value), TreeKind::IfStatement => compile_if_statement(c, tree, gen_value), TreeKind::Block => compile_block_statement(c, t, gen_value), _ => ice!(c, t, "unsupported tree kind {:?}", tree.kind), }; if matches!(cr, None) { c.push(inst_panic!("stat {:?}", tree)); } } fn compile_if_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR { compile_expression(c, tree.nth_tree(0)?); if !gen_value { c.push(Instruction::Discard); } OK } fn compile_expression_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR { if let Some(expr) = tree.nth_tree(0) { compile_expression(c, expr); if tree .nth_token(1) .is_some_and(|t| t.kind == TokenKind::Semicolon) { c.push(Instruction::Discard); if gen_value { c.push(Instruction::PushNothing); } } else if !gen_value { c.push(Instruction::Discard); } } else if gen_value { c.push(Instruction::PushNothing); }; OK } 
/// Compiles a `let`: evaluates the initializer (child 3) and stores it into
/// the local or module slot the name is bound to, growing the relevant slot
/// count if needed. Yields nothing as a value.
fn compile_let_statement(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR {
    compile_expression(c, tree.nth_tree(3)?);
    let environment = c.semantics.environment_of(t);
    let declaration = environment.bind(tree.nth_token(1)?)?;

    let Declaration::Variable {
        location, index, ..
    } = declaration
    else {
        ice!(c, t, "let cannot make a non-variable declaration")
    };

    let index = *index;
    let instruction = match location {
        Location::Local => {
            if index >= c.function.locals {
                c.function.locals = index + 1;
            }
            Instruction::StoreLocal(index)
        }
        Location::Module => {
            if index >= c.module.globals {
                c.module.globals = index + 1;
            }
            Instruction::StoreModule(index)
        }
        _ => ice!(c, t, "unsuitable location for let declaration"),
    };
    c.push(instruction);
    if gen_value {
        c.push(Instruction::PushNothing);
    }

    OK
}

/// Registers the function declared at `t`: reserves a function index, queues
/// the body for later compilation, and exports it by name. Declarations that
/// are already registered are left alone. Yields nothing as a value.
fn compile_function_declaration(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR {
    // Only compile a given function once.
    //
    // TODO: When it's time for generics, this should only actually compile
    //       if we have no unbound type variables.
    let fk = FunctionKey { tree: t };
    if !c.function_bindings.contains_key(&fk) {
        // TODO: If this is a method the name should be different.
        let name = tree.nth_token(1)?;

        let param_list = tree.child_tree_of_kind(c.syntax, TreeKind::ParamList)?;
        // Everything but the two delimiters; saturate so a malformed list
        // cannot underflow.
        let param_count = param_list.children.len().saturating_sub(2);

        let function_index = c.temp_functions.len();
        c.temp_functions.push(None);

        c.pending_functions.push((
            fk.clone(),
            function_index,
            Function::new(name.as_str(), param_count),
        ));
        c.function_bindings.insert(fk, function_index);
        c.module
            .exports
            .insert(name.to_string(), Export::Function(function_index));
    }

    if gen_value {
        c.push(Instruction::PushNothing);
    }

    OK
}

/// Registers the constructor for the class declared at `t`, in the same way
/// functions are registered. Yields nothing as a value.
fn compile_class_declaration(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR {
    // Only compile a given function once.
    // Classes get compiled as constructor functions which get called.
    let fk = FunctionKey { tree: t };
    if !c.function_bindings.contains_key(&fk) {
        let name = tree.nth_token(1)?;

        // The constructor takes one argument per field declaration; this is
        // the same count `compile_function` uses when it emits the body.
        let field_count = tree.children_of_kind(c.syntax, TreeKind::FieldDecl).count();

        let function_index = c.temp_functions.len();
        c.temp_functions.push(None);

        c.pending_functions.push((
            fk.clone(),
            function_index,
            Function::new(name.as_str(), field_count),
        ));
        c.function_bindings.insert(fk, function_index);
        c.module
            .exports
            .insert(name.to_string(), Export::Function(function_index));
    }

    if gen_value {
        c.push(Instruction::PushNothing);
    }

    OK
}

/// Emits the body of a queued function into the compiler's current function.
///
/// A `FunctionDecl` compiles its block. A `ClassDecl` compiles to a
/// constructor that loads its arguments last-to-first, pushes the class
/// name, and builds the object. Both end in `Return`.
///
/// Returns `None` without emitting `Return` if the tree is missing a
/// required child.
fn compile_function(c: &mut Compiler, t: TreeRef) -> CR {
    let tree = &c.syntax[t];
    match tree.kind {
        TreeKind::FunctionDecl => {
            let block = tree.child_of_kind(c.syntax, TreeKind::Block)?;
            compile_expression(c, block);
        }
        TreeKind::ClassDecl => {
            let count = tree.children_of_kind(c.syntax, TreeKind::FieldDecl).count();
            for i in 0..count {
                c.push(Instruction::LoadArgument(count - 1 - i));
            }

            let name = tree.nth_token(1)?.as_str();
            let name_index = c.add_string(name.to_string());
            c.push(Instruction::PushString(name_index));
            c.push(Instruction::NewObject(count));
        }
        _ => ice!(c, t, "what is this tree doing in compile_function?"),
    }
    c.push(Instruction::Return);
    OK
}

/// Compiles a block in statement position: compile it as an expression and
/// drop the value if the caller does not want one.
fn compile_block_statement(c: &mut Compiler, t: TreeRef, gen_value: bool) -> CR {
    compile_expression(c, t);
    if !gen_value {
        c.push(Instruction::Discard);
    }
    OK
}