From fa53841af9ef1998a0e330dc02c9b64fdb2b65d4 Mon Sep 17 00:00:00 2001 From: John Doty Date: Tue, 9 Jan 2024 07:56:14 -0800 Subject: [PATCH] [fine] Starting to look like something --- fine/src/compiler.rs | 338 +++++++++++++++++----------- fine/src/parser.rs | 23 ++ fine/src/semantics.rs | 2 + fine/tests/expression/variable.fine | 9 +- 4 files changed, 232 insertions(+), 140 deletions(-) diff --git a/fine/src/compiler.rs b/fine/src/compiler.rs index 59c19d4a..aa640b74 100644 --- a/fine/src/compiler.rs +++ b/fine/src/compiler.rs @@ -1,5 +1,7 @@ +use std::collections::HashMap; + use crate::{ - parser::{Tree, TreeKind, TreeRef}, + parser::{SyntaxTree, Tree, TreeKind, TreeRef}, semantics::{Location, Semantics, Type}, tokens::TokenKind, }; @@ -29,49 +31,148 @@ pub enum Instruction { StoreLocal(usize), } +pub enum Export { + Function(usize), + Global(usize), +} + +pub struct Module { + pub functions: Vec, // Functions + pub globals: usize, // The number of global variables + pub exports: HashMap, // Exports by name + pub init: usize, // The index of the initialization function +} + +impl Module { + pub fn new() -> Self { + Module { + functions: Vec::new(), + globals: 0, + exports: HashMap::new(), + init: 0, + } + } +} + pub struct Function { + name: String, instructions: Vec, strings: Vec, + args: usize, // TODO: Probably type information too? + locals: usize, // TODO: Same? +} + +impl Function { + pub fn new(name: &str) -> Self { + Function { + name: name.to_string(), + instructions: Vec::new(), + strings: Vec::new(), + args: 0, + locals: 0, + } + } + + pub fn name(&self) -> &str { + &self.name + } +} + +struct Compiler<'a> { + semantics: &'a Semantics<'a>, + syntax: &'a SyntaxTree<'a>, + + module: Module, + function: Function, +} + +impl<'a> Compiler<'a> { + pub fn type_of(&self, t: TreeRef) -> Type { + self.semantics.type_of(t) + } + + fn add_string(&mut self, result: String) -> usize { + let index = self.function.strings.len(); + self.function.strings.push(result); + index + } + + fn push(&mut self, inst: Instruction) -> usize { + let index = self.function.instructions.len(); + self.function.instructions.push(inst); + index + } + + fn patch(&mut self, i: usize, f: impl FnOnce(usize) -> Instruction) { + let index = self.function.instructions.len(); + self.function.instructions[i] = f(index); + } +} + +pub fn compile(semantics: &Semantics) -> Module { + let mut compiler = Compiler { + semantics, + syntax: semantics.tree(), + module: Module::new(), + function: Function::new("<< module >>"), + }; + + if let Some(t) = semantics.tree().root() { + file(&mut compiler, t); + } + + let mut module = compiler.module; + let index = module.functions.len(); + module.functions.push(compiler.function); + module.init = index; + + module +} + +fn file(c: &mut Compiler, t: TreeRef) { + let tree = &c.syntax[t]; + assert_eq!(tree.kind, TreeKind::File); + for i in 0..tree.children.len() { + if let Some(t) = tree.nth_tree(i) { + compile_statement(c, t, false); + } + } } type CR = Option<()>; const OK: CR = CR::Some(()); -pub fn compile_expression(code: &mut Function, semantics: &Semantics, t: TreeRef) { - let tree = &semantics.tree()[t]; +fn compile_expression(c: &mut Compiler, t: TreeRef) { + let tree = &c.syntax[t]; let cr = match tree.kind { TreeKind::Error => None, - TreeKind::LiteralExpression => compile_literal(code, semantics, t, tree), - TreeKind::GroupingExpression => compile_grouping(code, semantics, tree), - TreeKind::UnaryExpression => compile_unary_operator(code, semantics, tree), - TreeKind::ConditionalExpression => compile_condition_expression(code, semantics, tree), - TreeKind::BinaryExpression => compile_binary_expression(code, semantics, tree), - TreeKind::Identifier => compile_identifier_expression(code, semantics, t, tree), + TreeKind::LiteralExpression => compile_literal(c, t, tree), + TreeKind::GroupingExpression => compile_grouping(c, tree), + TreeKind::UnaryExpression => compile_unary_operator(c, tree), + TreeKind::ConditionalExpression => compile_condition_expression(c, tree), + TreeKind::BinaryExpression => compile_binary_expression(c, tree), + TreeKind::Identifier => compile_identifier_expression(c, t, tree), TreeKind::CallExpression => todo!(), - TreeKind::Block => compile_block_expression(code, semantics, tree), - _ => { - semantics.internal_compiler_error(Some(t), "tree is not an expression, cannot compile") - } + TreeKind::Block => compile_block_expression(c, tree), + _ => c + .semantics + .internal_compiler_error(Some(t), "tree is not an expression, cannot compile"), }; if matches!(cr, None) { - code.instructions.push(Instruction::Panic); + c.push(Instruction::Panic); } } -fn compile_literal(code: &mut Function, semantics: &Semantics, t: TreeRef, tr: &Tree) -> CR { +fn compile_literal(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { let tok = tr.nth_token(0)?; - match semantics.type_of(t) { - Type::F64 => code - .instructions - .push(Instruction::PushFloat(tok.as_str().parse().unwrap())), - Type::Bool => code.instructions.push(if tok.kind == TokenKind::True { + match c.type_of(t) { + Type::F64 => c.push(Instruction::PushFloat(tok.as_str().parse().unwrap())), + Type::Bool => c.push(if tok.kind == TokenKind::True { Instruction::PushTrue } else { Instruction::PushFalse }), Type::String => { - let index = code.strings.len(); - // TODO: Interpret string here make good! let mut result = String::new(); let mut input = tok.as_str().chars(); @@ -91,232 +192,195 @@ fn compile_literal(code: &mut Function, semantics: &Semantics, t: TreeRef, tr: & result.push(ch) } } - code.strings.push(result); - - code.instructions.push(Instruction::PushString(index)) + let index = c.add_string(result); + c.push(Instruction::PushString(index)) } - Type::Error => code.instructions.push(Instruction::Panic), + Type::Error => c.push(Instruction::Panic), _ => panic!("unsupported literal type: {t:?}"), }; OK } -fn compile_grouping(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR { - compile_expression(code, semantics, t.nth_tree(1)?); +fn compile_grouping(c: &mut Compiler, t: &Tree) -> CR { + compile_expression(c, t.nth_tree(1)?); OK } -fn compile_unary_operator(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR { - compile_expression(code, semantics, t.nth_tree(1)?); +fn compile_unary_operator(c: &mut Compiler, t: &Tree) -> CR { + compile_expression(c, t.nth_tree(1)?); let tok = t.nth_token(0)?; match tok.kind { TokenKind::Minus => { - code.instructions.push(Instruction::PushFloat(-1.0)); - code.instructions.push(Instruction::FloatMultiply); + c.push(Instruction::PushFloat(-1.0)); + c.push(Instruction::FloatMultiply); } TokenKind::Bang => { - code.instructions.push(Instruction::BoolNot); + c.push(Instruction::BoolNot); } _ => panic!("unsupported unary operator"), } OK } -fn compile_condition_expression(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR { +fn compile_condition_expression(c: &mut Compiler, t: &Tree) -> CR { let condition = t.nth_tree(1)?; - compile_expression(code, semantics, condition); + compile_expression(c, condition); - let jump_else_index = code.instructions.len(); - code.instructions.push(Instruction::JumpFalse(0)); + let jump_else_index = c.push(Instruction::JumpFalse(0)); let then_branch = t.nth_tree(2)?; - compile_expression(code, semantics, then_branch); + compile_expression(c, then_branch); if let Some(else_branch) = t.nth_tree(4) { - let jump_end_index = code.instructions.len(); - code.instructions.push(Instruction::Jump(0)); + let jump_end_index = c.push(Instruction::Jump(0)); + c.patch(jump_else_index, |i| Instruction::JumpFalse(i)); - let else_index = code.instructions.len(); - code.instructions[jump_else_index] = Instruction::JumpFalse(else_index); - - compile_expression(code, semantics, else_branch); - - let end_index = code.instructions.len(); - code.instructions[jump_end_index] = Instruction::Jump(end_index); + compile_expression(c, else_branch); + c.patch(jump_end_index, |i| Instruction::Jump(i)); } else { - let else_index = code.instructions.len(); - code.instructions[jump_else_index] = Instruction::JumpFalse(else_index); + c.patch(jump_else_index, |i| Instruction::JumpFalse(i)); } OK } -fn compile_binary_expression(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR { - compile_expression(code, semantics, t.nth_tree(0)?); +fn compile_binary_expression(c: &mut Compiler, t: &Tree) -> CR { + compile_expression(c, t.nth_tree(0)?); match t.nth_token(1)?.kind { TokenKind::Plus => { - compile_expression(code, semantics, t.nth_tree(2)?); - code.instructions.push(Instruction::FloatAdd); + compile_expression(c, t.nth_tree(2)?); + c.push(Instruction::FloatAdd); } TokenKind::Minus => { - compile_expression(code, semantics, t.nth_tree(2)?); - code.instructions.push(Instruction::FloatSubtract); + compile_expression(c, t.nth_tree(2)?); + c.push(Instruction::FloatSubtract); } TokenKind::Star => { - compile_expression(code, semantics, t.nth_tree(2)?); - code.instructions.push(Instruction::FloatMultiply); + compile_expression(c, t.nth_tree(2)?); + c.push(Instruction::FloatMultiply); } TokenKind::Slash => { - compile_expression(code, semantics, t.nth_tree(2)?); - code.instructions.push(Instruction::FloatDivide); + compile_expression(c, t.nth_tree(2)?); + c.push(Instruction::FloatDivide); } TokenKind::And => { - let jump_false_index = code.instructions.len(); - code.instructions.push(Instruction::JumpFalse(0)); - code.instructions.push(Instruction::PushTrue); + let jump_false_index = c.push(Instruction::JumpFalse(0)); - let jump_end_index = code.instructions.len(); - code.instructions.push(Instruction::Jump(0)); + c.push(Instruction::PushTrue); + let jump_end_index = c.push(Instruction::Jump(0)); - let false_index = code.instructions.len(); - code.instructions[jump_false_index] = Instruction::JumpFalse(false_index); + c.patch(jump_false_index, |i| Instruction::JumpFalse(i)); - compile_expression(code, semantics, t.nth_tree(2)?); + compile_expression(c, t.nth_tree(2)?); - let end_index = code.instructions.len(); - code.instructions[jump_end_index] = Instruction::Jump(end_index); + c.patch(jump_end_index, |i| Instruction::Jump(i)); } TokenKind::Or => { - let jump_true_index = code.instructions.len(); - code.instructions.push(Instruction::JumpTrue(0)); - code.instructions.push(Instruction::PushTrue); + let jump_true_index = c.push(Instruction::JumpTrue(0)); - let jump_end_index = code.instructions.len(); - code.instructions.push(Instruction::Jump(0)); + c.push(Instruction::PushTrue); + let jump_end_index = c.push(Instruction::Jump(0)); - let true_index = code.instructions.len(); - code.instructions[jump_true_index] = Instruction::JumpTrue(true_index); + c.patch(jump_true_index, |i| Instruction::JumpTrue(i)); - compile_expression(code, semantics, t.nth_tree(2)?); + compile_expression(c, t.nth_tree(2)?); - let end_index = code.instructions.len(); - code.instructions[jump_end_index] = Instruction::Jump(end_index); + c.patch(jump_end_index, |i| Instruction::Jump(i)); } _ => panic!("Unsupported binary expression"), } OK } -fn compile_identifier_expression( - code: &mut Function, - semantics: &Semantics, - t: TreeRef, - tree: &Tree, -) -> Option<()> { +fn compile_identifier_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> Option<()> { let ident = tree.nth_token(0)?; - let environment = semantics.environment_of(t); + let environment = c.semantics.environment_of(t); let declaration = environment.bind(ident)?; let instruction = match declaration.location { - Location::Local => Instruction::LoadLocal(declaration.index), - Location::Argument => Instruction::LoadArgument(declaration.index), + Location::Local => { + if declaration.index >= c.function.locals { + c.function.locals = declaration.index + 1; + } + Instruction::LoadLocal(declaration.index) + } + Location::Argument => { + assert!(declaration.index < c.function.args); + Instruction::LoadArgument(declaration.index) + } Location::Module => Instruction::LoadModule(declaration.index), }; - code.instructions.push(instruction); + c.push(instruction); OK } -fn compile_block_expression(code: &mut Function, semantics: &Semantics, tree: &Tree) -> Option<()> { +fn compile_block_expression(c: &mut Compiler, tree: &Tree) -> Option<()> { let last_is_brace = tree.nth_token(tree.children.len() - 1).is_some(); let last_index = tree.children.len() - if last_is_brace { 2 } else { 1 }; for i in 1..last_index { - compile_statement(code, semantics, tree.nth_tree(i)?, false); + compile_statement(c, tree.nth_tree(i)?, false); } - compile_statement(code, semantics, tree.nth_tree(last_index)?, true); + compile_statement(c, tree.nth_tree(last_index)?, true); OK } -pub fn compile_statement(code: &mut Function, semantics: &Semantics, t: TreeRef, gen_value: bool) { - let tree = &semantics.tree()[t]; +fn compile_statement(c: &mut Compiler, t: TreeRef, gen_value: bool) { + let tree = &c.semantics.tree()[t]; let cr = match tree.kind { - TreeKind::FunctionDecl => compile_function_declaration(code, semantics, tree, gen_value), - TreeKind::LetStatement => compile_let_statement(code, semantics, t, tree, gen_value), - TreeKind::ExpressionStatement => { - compile_expression_statement(code, semantics, tree, gen_value) - } - TreeKind::IfStatement => compile_if_statement(code, semantics, tree, gen_value), + TreeKind::FunctionDecl => compile_function_declaration(c, tree, gen_value), + TreeKind::LetStatement => compile_let_statement(c, t, tree, gen_value), + TreeKind::ExpressionStatement => compile_expression_statement(c, tree, gen_value), + TreeKind::IfStatement => compile_if_statement(c, tree, gen_value), _ => panic!("unsupported tree kind {:?}", tree.kind), }; if matches!(cr, None) { - code.instructions.push(Instruction::Panic); + c.push(Instruction::Panic); } } -fn compile_if_statement( - code: &mut Function, - semantics: &Semantics, - tree: &Tree, - gen_value: bool, -) -> CR { - compile_expression(code, semantics, tree.nth_tree(0)?); +fn compile_if_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR { + compile_expression(c, tree.nth_tree(0)?); if !gen_value { - code.instructions.push(Instruction::Discard); + c.push(Instruction::Discard); } OK } -fn compile_expression_statement( - code: &mut Function, - semantics: &Semantics, - tree: &Tree, - gen_value: bool, -) -> CR { - compile_expression(code, semantics, tree.nth_tree(0)?); +fn compile_expression_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR { + compile_expression(c, tree.nth_tree(0)?); if tree .nth_token(1) .is_some_and(|t| t.kind == TokenKind::Semicolon) { - code.instructions.push(Instruction::Discard); + c.push(Instruction::Discard); if gen_value { - code.instructions.push(Instruction::PushNothing); + c.push(Instruction::PushNothing); } } else if !gen_value { - code.instructions.push(Instruction::Discard); + c.push(Instruction::Discard); } OK } -fn compile_let_statement( - code: &mut Function, - semantics: &Semantics, - t: TreeRef, - tree: &Tree, - gen_value: bool, -) -> CR { - compile_expression(code, semantics, tree.nth_tree(3)?); - let environment = semantics.environment_of(t); +fn compile_let_statement(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR { + compile_expression(c, tree.nth_tree(3)?); + let environment = c.semantics.environment_of(t); let declaration = environment.bind(tree.nth_token(1)?)?; // NOTE: Because this is a let statement I assume it's local! assert!(matches!(declaration.location, Location::Local)); - code.instructions - .push(Instruction::StoreLocal(declaration.index)); + c.push(Instruction::StoreLocal(declaration.index)); if gen_value { - code.instructions.push(Instruction::PushNothing); + c.push(Instruction::PushNothing); } OK } -fn compile_function_declaration( - _code: &mut Function, - _semantics: &Semantics, - _tree: &Tree, - _gen_value: bool, -) -> CR { +fn compile_function_declaration(_c: &mut Compiler, _tree: &Tree, _gen_value: bool) -> CR { todo!() } diff --git a/fine/src/parser.rs b/fine/src/parser.rs index 13cdd620..8f71fbe7 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -138,6 +138,7 @@ pub enum TreeKind { BinaryExpression, IfStatement, Identifier, + PrintStatement, } pub struct Tree<'a> { @@ -555,10 +556,32 @@ fn statement(p: &mut CParser) { // require a semicolon at the end if it's all by itself. TokenKind::If => statement_if(p), + TokenKind::Print => statement_print(p), + _ => statement_expression(p), } } +fn statement_print(p: &mut CParser) { + assert!(p.at(TokenKind::Print)); + let m = p.start(); + + p.expect( + TokenKind::Print, + "expect 'print' to start a print statement", + ); + p.expect(TokenKind::LeftParen, "expect '(' to start a print"); + if !p.at(TokenKind::RightParen) { + expression(p); + } + p.expect(TokenKind::RightParen, "expect ')' after a print statement"); + if !p.at(TokenKind::RightBrace) { + p.expect(TokenKind::Semicolon, "expect ';' to end a print statement"); + } + + p.end(m, TreeKind::PrintStatement); +} + fn statement_if(p: &mut CParser) { assert!(p.at(TokenKind::If)); let m = p.start(); diff --git a/fine/src/semantics.rs b/fine/src/semantics.rs index 9ff7bda1..7c72a8d1 100644 --- a/fine/src/semantics.rs +++ b/fine/src/semantics.rs @@ -4,6 +4,8 @@ use crate::{ }; use std::{cell::RefCell, collections::HashMap, fmt, rc::Rc}; +// TODO: Unused variables? + // TODO: An error should have: // // - a start diff --git a/fine/tests/expression/variable.fine b/fine/tests/expression/variable.fine index 455da6e6..f1b002e8 100644 --- a/fine/tests/expression/variable.fine +++ b/fine/tests/expression/variable.fine @@ -18,14 +18,17 @@ // | LiteralExpression // | Number:'"2"' // | Semicolon:'";"' -// | ExpressionStatement +// | PrintStatement +// | Print:'"print"' +// | LeftParen:'"("' // | Identifier // | Identifier:'"y"' +// | RightParen:'")"' // | Semicolon:'";"' // | let x = 23; let y = x * 2; -y; +print(y); -// @type: 590 f64 +// @type: 667 f64