oden/fine/src/compiler.rs
John Doty 0b0b5d72d0 [fine] Tokens are by reference, ephemera
It's another jump, perhaps, but smaller arrays, and now we can track
ephemera efficiently without bloating child trees. (We could also
put ephemera inline with the child trees but then nth_token would be
unwieldy, and it would lower our data density.)
2024-04-06 17:39:33 -07:00

1369 lines
44 KiB
Rust

use std::collections::HashMap;
use std::rc::Rc;
use crate::{
parser::{Child, SyntaxTree, TokenRef, Tree, TreeKind, TreeRef},
semantics::{
string_constant_to_string, Declaration, Location, ModuleId, Origin, Semantics, Type,
},
tokens::{Token, TokenKind},
};
// Well-known external function indices.
// NOTE(review): presumably these are the values carried by
// `Instruction::LoadExternFunction`; the consumer is not in this file -- confirm.
pub const EXTERN_BUILTIN_NOOP: usize = 0;
pub const EXTERN_BUILTIN_LIST_GET_ITERATOR: usize = 1;
pub const EXTERN_BUILTIN_LIST_ITERATOR_NEXT: usize = 2;
// NOTE(review): the name suggests user-defined externs are numbered from here
// upward, leaving the lower range for built-ins -- verify against the runtime.
pub const EXTERN_USER_FIRST: usize = 100000;
// TODO: If I were cool this would be actual bytecode.
// But I'm not cool.
/// One instruction emitted by the compiler into a `Function`.
///
/// Only the operand meanings that this file establishes are documented on the
/// variants below; the execution semantics belong to the runtime, which is
/// not part of this file.
#[derive(Debug, Clone, Copy)]
pub enum Instruction {
// Operand: index into the owning function's string table (the panic description).
Panic(usize),
BoolNot,
// Operand: the number of arguments being passed.
Call(usize),
Discard,
Dup,
EqBool,
EqFloat,
EqString,
FloatAdd,
FloatDivide,
FloatMultiply,
FloatSubtract,
GreaterFloat,
GreaterString,
IsBool,
// Operand: the tree index of the class declaration, used as the class identity.
IsClass(i64),
IsFloat,
IsNothing,
IsString,
// Jump operands are instruction indices within the same function.
Jump(usize),
JumpFalse(usize),
JumpTrue(usize), // TODO: Only one of these, and use BoolNot?
LessFloat,
LessString,
// Load/Store operands are the `index` of the bound declaration.
LoadArgument(usize),
LoadExternFunction(usize), // NOTE: FUNKY, might want to indirect this index.
LoadFunction(usize),
LoadLocal(usize),
LoadModule(usize),
LoadSlot(usize),
// Operand: the number of fields of the object being built.
NewObject(usize),
PushFalse,
PushFloat(f64),
PushInt(i64),
PushNothing,
// Operand: index into the owning function's string table.
PushString(usize),
PushTrue,
Return,
StoreArgument(usize),
StoreLocal(usize),
StoreModule(usize),
StoreSlot(usize),
StringAdd,
// Operand: the number of element expressions compiled before this instruction.
NewList(usize),
// Emitted directly before a module/function access whose declaration lives
// in a different module than the one being compiled.
ModulePrefix(ModuleId),
}
/// What a name in `CompiledModule::exports` refers to.
pub enum Export {
// Index of the function in the module's function table; this is what
// `compile_module` records for every compiled function and class.
Function(usize),
// NOTE(review): never constructed in the visible part of this file;
// presumably the index of a module-level variable -- confirm.
Global(usize),
}
/// The result of compiling one module: its functions plus the bookkeeping
/// needed to locate the initializer, globals, exports and dependencies.
pub struct CompiledModule {
pub id: ModuleId,
pub functions: Vec<Rc<Function>>, // Functions
pub globals: usize, // The number of global variables
pub exports: HashMap<String, Export>, // Exports by name
pub init: usize, // The index of the initialization function
pub deps: Vec<ModuleId>, // Modules I depend on
}
impl CompiledModule {
pub fn new(id: ModuleId) -> Self {
CompiledModule {
id,
functions: Vec::new(),
globals: 0,
exports: HashMap::new(),
init: 0,
deps: Vec::new(),
}
}
pub fn init_function(&self) -> &Rc<Function> {
&self.functions[self.init]
}
pub fn functions(&self) -> &[Rc<Function>] {
&self.functions
}
}
// TODO: Debug information.
/// A compiled function: its instruction stream plus the string table the
/// instructions index into, and the argument/local counts.
pub struct Function {
name: String,
instructions: Vec<Instruction>,
// String constants referenced by index from `PushString` and `Panic`.
strings: Vec<Rc<str>>,
args: usize, // TODO: Probably type information too?
// Grown by the compiler to one past the highest local index it touches.
locals: usize, // TODO: Same?
}
impl Function {
pub fn new(name: &str, args: usize) -> Self {
Function {
name: name.to_string(),
instructions: Vec::new(),
strings: Vec::new(),
args,
locals: 0,
}
}
pub fn name(&self) -> &str {
&self.name
}
pub fn args(&self) -> usize {
self.args
}
pub fn locals(&self) -> usize {
self.locals
}
pub fn strings(&self) -> &[Rc<str>] {
&self.strings
}
pub fn instructions(&self) -> &[Instruction] {
&self.instructions
}
}
impl std::fmt::Debug for Function {
    /// Prints a one-line summary (name, argument count, local count) rather
    /// than the full instruction stream.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            name, args, locals, ..
        } = self;
        write!(f, "fn {name} ({args} args, {locals} locals) ...")
    }
}
/// Mutable state threaded through every `compile_*` function.
struct Compiler<'a> {
// Source text that tokens are sliced out of.
source: &'a str,
// Type, environment and class lookups for the module being compiled.
semantics: &'a Semantics,
// The syntax tree being compiled; indexed by `TreeRef` / `TokenRef`.
syntax: &'a SyntaxTree,
// The module being assembled.
module: CompiledModule,
// The function currently being emitted into (`push` appends here).
function: Function,
}
impl std::ops::Index<TreeRef> for Compiler<'_> {
    type Output = Tree;

    /// Looks the tree up in the syntax tree being compiled.
    #[inline]
    fn index(&self, tree: TreeRef) -> &Tree {
        std::ops::Index::index(self.syntax, tree)
    }
}
impl std::ops::Index<&TreeRef> for Compiler<'_> {
    type Output = Tree;

    /// Looks the referenced tree up in the syntax tree being compiled.
    #[inline]
    fn index(&self, tree: &TreeRef) -> &Tree {
        std::ops::Index::index(self.syntax, tree)
    }
}
impl std::ops::Index<TokenRef> for Compiler<'_> {
    type Output = Token;

    /// Looks the token up in the syntax tree being compiled.
    #[inline]
    fn index(&self, token: TokenRef) -> &Token {
        std::ops::Index::index(self.syntax, token)
    }
}
impl std::ops::Index<&TokenRef> for Compiler<'_> {
    type Output = Token;

    /// Looks the referenced token up in the syntax tree being compiled.
    #[inline]
    fn index(&self, token: &TokenRef) -> &Token {
        std::ops::Index::index(self.syntax, token)
    }
}
impl<'a> Compiler<'a> {
fn add_string(&mut self, result: String) -> usize {
let index = self.function.strings.len();
self.function.strings.push(result.into());
index
}
fn push(&mut self, inst: Instruction) -> usize {
let index = self.function.instructions.len();
self.function.instructions.push(inst);
index
}
fn inst_panic<T>(&mut self, description: T) -> Instruction
where
T: Into<String>,
{
// TODO: We should be looking for semantic errors and using *those*
// as the panic description and only fall back to the provided
// description if we can't find a semantic error. The idea is
// that if the compiler got confused it *might* be because
// there was actually a semantic error in the program and that
// semantic error might be a better description of what is
// wrong.
let index = self.add_string(description.into());
Instruction::Panic(index)
}
fn push_panic<T>(&mut self, description: T) -> usize
where
T: Into<String>,
{
let instruction = self.inst_panic(description);
self.push(instruction)
}
fn patch(&mut self, i: usize, f: impl FnOnce(usize) -> Instruction) {
let index = self.function.instructions.len();
self.function.instructions[i] = f(index);
}
}
// Like `assert_eq!`, but dumps the compiler state for tree `$tr` (via
// `$compiler.semantics.dump_compiler_state`) before the assertion fires.
// Both operands are evaluated exactly once and compared by reference.
// The second arm forwards a custom message to `assert_eq!`.
macro_rules! compiler_assert_eq {
($compiler:expr, $tr:expr, $ll:expr, $rr:expr $(,)?) => {{
let left = &$ll;
let right = &$rr;
if left != right {
$compiler.semantics.dump_compiler_state(Some($tr));
assert_eq!(left, right);
}
}};
($compiler:expr, $tr:expr, $ll:expr, $rr:expr, $($t:tt)+) => {{
let left = &$ll;
let right = &$rr;
if left != right {
$compiler.semantics.dump_compiler_state(Some($tr));
assert_eq!(left, right, $($t)*);
}
}};
}
// Like `assert!`, but dumps the compiler state for tree `$tr` (via
// `$compiler.semantics.dump_compiler_state`) before panicking.
//
// The condition is evaluated exactly once. Re-expanding `$cond` inside a
// nested `assert!` would run it a second time on failure, which repeats any
// side effects and lets an unstable condition dump state without panicking.
// The second arm takes a custom panic message in `format!` syntax.
macro_rules! compiler_assert {
    ($compiler:expr, $tr:expr, $cond:expr $(,)?) => {{
        let holds: bool = $cond;
        if !holds {
            $compiler.semantics.dump_compiler_state(Some($tr));
            panic!("assertion failed: {}", stringify!($cond));
        }
    }};
    ($compiler:expr, $tr:expr, $cond:expr, $($arg:tt)+) => {{
        let holds: bool = $cond;
        if !holds {
            $compiler.semantics.dump_compiler_state(Some($tr));
            panic!($($arg)*);
        }
    }};
}
// "Internal compiler error": dumps the compiler state for tree `$tr` and then
// panics with the given `format!`-style message. The expansion ends in
// `panic!`, so it can be used where any value type is expected (e.g. a match arm).
macro_rules! ice {
($compiler:expr, $tr:expr, $($t:tt)+) => {{
$compiler.semantics.dump_compiler_state(Some($tr));
panic!($($t)*)
}}
}
// Result type of the individual `compile_*` helpers: success carries no value,
// failure carries a static description that callers turn into a runtime
// panic instruction.
type CR = Result<(), &'static str>;
// Shorthand for the success value of `CR`.
const OK: CR = CR::Ok(());
/// Creates the (still empty) `Function` for a function declaration tree.
///
/// The name is the text of the declaration's second token. The argument
/// count is the number of child trees of its `ParamList`.
///
/// Returns `Err` if the name token or the parameter list is missing.
fn function_from_function_decl(
    source: &str,
    syntax: &SyntaxTree,
    tree: &Tree,
) -> Result<Function, &'static str> {
    // TODO: If this is a method the name should be different.
    let name = syntax[tree.nth_token(1).ok_or("no id")?].as_str(source);
    let param_list = tree
        .child_tree_of_kind(syntax, TreeKind::ParamList)
        .ok_or("no paramlist")?;

    // Count the parameter trees themselves instead of subtracting the two
    // parentheses from the child count: the subtraction underflows when a
    // malformed list is missing a parenthesis.
    let param_count = param_list.child_trees().count();
    Ok(Function::new(name, param_count))
}
fn function_from_class_decl(
source: &str,
syntax: &SyntaxTree,
tree: &Tree,
) -> Result<Function, &'static str> {
let name = syntax[tree.nth_token(1).ok_or("no name")?].as_str(source);
// TODO: I think this is incorrect!
let field_count = tree.children.len() - 2;
Ok(Function::new(name, field_count))
}
pub fn compile_module(semantics: &Semantics) -> Rc<CompiledModule> {
let source = semantics.source();
let syntax_tree = semantics.tree();
let mut compiler = Compiler {
source: &source,
semantics: &semantics,
syntax: &syntax_tree,
module: CompiledModule::new(semantics.mid()),
function: Function::new("<< module >>", 0),
};
let mut functions = vec![None; semantics.function_count() + 1];
if let Some(t) = semantics.tree().root() {
file(&mut compiler, t);
let index = functions.len() - 1;
functions[index] = Some(Rc::new(compiler.function));
compiler.module.init = index;
}
for t in semantics.tree().trees() {
if let Some(function_index) = semantics.get_function_index(t) {
let tree = &semantics.tree()[t];
let function = match tree.kind {
TreeKind::FunctionDecl => function_from_function_decl(&source, &syntax_tree, tree),
TreeKind::ClassDecl => function_from_class_decl(&source, &syntax_tree, tree),
_ => Err("don't know how to make a function of this"),
};
if let Ok(function) = function {
compiler.function = function;
let _ = compile_function(&mut compiler, t);
let function = Rc::new(compiler.function);
compiler
.module
.exports
.insert(function.name.clone(), Export::Function(function_index));
functions[function_index] = Some(function);
}
}
}
let mut module = compiler.module;
for f in functions {
module.functions.push(f.unwrap());
}
module.deps.append(&mut semantics.import_ids());
Rc::new(module)
}
/// Compiles a `File` tree into the current function: every top-level
/// statement in order, with only the last one asked to produce a value,
/// followed by `Return`. An empty file pushes `PushNothing` instead.
fn file(c: &mut Compiler, t: TreeRef) {
    let tree = &c[t];
    compiler_assert_eq!(c, t, tree.kind, TreeKind::File, "must be compiling a file");

    let statements: Vec<_> = tree.child_trees().collect();
    match statements.split_last() {
        None => {
            c.push(Instruction::PushNothing);
        }
        Some((&final_statement, leading)) => {
            for &statement in leading {
                compile_statement(c, statement, false);
            }
            compile_statement(c, final_statement, true);
        }
    }

    c.push(Instruction::Return);
}
/// Compiles the expression tree `t` by dispatching on its kind.
///
/// A handler that returns `Err` does not abort compilation: the error text is
/// turned into a panic instruction appended after whatever the handler had
/// already emitted. A tree kind that is not an expression is an internal
/// compiler error (`ice!`).
fn compile_expression(c: &mut Compiler, t: TreeRef) {
// Indexed through `c.syntax` rather than `c` so `tree` stays usable while
// `c` is handed out mutably to the handlers below.
let tree = &c.syntax[t];
let cr = match tree.kind {
TreeKind::Error => Err("error tree"),
TreeKind::Argument => compile_argument(c, tree),
TreeKind::BinaryExpression => compile_binary_expression(c, t, tree),
TreeKind::Block => compile_block_expression(c, tree),
TreeKind::CallExpression => compile_call_expression(c, tree),
TreeKind::ConditionalExpression => compile_condition_expression(c, tree),
TreeKind::FieldValue => compile_field_value(c, t, tree),
TreeKind::GroupingExpression => compile_grouping(c, tree),
TreeKind::Identifier => compile_identifier_expression(c, t, tree),
TreeKind::IsExpression => compile_is_expression(c, tree),
TreeKind::ListConstructor => compile_list_constructor(c, tree),
TreeKind::ListConstructorElement => compile_list_constructor_element(c, tree),
TreeKind::LiteralExpression => compile_literal(c, t, tree),
TreeKind::MemberAccess => compile_member_access(c, t, tree),
TreeKind::NewObjectExpression => compile_new_object_expression(c, t, tree),
TreeKind::SelfReference => compile_self_reference(c),
TreeKind::UnaryExpression => compile_unary_operator(c, t, tree),
TreeKind::MatchExpression => compile_match_expression(c, tree),
_ => ice!(c, t, "{tree:?} is not an expression, cannot compile"),
};
if let Err(m) = cr {
c.push_panic(format!("panic compiling expression {:?}: {m}", tree));
}
}
/// Compiles a literal expression, choosing the push instruction from the
/// literal's semantic type.
///
/// * `F64` pushes the parsed number; text that does not parse as a float
///   becomes a runtime panic instead of crashing the compiler.
/// * `Bool` pushes `PushTrue` or `PushFalse` depending on the token kind.
/// * `String` stores the decoded constant in the string table and pushes it.
/// * An error type becomes a runtime panic carrying the error.
///
/// Any other type is an internal compiler error. Returns `Err` only when the
/// literal has no token.
fn compile_literal(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR {
    let tok = &c[tr.nth_token(0).ok_or("no token")?];
    match c.semantics.type_of(t) {
        Type::F64 => match tok.as_str(c.source).parse::<f64>() {
            Ok(value) => c.push(Instruction::PushFloat(value)),
            Err(_) => c.push_panic(format!("compiling literal {:?}: invalid number", tr)),
        },
        Type::Bool => c.push(if tok.kind == TokenKind::True {
            Instruction::PushTrue
        } else {
            Instruction::PushFalse
        }),
        Type::String => {
            let result = string_constant_to_string(tok.as_str(c.source));
            let index = c.add_string(result);
            c.push(Instruction::PushString(index))
        }
        Type::Error(e) => c.push_panic(format!("compiling literal {:?}: {e}", tr)),
        // Report the offending *type*; the tree itself is already covered by
        // the state dump that `ice!` performs.
        other => ice!(c, t, "unsupported literal type: {other}"),
    };
    OK
}
/// Compiles a parenthesized expression by compiling the tree at child
/// position 1. Returns `Err` if there is no tree at that position.
fn compile_grouping(c: &mut Compiler, t: &Tree) -> CR {
    let inner = t.nth_tree(1).ok_or("unexpected tree")?;
    compile_expression(c, inner);
    OK
}
/// Compiles a unary expression: the operand first, then the operator.
///
/// `!` emits `BoolNot`; `-` multiplies by `-1.0`, since the instruction set
/// has no dedicated negate. Any other operator is an internal compiler error.
fn compile_unary_operator(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR {
    let operand = tr.nth_tree(1).ok_or("no arg")?;
    compile_expression(c, operand);

    let operator = &c[tr.nth_token(0).ok_or("no op")?];
    match operator.kind {
        TokenKind::Bang => {
            c.push(Instruction::BoolNot);
        }
        TokenKind::Minus => {
            c.push(Instruction::PushFloat(-1.0));
            c.push(Instruction::FloatMultiply);
        }
        _ => ice!(c, t, "unsupported unary operator"),
    }
    OK
}
/// Compiles `if cond then-branch [else else-branch]` as an expression.
///
/// Emitted layout: condition, `JumpFalse` to the else part, then-branch,
/// `Jump` to the end, else part. A missing else branch yields `PushNothing`
/// so both paths leave a value behind.
fn compile_condition_expression(c: &mut Compiler, t: &Tree) -> CR {
    compile_expression(c, t.nth_tree(1).ok_or("no cond")?);
    let skip_then = c.push(Instruction::JumpFalse(0));

    compile_expression(c, t.nth_tree(2).ok_or("no then")?);
    let skip_else = c.push(Instruction::Jump(0));

    // The else part starts here.
    c.patch(skip_then, Instruction::JumpFalse);
    match t.nth_tree(4) {
        Some(else_branch) => compile_expression(c, else_branch),
        None => {
            c.push(Instruction::PushNothing);
        }
    }

    // Both branches rejoin here.
    c.patch(skip_else, Instruction::Jump);
    OK
}
/// Compiles both operands of a binary expression (left first, then right)
/// and emits the single instruction that `f` selects.
///
/// `f` receives the compiler and the semantic type of the *right-hand*
/// operand; it may emit additional instructions itself before returning the
/// final one.
fn compile_simple_binary_expression<T>(c: &mut Compiler, tr: &Tree, f: T) -> CR
where
    T: FnOnce(&mut Compiler, &Type) -> Instruction,
{
    let lhs = tr.nth_tree(0).ok_or("no lhs")?;
    compile_expression(c, lhs);

    let rhs = tr.nth_tree(2).ok_or("no rhs")?;
    let rhs_type = c.semantics.type_of(rhs);
    compile_expression(c, rhs);

    let operation = f(c, &rhs_type);
    c.push(operation);
    OK
}
fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR {
let op = &c[tr.nth_token(1).ok_or("no op")?];
match op.kind {
TokenKind::Plus => compile_simple_binary_expression(c, tr, |c, t| match t {
Type::F64 => Instruction::FloatAdd,
Type::String => Instruction::StringAdd,
_ => c.inst_panic(format!("panic adding {}", t)),
}),
TokenKind::Minus => {
compile_simple_binary_expression(c, tr, |_, _| Instruction::FloatSubtract)
}
TokenKind::Star => {
compile_simple_binary_expression(c, tr, |_, _| Instruction::FloatMultiply)
}
TokenKind::Slash => {
compile_simple_binary_expression(c, tr, |_, _| Instruction::FloatDivide)
}
TokenKind::Less => compile_simple_binary_expression(c, tr, |c, t| match t {
Type::F64 => Instruction::LessFloat,
Type::String => Instruction::LessString,
_ => c.inst_panic(format!("panic less {}", t)),
}),
TokenKind::LessEqual => {
compile_simple_binary_expression(c, tr, |c, t| match t {
Type::F64 => Instruction::GreaterFloat,
Type::String => Instruction::GreaterString,
_ => c.inst_panic(format!("panic less equal {}", t)),
})?;
c.push(Instruction::BoolNot);
OK
}
TokenKind::Greater => compile_simple_binary_expression(c, tr, |c, t| match t {
Type::F64 => Instruction::GreaterFloat,
Type::String => Instruction::GreaterString,
_ => c.inst_panic(format!("panic greater {}", t)),
}),
TokenKind::GreaterEqual => {
compile_simple_binary_expression(c, tr, |c, t| match t {
Type::F64 => Instruction::LessFloat,
Type::String => Instruction::LessString,
_ => c.inst_panic(format!("panic greater equal {}", t)),
})?;
c.push(Instruction::BoolNot);
OK
}
TokenKind::And => {
// Compile the left hand side, it leaves a bool on the stack
compile_expression(c, tr.nth_tree(0).ok_or("no lhs")?);
// If the first part is true (hooray!) then we need to evaluate
// the right hand side, so jump around the short circuit...
let jump_true_index = c.push(Instruction::JumpTrue(0));
// ...but if the first part is false then we stop here. We need
// to leave a value on the stack (it was consumed by jump above)
// so we push an extra False here, and jump to the end.
c.push(Instruction::PushFalse);
let jump_end_index = c.push(Instruction::Jump(0));
// Here we are, we consumed the `true` off the stack now time to
// do the right hand side.
c.patch(jump_true_index, |i| Instruction::JumpTrue(i));
// The right hand side leaves true or false on the stack, it's
// the result of the expression.
compile_expression(c, tr.nth_tree(2).ok_or("no rhs")?);
// (here's where you go after you leave the "false" on the stack.)
c.patch(jump_end_index, |i| Instruction::Jump(i));
OK
}
TokenKind::Or => {
// Compile the left hand side, it leaves a bool on the stack
compile_expression(c, tr.nth_tree(0).ok_or("no lhs")?);
// If the first part is false (boo!) then we need to evaluate the
// right hand side, so jump around the short circuit...
let jump_false_index = c.push(Instruction::JumpFalse(0));
// ...but if the first part os true then we stop here. We need to
// leave a value on the stack (it was consumed by jump above) so
// we push an extra True here and jump to the end.
c.push(Instruction::PushTrue);
let jump_end_index = c.push(Instruction::Jump(0));
// Here we are, we consumed the `false` off the stack now time to
// do the right hand side.
c.patch(jump_false_index, |i| Instruction::JumpFalse(i));
// The right hand side leaves true or false on the stack, it's
// the result of the expression.
compile_expression(c, tr.nth_tree(2).ok_or("no rhs")?);
// (here's where you go after you leave "true" on the stack.)
c.patch(jump_end_index, |i| Instruction::Jump(i));
OK
}
TokenKind::EqualEqual => {
compile_simple_binary_expression(c, tr, |c, arg_type| {
if c.semantics.can_convert(&arg_type, &Type::Nothing) {
c.push(Instruction::Discard);
c.push(Instruction::Discard);
Instruction::PushTrue
} else {
match arg_type {
Type::F64 => Instruction::EqFloat,
Type::String => Instruction::EqString,
Type::Bool => Instruction::EqBool, // ?
_ => c.inst_panic(format!("panic comparing {}", arg_type)),
}
}
})
}
TokenKind::Equal => {
compile_expression(c, tr.nth_tree(2).ok_or("no value")?);
c.push(Instruction::Dup);
let lvalue = tr.nth_tree(0).ok_or("no lvalue")?;
let ltree = &c[lvalue];
#[allow(unused_assignments)]
let mut environment = None;
let declaration = match ltree.kind {
// TODO: Assign to list access
TreeKind::Identifier => {
let id = c[ltree.nth_token(0).ok_or("no id")?].as_str(&c.source);
environment = Some(c.semantics.environment_of(lvalue));
environment
.as_ref()
.unwrap()
.bind(id)
.ok_or("cannot bind destination")?
}
TreeKind::MemberAccess => {
let id = c[ltree.nth_token(2).ok_or("no member")?].as_str(&c.source);
let t = ltree.nth_tree(0).ok_or("no lhs exp")?;
let typ = c.semantics.type_of(t);
environment = Some(c.semantics.member_environment(t, &typ));
environment
.as_ref()
.unwrap()
.bind(id)
.ok_or("cannot bind field")?
}
_ => return Err("unsupported lval expression"),
};
// TODO: Handle storage to non-local module.
let index = declaration.index;
let instruction = match declaration.location {
Location::Argument => {
compiler_assert!(c, t, index < c.function.args);
Instruction::StoreArgument(index)
}
Location::Local => {
if index >= c.function.locals {
c.function.locals = index + 1;
}
Instruction::StoreLocal(index)
}
Location::Module => {
compiler_assert!(c, t, index < c.module.globals);
Instruction::StoreModule(index)
}
Location::Slot => {
compile_expression(c, ltree.nth_tree(0).ok_or("no obj lhs")?);
Instruction::StoreSlot(index)
}
Location::ExternalFunction
| Location::Function
| Location::Class
| Location::Import => c.inst_panic("store to invalid location"),
};
c.push(instruction);
OK
}
_ => ice!(
c,
t,
"Unsupported binary expression '{}'",
op.as_str(&c.source)
),
}
}
/// Compiles a bare identifier: binds its name in the environment of `t` and
/// emits the load for the resulting declaration.
///
/// Returns `Err` if the identifier token is missing or the name is unbound.
fn compile_identifier_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
    let name_token = tree.nth_token(0).ok_or("no ident")?;
    let name = c[name_token].as_str(&c.source);
    let scope = c.semantics.environment_of(t);
    match scope.bind(name) {
        Some(declaration) => compile_load_declaration(c, t, declaration),
        None => Err("not found"),
    }
}
/// Emits the instruction that loads the value named by `declaration`.
///
/// Classes and imports emit nothing. Module variables and functions that
/// belong to another module are preceded by a `ModulePrefix`. Loading a local
/// also grows the current function's local count to cover its index.
fn compile_load_declaration(c: &mut Compiler, t: TreeRef, declaration: &Declaration) -> CR {
    let slot = declaration.index;
    let load = match declaration.location {
        // Must be a static don't worry about it.
        Location::Class => return OK,

        // Imports are handled with an instruction prefix.
        Location::Import => return OK,

        Location::ExternalFunction => Instruction::LoadExternFunction(slot),

        Location::Slot => {
            // TODO: Assert slot is in field range?
            Instruction::LoadSlot(slot)
        }

        Location::Argument => {
            compiler_assert!(c, t, slot < c.function.args);
            Instruction::LoadArgument(slot)
        }

        Location::Local => {
            c.function.locals = c.function.locals.max(slot + 1);
            Instruction::LoadLocal(slot)
        }

        Location::Function => {
            if declaration.module != c.semantics.mid() {
                c.push(Instruction::ModulePrefix(declaration.module));
            }
            Instruction::LoadFunction(slot)
        }

        Location::Module => {
            if declaration.module == c.semantics.mid() {
                compiler_assert!(c, t, slot < c.module.globals);
            } else {
                // TODO: Assert here too?
                c.push(Instruction::ModulePrefix(declaration.module));
            }
            Instruction::LoadModule(slot)
        }
    };
    c.push(load);
    OK
}
/// Compiles a `match` expression.
///
/// The matched value is compiled once. Each arm then emits its pattern test,
/// a `JumpFalse` to the next arm, the arm's expression, and a `Jump` to the
/// end of the whole match. If no arm matches, execution reaches a panic
/// instruction placed after the last arm.
fn compile_match_expression(c: &mut Compiler, tree: &Tree) -> CR {
    compile_expression(c, tree.nth_tree(1).ok_or("no val")?);

    let body = tree
        .child_tree_of_kind(c.syntax, TreeKind::MatchBody)
        .ok_or("no body")?;

    // Indices of the per-arm jumps that must land past the end of the match.
    let mut exits = Vec::new();
    for arm_ref in body.children_of_kind(c.syntax, TreeKind::MatchArm) {
        let arm = &c.syntax[arm_ref];

        // Evaluate pattern; if it fails, skip to the next arm.
        let pattern = arm.nth_tree(0).ok_or("no arm pat")?;
        compile_pattern(c, pattern)?;
        let skip_arm = c.push(Instruction::JumpFalse(0));

        // Otherwise run the arm's expression and leave the match.
        let value = arm.nth_tree(2).ok_or("no arm expr")?;
        compile_expression(c, value);
        let exit = c.push(Instruction::Jump(0));
        exits.push(exit);

        c.patch(skip_arm, Instruction::JumpFalse);
    }
    c.push_panic("Fell through all match arms");

    // Every arm's exit jump lands just past the fall-through panic.
    for exit in exits {
        c.patch(exit, Instruction::Jump);
    }
    OK
}
/// Compiles `value is pattern`: the value (child tree 0) first, then the
/// pattern test (child tree 2).
fn compile_is_expression(c: &mut Compiler, tree: &Tree) -> CR {
    let value = tree.nth_tree(0).ok_or("no val")?;
    compile_expression(c, value);

    let pattern = tree.nth_tree(2).ok_or("no pat")?;
    compile_pattern(c, pattern)
}
/// Compiles the pattern tree `t` into code that tests the value the caller
/// compiled just before.
///
/// A pattern may combine a wildcard or a type expression, an optional
/// variable binding, and an optional `and <predicate>` clause.
///
/// NOTE(review): exactly how many stack values each path consumes depends on
/// whether the `Is*` instructions pop their operand, which is decided by the
/// runtime and not visible here -- confirm before changing the Dup/Store
/// logic below.
fn compile_pattern(c: &mut Compiler, t: TreeRef) -> CR {
let tree = &c.syntax[t];
// Let's *try* to generate good code in the presence of a wildcard pattern....
let is_wildcard = tree
.child_tree_of_kind(&c.syntax, TreeKind::WildcardPattern)
.is_some();
let type_expr = tree.child_tree_of_kind(&c.syntax, TreeKind::TypeExpression);
// Child position of the `and` token that introduces a predicate, if any.
let and_index = tree.children.iter().position(|child| match child {
Child::Token(t) => c[t].kind == TokenKind::And,
_ => false,
});
// If you have a binding, dup and store now, it is in scope.
if let Some(binding) = tree.child_tree_of_kind(&c.syntax, TreeKind::VariableBinding) {
if let Some(variable) = binding.nth_token(0) {
let id = c[variable].as_str(&c.source);
let environment = c.semantics.environment_of(t);
let Some(declaration) = environment.bind(id) else {
ice!(c, t, "cannot bind pattern variable `{id}`");
};
compiler_assert!(
c,
t,
declaration.location == Location::Local,
"is cannot make a non-local, non-variable declaration"
);
// If we aren't a wildcard or we have an attached predicate then
// we will need the value on the stack, otherwise we can discard
// it.
if and_index.is_some() || !is_wildcard {
c.push(Instruction::Dup);
}
c.push(Instruction::StoreLocal(declaration.index));
}
}
if !is_wildcard {
// NOTE(review): this branch handles the *non*-wildcard case, so the
// error text below reads inverted; it is a runtime string and is left
// untouched here.
let type_expr = type_expr.ok_or("wild but no type")?;
compile_type_expr_eq(c, type_expr.nth_tree(0).ok_or("no type expr")?);
}
if let Some(and_index) = and_index {
let jump_end_index = if is_wildcard {
// If the pattern was a wildcard then don't bother with this jump
// nonsense; we know the pattern matched, all we need to do is
// evaluate the predicate.
None
} else {
// Otherwise test the pattern to see if it passed; if it did then
// we need to run the predicate. (This is the back half of an AND
// expression.)
let jump_true_index = c.push(Instruction::JumpTrue(0));
c.push(Instruction::PushFalse);
let jump_end_index = c.push(Instruction::Jump(0));
c.patch(jump_true_index, |i| Instruction::JumpTrue(i));
Some(jump_end_index)
};
// The predicate is the tree right after the `and` token.
compile_expression(c, tree.nth_tree(and_index + 1).ok_or("no condition")?);
// If we wound up with a jump what needs patching, patch it.
if let Some(jump_end_index) = jump_end_index {
c.patch(jump_end_index, |i| Instruction::Jump(i));
}
} else if is_wildcard {
// If there was no predicate *and* the pattern was a wildcard then
// I'll just need to push true here.
c.push(Instruction::PushTrue);
}
OK
}
/// Compiles the type test for the type expression `t`.
///
/// A handler error is turned into a runtime panic instruction. A tree that is
/// neither a type identifier nor an alternate type is an internal compiler
/// error.
fn compile_type_expr_eq(c: &mut Compiler, t: TreeRef) {
    let tree = &c.syntax[t];
    let outcome = match tree.kind {
        TreeKind::AlternateType => compile_type_alternate_eq(c, tree),
        TreeKind::TypeIdentifier => compile_type_identifier_eq(c, t, tree),
        _ => ice!(c, t, "tree is not a type expression"),
    };

    let Err(m) = outcome else {
        return;
    };
    c.push_panic(format!(
        "internal error compiling type expression eq {:?}: {m}",
        tree
    ));
}
fn compile_type_identifier_eq(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
let identifier = c[tree.nth_token(0).ok_or("no id")?].as_str(&c.source);
match identifier {
"f64" => {
c.push(Instruction::IsFloat);
}
"string" => {
c.push(Instruction::IsString);
}
"bool" => {
c.push(Instruction::IsBool);
}
"nothing" => {
c.push(Instruction::IsNothing);
}
_ => {
let environment = c.semantics.environment_of(t);
let declaration = environment.bind(identifier).ok_or("cannot bind")?;
match declaration.location {
Location::Class => {
// TODO: Handle non-local class declaration!
// The runtime identifier of the class is the tree index
// of the class declaration sure why not.
let Origin::Source(classdecl) = declaration.origin else {
ice!(c, t, "This class declaration doesn't have an origin");
};
let index = classdecl.index();
c.push(Instruction::IsClass(index.try_into().unwrap()));
}
_ => return Err("unsupported type decl"),
}
}
};
OK
}
/// Compiles the test for an alternate type (`A or B`) as a short-circuit OR
/// of the two sides' tests.
fn compile_type_alternate_eq(c: &mut Compiler, tree: &Tree) -> CR {
    // The left test leaves a bool on the stack.
    let left = tree.nth_tree(0).ok_or("no lhs")?;
    compile_type_expr_eq(c, left);

    // When it is false the right side decides, so hop over the short circuit.
    let try_right = c.push(Instruction::JumpFalse(0));

    // When it is true we are done. The jump consumed the bool, so put a
    // `true` back as the result and skip the right side.
    c.push(Instruction::PushTrue);
    let done = c.push(Instruction::Jump(0));

    // Right side: its bool is the result of the whole test.
    c.patch(try_right, Instruction::JumpFalse);
    let right = tree.nth_tree(2).ok_or("no rhs")?;
    compile_type_expr_eq(c, right);

    // The short-circuit path rejoins here.
    c.patch(done, Instruction::Jump);
    OK
}
/// Compiles a call: the arguments from last to first, then the callee, then
/// `Call` with the argument count. When the callee's type is a method the
/// count is one higher than the number of written arguments.
fn compile_call_expression(c: &mut Compiler, tree: &Tree) -> CR {
    let arg_list = tree
        .child_tree_of_kind(&c.syntax, TreeKind::ArgumentList)
        .ok_or("no arglist")?;

    let written_args: Vec<_> = arg_list.child_trees().collect();
    for &arg in written_args.iter().rev() {
        compile_expression(c, arg);
    }

    let callee = tree.nth_tree(0).ok_or("no func")?;
    // TODO: Consider being guided by syntax here?
    let is_method = matches!(c.semantics.type_of(callee), Type::Method(..));
    let passed = if is_method {
        written_args.len() + 1
    } else {
        written_args.len()
    };

    compile_expression(c, callee);
    c.push(Instruction::Call(passed));
    OK
}
/// Compiles a block used as an expression.
///
/// A block with exactly two children is treated as empty and yields
/// `PushNothing`. Otherwise every statement after the first child is
/// compiled in order, and only the last one is asked to produce a value.
/// The final child is excluded when it is a token.
fn compile_block_expression(c: &mut Compiler, tree: &Tree) -> CR {
    let child_count = tree.children.len();
    if child_count == 2 {
        c.push(Instruction::PushNothing);
        return OK;
    }

    // Position of the last statement: one before the final child when that
    // child is a token, the final child itself otherwise.
    let final_index = match tree.nth_token(child_count - 1) {
        Some(_) => child_count - 2,
        None => child_count - 1,
    };

    for position in 1..final_index {
        let statement = tree.nth_tree(position).ok_or("no stat")?;
        compile_statement(c, statement, false);
    }
    let tail = tree.nth_tree(final_index).ok_or("no last")?;
    compile_statement(c, tail, true);
    OK
}
/// Compiles a call argument by compiling the expression it wraps (its first
/// child tree).
fn compile_argument(c: &mut Compiler, tree: &Tree) -> CR {
    let value = tree.nth_tree(0).ok_or("no expr")?;
    compile_expression(c, value);
    OK
}
/// Compiles `new Class { field: value, ... }`.
///
/// Field values are compiled in reverse class-field order, whatever order
/// they were written in, followed by a load of the class constructor and a
/// `Call` with one argument per field. If the expression's type is not an
/// object type, a runtime panic is emitted and compilation continues.
fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
    // We pass in the arguments.... by... field order?
    let Type::Object(mid, ct, _) = c.semantics.type_of(t) else {
        c.push_panic("new obj not ob");
        return OK;
    };
    let class = c.semantics.class_of(mid, ct);

    let field_list = tree
        .child_tree_of_kind(&c.syntax, TreeKind::FieldList)
        .ok_or("no field list")?;

    // Written field values, keyed by field name.
    let mut supplied = HashMap::new();
    for value_ref in field_list.children_of_kind(&c.syntax, TreeKind::FieldValue) {
        let value_tree = &c.syntax[value_ref];
        let name_token = &c.syntax[value_tree.nth_token(0).ok_or("no field name")?];
        supplied.insert(name_token.as_str(&c.source), value_ref);
    }

    // The fields come in this order and since arguments are backwards
    // (stack!) we compile them in reverse order. Missing fields panic,
    // obviously.
    for field in class.fields.iter().rev() {
        let Some(&value_ref) = supplied.get(&*field.name) else {
            return Err("cannot bind field");
        };
        compile_expression(c, value_ref);
    }

    // Fetch the correct constructor.
    // TODO: Binding this type should be done by semantics, and we should borrow it.
    let type_reference = tree
        .child_tree_of_kind(&c.syntax, TreeKind::TypeIdentifier)
        .ok_or("no type ref")?;
    let type_token = type_reference.nth_token(0).ok_or("no type id")?;
    let type_name = c[type_token].as_str(&c.source);
    let environment = c.semantics.environment_of(t);
    let declaration = environment.bind(type_name).ok_or("cannot bind type")?;
    if !matches!(declaration.location, Location::Class) {
        return Err("unsupported type for construction");
    }
    c.push(Instruction::LoadFunction(declaration.index));

    c.push(Instruction::Call(class.fields.len()));
    OK
}
/// Compiles one field initializer of an object construction.
///
/// Form 1, `{ x: value }`: compiles the value expression.
/// Form 2, `{ x }`: loads whatever `x` binds to in the surrounding scope.
fn compile_field_value(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
    // Form 1: the second token is a colon.
    let has_explicit_value = tree
        .nth_token(1)
        .is_some_and(|token| c[token].kind == TokenKind::Colon);
    if has_explicit_value {
        let value = tree.nth_tree(2).ok_or("no val")?;
        compile_expression(c, value);
        return OK;
    }

    // Form 2: { x, ... }
    let environment = c.semantics.environment_of(t);
    let name_token = tree.nth_token(0).ok_or("no id")?;
    let name = c[name_token].as_str(&c.source);
    match environment.bind(name) {
        Some(declaration) => compile_load_declaration(c, t, declaration),
        None => Err("cannot bind"),
    }
}
/// Compiles `object.member`: the object expression first, then a load of the
/// member as bound in the member environment of the object's type.
fn compile_member_access(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
    // In member access; the lhs sets up the object and in theory the rhs
    // binds against it. ::shrug::
    let object = tree.nth_tree(0).ok_or("no lhs")?;
    compile_expression(c, object);

    let object_type = c.semantics.type_of(object);
    let member_token = tree.nth_token(2).ok_or("no ident")?;
    let member = c[member_token].as_str(&c.source);
    let members = c.semantics.member_environment(t, &object_type);
    let declaration = members.bind(member).ok_or("cannot bind")?;

    // NOTE: If this is a method call we still don't have to do anything
    //       special here, since the load of the member function will *not*
    //       consume the self pointer from the stack.
    compile_load_declaration(c, t, declaration)
}
/// Compiles `self` as a load of argument 0.
fn compile_self_reference(c: &mut Compiler) -> CR {
    let receiver = Instruction::LoadArgument(0);
    c.push(receiver);
    OK
}
/// Compiles a list literal: the elements from last to first, then `NewList`
/// with the element count.
fn compile_list_constructor(c: &mut Compiler, tree: &Tree) -> CR {
    let elements: Vec<_> = tree
        .children_of_kind(&c.syntax, TreeKind::ListConstructorElement)
        .collect();

    for &element in elements.iter().rev() {
        compile_expression(c, element);
    }
    c.push(Instruction::NewList(elements.len()));
    OK
}
/// Compiles one list element by compiling the expression it wraps (its first
/// child tree).
fn compile_list_constructor_element(c: &mut Compiler, tree: &Tree) -> CR {
    let value = tree.nth_tree(0).ok_or("no expr")?;
    compile_expression(c, value);
    OK
}
/// Compiles the statement tree `t` by dispatching on its kind.
///
/// `gen_value` says whether the statement should leave a value behind: the
/// handlers push `PushNothing` or skip a `Discard` when it is set.
///
/// A handler that returns `Err` does not abort compilation: the error is
/// turned into a panic instruction. A tree kind that is not a statement is
/// an internal compiler error (`ice!`).
fn compile_statement(c: &mut Compiler, t: TreeRef, gen_value: bool) {
let tree = &c.semantics.tree()[t];
let cr = match tree.kind {
TreeKind::Error => Err("parse error"),
TreeKind::Import => compile_import_statement(c, gen_value),
TreeKind::Block => compile_block_statement(c, t, gen_value),
TreeKind::ClassDecl => compile_class_declaration(c, gen_value),
TreeKind::ExpressionStatement => compile_expression_statement(c, tree, gen_value),
TreeKind::ForStatement => compile_for_statement(c, tree, gen_value),
TreeKind::FunctionDecl => compile_function_declaration(c, gen_value),
TreeKind::IfStatement => compile_if_statement(c, tree, gen_value),
TreeKind::LetStatement => compile_let_statement(c, t, tree, gen_value),
TreeKind::ReturnStatement => compile_return_statement(c, tree),
TreeKind::WhileStatement => compile_while_statement(c, tree, gen_value),
TreeKind::Export => compile_export_statement(c, tree, gen_value),
// An export list emits no code.
TreeKind::ExportList => OK,
_ => ice!(c, t, "unsupported statement tree kind {:?}", tree.kind),
};
if let Err(e) = cr {
c.push_panic(format!(
"internal error compiling statement {:?}: {e}",
tree
));
}
}
/// Compiles an `export` statement by compiling the statement it wraps (the
/// tree at child position 1), passing `gen_value` through unchanged.
fn compile_export_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
    let exported = tree.nth_tree(1).ok_or("nothing to export")?;
    compile_statement(c, exported, gen_value);
    OK
}
/// Compiles an `if` statement by compiling its conditional expression (the
/// first child tree) and discarding the result unless a value is wanted.
fn compile_if_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
    let conditional = tree.nth_tree(0).ok_or("no expr")?;
    compile_expression(c, conditional);

    if gen_value {
        return OK;
    }
    c.push(Instruction::Discard);
    OK
}
/// Compiles an expression statement.
///
/// * No expression: pushes `PushNothing` if a value is wanted.
/// * Expression followed by `;`: the expression's value is discarded, and
///   `PushNothing` stands in for it if a value is wanted.
/// * Expression without `;`: its value is kept if wanted, discarded otherwise.
fn compile_expression_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
    let Some(expr) = tree.nth_tree(0) else {
        if gen_value {
            c.push(Instruction::PushNothing);
        }
        return OK;
    };
    compile_expression(c, expr);

    let terminated = tree
        .nth_token(1)
        .is_some_and(|token| c[token].kind == TokenKind::Semicolon);

    // The expression's own value survives only when it is both wanted and
    // not cut off by a semicolon.
    if terminated || !gen_value {
        c.push(Instruction::Discard);
    }
    if terminated && gen_value {
        c.push(Instruction::PushNothing);
    }
    OK
}
/// Compiles `let name = value`: the value first, then a store to wherever
/// `name` is declared (a local or a module variable). The relevant local or
/// global count is grown to cover the declaration's index, and `PushNothing`
/// follows if the statement must produce a value.
///
/// Any other declaration location is an internal compiler error.
fn compile_let_statement(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR {
    compile_expression(c, tree.nth_tree(3).ok_or("no val")?);

    let environment = c.semantics.environment_of(t);
    let name = c[tree.nth_token(1).ok_or("no id")?].as_str(&c.source);
    let declaration = environment.bind(name).ok_or("cannot bind")?;

    // TODO: ASSERT LOCAL DECLARATION?
    let slot = declaration.index;
    let store = match declaration.location {
        Location::Module => {
            if declaration.module != c.semantics.mid() {
                c.push(Instruction::ModulePrefix(declaration.module));
            } else {
                c.module.globals = c.module.globals.max(slot + 1);
            }
            Instruction::StoreModule(slot)
        }
        Location::Local => {
            c.function.locals = c.function.locals.max(slot + 1);
            Instruction::StoreLocal(slot)
        }
        _ => ice!(c, t, "unsuitable location for let declaration"),
    };
    c.push(store);

    if gen_value {
        c.push(Instruction::PushNothing);
    }
    OK
}
/// Compiles a function declaration in statement position.
///
/// No code for the function is emitted here; the only output is a
/// `PushNothing` when the caller asked for a value (`gen_value`).
fn compile_function_declaration(c: &mut Compiler, gen_value: bool) -> CR {
    if !gen_value {
        return OK;
    }
    c.push(Instruction::PushNothing);
    OK
}
/// Compiles a class declaration in statement position.
///
/// No code for the class is emitted here; the only output is a
/// `PushNothing` when the caller asked for a value (`gen_value`).
fn compile_class_declaration(c: &mut Compiler, gen_value: bool) -> CR {
    if !gen_value {
        return OK;
    }
    c.push(Instruction::PushNothing);
    OK
}
/// Emits the body of a callable for the tree `t`, followed by `Return`.
///
/// * `TreeKind::FunctionDecl`: compiles the declaration's `Block` child as an
///   expression.
/// * `TreeKind::ClassDecl`: emits a `LoadArgument` per `FieldDecl` child, from
///   the highest argument index down to 0. It then pushes the class name as a
///   string constant, pushes the tree index of `t` as an integer, and emits
///   `NewObject` with the field count.
///
/// # Errors
/// Returns `Err` when the function has no block or the class has no name
/// token. Any other tree kind is reported via `ice!`.
///
/// # Panics
/// Panics if `t.index()` does not fit the integer type taken by `PushInt`.
fn compile_function(c: &mut Compiler, t: TreeRef) -> CR {
    let tree = &c.syntax[t];
    match tree.kind {
        TreeKind::FunctionDecl => {
            let body = tree
                .child_of_kind(&c.syntax, TreeKind::Block)
                .ok_or("no body")?;
            compile_expression(c, body);
        }
        TreeKind::ClassDecl => {
            let field_count = tree
                .children_of_kind(&c.syntax, TreeKind::FieldDecl)
                .count();

            // Load the arguments last-to-first.
            for argument in (0..field_count).rev() {
                c.push(Instruction::LoadArgument(argument));
            }

            let name_token = tree.nth_token(1).ok_or("no name")?;
            let name = c[name_token].as_str(&c.source);
            let name_index = c.add_string(name.to_string());
            c.push(Instruction::PushString(name_index));
            c.push(Instruction::PushInt(t.index().try_into().unwrap()));
            c.push(Instruction::NewObject(field_count));
        }
        _ => ice!(c, t, "what is this tree doing in compile_function?"),
    }

    c.push(Instruction::Return);
    OK
}
/// Compiles a block used in statement position.
///
/// The block `t` is compiled as an expression; when the caller does not want
/// a value (`gen_value == false`) a `Discard` is emitted afterwards.
fn compile_block_statement(c: &mut Compiler, t: TreeRef, gen_value: bool) -> CR {
    compile_expression(c, t);

    if gen_value {
        return OK;
    }
    c.push(Instruction::Discard);
    OK
}
/// Compiles a `while` loop.
///
/// Layout of the emitted code:
/// 1. the condition (`tree.nth_tree(1)`), starting at the loop's first
///    instruction index,
/// 2. a `JumpFalse` placeholder that is patched once the loop is emitted,
/// 3. the body (`tree.nth_tree(2)`), compiled with its value discarded,
/// 4. a `Jump` back to the condition.
///
/// When `gen_value` is true a `PushNothing` is emitted after the loop.
///
/// # Errors
/// Returns `Err` when the condition or body is missing, or when compiling the
/// body fails.
fn compile_while_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
    let loop_start = c.function.instructions.len();

    let condition = tree.nth_tree(1).ok_or("no cond")?;
    compile_expression(c, condition);

    // Target is a placeholder (0) until the loop has been fully emitted.
    let exit_jump = c.push(Instruction::JumpFalse(0));

    let body = tree.nth_tree(2).ok_or("no body")?;
    compile_block_statement(c, body, false)?;
    c.push(Instruction::Jump(loop_start));

    // NOTE(review): presumably `patch` supplies the index just past the loop
    // as the jump target; confirm against `Compiler::patch`.
    c.patch(exit_jump, Instruction::JumpFalse);

    if gen_value {
        c.push(Instruction::PushNothing);
    }
    OK
}
/// Compiles a `return` statement.
///
/// If `tree.nth_tree(1)` is present it is compiled as the returned
/// expression; otherwise `PushNothing` is emitted. A `Return` instruction
/// follows in both cases.
fn compile_return_statement(c: &mut Compiler, tree: &Tree) -> CR {
    match tree.nth_tree(1) {
        Some(value) => {
            compile_expression(c, value);
        }
        None => {
            c.push(Instruction::PushNothing);
        }
    }
    c.push(Instruction::Return);
    OK
}
fn compile_for_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
// Figure out the variable.
let vt = tree.nth_tree(1).ok_or("no var")?;
let var = &c[vt];
let id = c[var.nth_token(0).ok_or("no id")?].as_str(&c.source);
let body = tree.nth_tree(4).ok_or("no body")?;
let env = c.semantics.environment_of(body);
let Some(variable_decl) = env.bind(id) else {
ice!(c, body, "Unable to bind {id} in loop body");
};
compiler_assert_eq!(
c,
vt,
variable_decl.location,
Location::Local,
"expected loop variable to be local"
);
let variable_slot = variable_decl.index;
// Figure out the generator.
let iterable = tree.nth_tree(3).ok_or("no generator")?;
compile_expression(c, iterable);
// call 'get_iterator'
let iterable_typ = c.semantics.type_of(iterable);
match iterable_typ {
Type::List(_) => {
c.push(Instruction::LoadExternFunction(
EXTERN_BUILTIN_LIST_GET_ITERATOR,
));
c.push(Instruction::Call(1));
}
_ => return Err("unsupported collection"), // TODO: Bind and call get_iterator() on type of iterable
}
// iterate
// (Stack is clear except for the iterator.)
let loop_top = c.push(Instruction::Dup); // Save the iterator
match iterable_typ {
Type::List(_) => {
c.push(Instruction::LoadExternFunction(
EXTERN_BUILTIN_LIST_ITERATOR_NEXT,
));
}
_ => return Err("unsupported iterator"), // TODO: Bind and call next() on type of iterator
}
c.push(Instruction::Call(1)); // Call 'next'
c.push(Instruction::Dup); // Save the result
c.push(Instruction::IsNothing); // Check to see if iteration is done
let jump_to_end = c.push(Instruction::JumpTrue(0));
c.push(Instruction::StoreLocal(variable_slot)); // Store the value
// (Stack is clear except for the iterator.)
compile_statement(c, body, false); // Run the body
// (Stack is clear except for the iterator.)
c.push(Instruction::Jump(loop_top));
// Clean up the loop.
c.patch(jump_to_end, |i| Instruction::JumpTrue(i));
c.push(Instruction::Discard); // Drop the unused value
c.push(Instruction::Discard); // Drop the iterator
if gen_value {
c.push(Instruction::PushNothing);
}
OK
}
/// Compiles an import statement in statement position.
///
/// No code for the import is emitted here; the only output is a
/// `PushNothing` when the caller asked for a value (`gen_value`).
fn compile_import_statement(c: &mut Compiler, gen_value: bool) -> CR {
    if !gen_value {
        return OK;
    }
    c.push(Instruction::PushNothing);
    OK
}