[fine] Starting to look like something

This commit is contained in:
John Doty 2024-01-09 07:56:14 -08:00
parent d14c9a72df
commit fa53841af9
4 changed files with 232 additions and 140 deletions

View file

@ -1,5 +1,7 @@
use std::collections::HashMap;
use crate::{
parser::{Tree, TreeKind, TreeRef},
parser::{SyntaxTree, Tree, TreeKind, TreeRef},
semantics::{Location, Semantics, Type},
tokens::TokenKind,
};
@ -29,49 +31,148 @@ pub enum Instruction {
StoreLocal(usize),
}
/// An item a module makes available by name.
///
/// Each variant carries a `usize` index identifying the exported item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Export {
    /// An exported function, identified by index.
    Function(usize),
    /// An exported global variable, identified by index.
    Global(usize),
}
/// The output of compilation: a set of functions plus the bookkeeping
/// needed to find the module's entry points.
pub struct Module {
/// The functions that make up this module.
pub functions: Vec<Function>,
/// The number of global variables.
pub globals: usize,
/// Exports, keyed by name.
pub exports: HashMap<String, Export>,
/// The index (into `functions`) of the initialization function.
pub init: usize,
}
impl Module {
    /// Creates an empty module: no functions, no globals, no exports, and
    /// `init` set to `0`.
    pub fn new() -> Self {
        Module {
            functions: Vec::new(),
            globals: 0,
            exports: HashMap::new(),
            init: 0,
        }
    }
}

/// `Default` produces the same empty module as [`Module::new`], so the type
/// works with `..Default::default()`, `unwrap_or_default()` and friends.
impl Default for Module {
    fn default() -> Self {
        Self::new()
    }
}
/// A single compiled function: its name, its instruction stream, and the
/// string table those instructions index into.
pub struct Function {
// Name of the function, exposed through `name()`.
name: String,
// The instructions emitted for this function, in order.
instructions: Vec<Instruction>,
// String table for this function; the compiler stores string literals here
// and refers to them by index.
strings: Vec<String>,
// Number of arguments.
args: usize, // TODO: Probably type information too?
// Number of local variable slots.
locals: usize, // TODO: Same?
}
impl Function {
pub fn new(name: &str) -> Self {
Function {
name: name.to_string(),
instructions: Vec::new(),
strings: Vec::new(),
args: 0,
locals: 0,
}
}
pub fn name(&self) -> &str {
&self.name
}
}
/// Mutable state threaded through every `compile_*` function.
struct Compiler<'a> {
// Semantic analysis results (types, environments) for the program.
semantics: &'a Semantics<'a>,
// The syntax tree being compiled; `compile` initializes it from
// `semantics.tree()`.
syntax: &'a SyntaxTree<'a>,
// The module being built up.
module: Module,
// The function currently receiving instructions and strings.
function: Function,
}
impl<'a> Compiler<'a> {
    /// Returns the type of tree `t`, as reported by the semantic analysis.
    pub fn type_of(&self, t: TreeRef) -> Type {
        self.semantics.type_of(t)
    }

    /// Appends `result` to the current function's string table and returns
    /// the index it was stored at.
    fn add_string(&mut self, result: String) -> usize {
        let table = &mut self.function.strings;
        table.push(result);
        table.len() - 1
    }

    /// Appends `inst` to the current function and returns the index it was
    /// stored at, so the caller can patch it later.
    fn push(&mut self, inst: Instruction) -> usize {
        let code = &mut self.function.instructions;
        let slot = code.len();
        code.push(inst);
        slot
    }

    /// Overwrites the instruction at index `i` with `f(next)`, where `next`
    /// is the index the next pushed instruction will receive. Used to fill
    /// in jump targets once the destination is known.
    fn patch(&mut self, i: usize, f: impl FnOnce(usize) -> Instruction) {
        let code = &mut self.function.instructions;
        let next = code.len();
        code[i] = f(next);
    }
}
/// Compiles the program described by `semantics` into a [`Module`].
///
/// All top-level code is emitted into a function named `"<< module >>"`,
/// which is appended to the module's function list last and recorded as the
/// module's `init` function. If the syntax tree has no root, that function
/// is simply empty.
pub fn compile(semantics: &Semantics) -> Module {
    let mut compiler = Compiler {
        semantics,
        syntax: semantics.tree(),
        module: Module::new(),
        function: Function::new("<< module >>"),
    };

    if let Some(root) = semantics.tree().root() {
        file(&mut compiler, root);
    }

    // Take the compiler apart: the function built so far becomes the
    // module's initialization function.
    let Compiler {
        mut module,
        function: init_function,
        ..
    } = compiler;
    module.init = module.functions.len();
    module.functions.push(init_function);
    module
}
/// Compiles the file-level tree `t` by compiling each of its child trees as
/// a statement whose value is not needed.
///
/// Panics if `t` is not a `TreeKind::File`. Children for which `nth_tree`
/// returns `None` are skipped.
fn file(c: &mut Compiler, t: TreeRef) {
    let root = &c.syntax[t];
    assert_eq!(root.kind, TreeKind::File);

    let child_count = root.children.len();
    for statement in (0..child_count).filter_map(|i| root.nth_tree(i)) {
        compile_statement(c, statement, false);
    }
}
/// Result of a single compile step. `None` means the step could not be
/// completed (for example a `?` on a missing child bailed out); the callers
/// `compile_expression` and `compile_statement` emit a `Panic` instruction
/// in that case.
type CR = Option<()>;
/// The successful `CR` value.
const OK: CR = CR::Some(());
pub fn compile_expression(code: &mut Function, semantics: &Semantics, t: TreeRef) {
let tree = &semantics.tree()[t];
fn compile_expression(c: &mut Compiler, t: TreeRef) {
let tree = &c.syntax[t];
let cr = match tree.kind {
TreeKind::Error => None,
TreeKind::LiteralExpression => compile_literal(code, semantics, t, tree),
TreeKind::GroupingExpression => compile_grouping(code, semantics, tree),
TreeKind::UnaryExpression => compile_unary_operator(code, semantics, tree),
TreeKind::ConditionalExpression => compile_condition_expression(code, semantics, tree),
TreeKind::BinaryExpression => compile_binary_expression(code, semantics, tree),
TreeKind::Identifier => compile_identifier_expression(code, semantics, t, tree),
TreeKind::LiteralExpression => compile_literal(c, t, tree),
TreeKind::GroupingExpression => compile_grouping(c, tree),
TreeKind::UnaryExpression => compile_unary_operator(c, tree),
TreeKind::ConditionalExpression => compile_condition_expression(c, tree),
TreeKind::BinaryExpression => compile_binary_expression(c, tree),
TreeKind::Identifier => compile_identifier_expression(c, t, tree),
TreeKind::CallExpression => todo!(),
TreeKind::Block => compile_block_expression(code, semantics, tree),
_ => {
semantics.internal_compiler_error(Some(t), "tree is not an expression, cannot compile")
}
TreeKind::Block => compile_block_expression(c, tree),
_ => c
.semantics
.internal_compiler_error(Some(t), "tree is not an expression, cannot compile"),
};
if matches!(cr, None) {
code.instructions.push(Instruction::Panic);
c.push(Instruction::Panic);
}
}
fn compile_literal(code: &mut Function, semantics: &Semantics, t: TreeRef, tr: &Tree) -> CR {
fn compile_literal(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR {
let tok = tr.nth_token(0)?;
match semantics.type_of(t) {
Type::F64 => code
.instructions
.push(Instruction::PushFloat(tok.as_str().parse().unwrap())),
Type::Bool => code.instructions.push(if tok.kind == TokenKind::True {
match c.type_of(t) {
Type::F64 => c.push(Instruction::PushFloat(tok.as_str().parse().unwrap())),
Type::Bool => c.push(if tok.kind == TokenKind::True {
Instruction::PushTrue
} else {
Instruction::PushFalse
}),
Type::String => {
let index = code.strings.len();
// TODO: Interpret the string literal properly here.
let mut result = String::new();
let mut input = tok.as_str().chars();
@ -91,232 +192,195 @@ fn compile_literal(code: &mut Function, semantics: &Semantics, t: TreeRef, tr: &
result.push(ch)
}
}
code.strings.push(result);
code.instructions.push(Instruction::PushString(index))
let index = c.add_string(result);
c.push(Instruction::PushString(index))
}
Type::Error => code.instructions.push(Instruction::Panic),
Type::Error => c.push(Instruction::Panic),
_ => panic!("unsupported literal type: {t:?}"),
};
OK
}
fn compile_grouping(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR {
compile_expression(code, semantics, t.nth_tree(1)?);
fn compile_grouping(c: &mut Compiler, t: &Tree) -> CR {
compile_expression(c, t.nth_tree(1)?);
OK
}
fn compile_unary_operator(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR {
compile_expression(code, semantics, t.nth_tree(1)?);
fn compile_unary_operator(c: &mut Compiler, t: &Tree) -> CR {
compile_expression(c, t.nth_tree(1)?);
let tok = t.nth_token(0)?;
match tok.kind {
TokenKind::Minus => {
code.instructions.push(Instruction::PushFloat(-1.0));
code.instructions.push(Instruction::FloatMultiply);
c.push(Instruction::PushFloat(-1.0));
c.push(Instruction::FloatMultiply);
}
TokenKind::Bang => {
code.instructions.push(Instruction::BoolNot);
c.push(Instruction::BoolNot);
}
_ => panic!("unsupported unary operator"),
}
OK
}
fn compile_condition_expression(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR {
fn compile_condition_expression(c: &mut Compiler, t: &Tree) -> CR {
let condition = t.nth_tree(1)?;
compile_expression(code, semantics, condition);
compile_expression(c, condition);
let jump_else_index = code.instructions.len();
code.instructions.push(Instruction::JumpFalse(0));
let jump_else_index = c.push(Instruction::JumpFalse(0));
let then_branch = t.nth_tree(2)?;
compile_expression(code, semantics, then_branch);
compile_expression(c, then_branch);
if let Some(else_branch) = t.nth_tree(4) {
let jump_end_index = code.instructions.len();
code.instructions.push(Instruction::Jump(0));
let jump_end_index = c.push(Instruction::Jump(0));
c.patch(jump_else_index, |i| Instruction::JumpFalse(i));
let else_index = code.instructions.len();
code.instructions[jump_else_index] = Instruction::JumpFalse(else_index);
compile_expression(code, semantics, else_branch);
let end_index = code.instructions.len();
code.instructions[jump_end_index] = Instruction::Jump(end_index);
compile_expression(c, else_branch);
c.patch(jump_end_index, |i| Instruction::Jump(i));
} else {
let else_index = code.instructions.len();
code.instructions[jump_else_index] = Instruction::JumpFalse(else_index);
c.patch(jump_else_index, |i| Instruction::JumpFalse(i));
}
OK
}
fn compile_binary_expression(code: &mut Function, semantics: &Semantics, t: &Tree) -> CR {
compile_expression(code, semantics, t.nth_tree(0)?);
fn compile_binary_expression(c: &mut Compiler, t: &Tree) -> CR {
compile_expression(c, t.nth_tree(0)?);
match t.nth_token(1)?.kind {
TokenKind::Plus => {
compile_expression(code, semantics, t.nth_tree(2)?);
code.instructions.push(Instruction::FloatAdd);
compile_expression(c, t.nth_tree(2)?);
c.push(Instruction::FloatAdd);
}
TokenKind::Minus => {
compile_expression(code, semantics, t.nth_tree(2)?);
code.instructions.push(Instruction::FloatSubtract);
compile_expression(c, t.nth_tree(2)?);
c.push(Instruction::FloatSubtract);
}
TokenKind::Star => {
compile_expression(code, semantics, t.nth_tree(2)?);
code.instructions.push(Instruction::FloatMultiply);
compile_expression(c, t.nth_tree(2)?);
c.push(Instruction::FloatMultiply);
}
TokenKind::Slash => {
compile_expression(code, semantics, t.nth_tree(2)?);
code.instructions.push(Instruction::FloatDivide);
compile_expression(c, t.nth_tree(2)?);
c.push(Instruction::FloatDivide);
}
TokenKind::And => {
let jump_false_index = code.instructions.len();
code.instructions.push(Instruction::JumpFalse(0));
code.instructions.push(Instruction::PushTrue);
let jump_false_index = c.push(Instruction::JumpFalse(0));
let jump_end_index = code.instructions.len();
code.instructions.push(Instruction::Jump(0));
c.push(Instruction::PushTrue);
let jump_end_index = c.push(Instruction::Jump(0));
let false_index = code.instructions.len();
code.instructions[jump_false_index] = Instruction::JumpFalse(false_index);
c.patch(jump_false_index, |i| Instruction::JumpFalse(i));
compile_expression(code, semantics, t.nth_tree(2)?);
compile_expression(c, t.nth_tree(2)?);
let end_index = code.instructions.len();
code.instructions[jump_end_index] = Instruction::Jump(end_index);
c.patch(jump_end_index, |i| Instruction::Jump(i));
}
TokenKind::Or => {
let jump_true_index = code.instructions.len();
code.instructions.push(Instruction::JumpTrue(0));
code.instructions.push(Instruction::PushTrue);
let jump_true_index = c.push(Instruction::JumpTrue(0));
let jump_end_index = code.instructions.len();
code.instructions.push(Instruction::Jump(0));
c.push(Instruction::PushTrue);
let jump_end_index = c.push(Instruction::Jump(0));
let true_index = code.instructions.len();
code.instructions[jump_true_index] = Instruction::JumpTrue(true_index);
c.patch(jump_true_index, |i| Instruction::JumpTrue(i));
compile_expression(code, semantics, t.nth_tree(2)?);
compile_expression(c, t.nth_tree(2)?);
let end_index = code.instructions.len();
code.instructions[jump_end_index] = Instruction::Jump(end_index);
c.patch(jump_end_index, |i| Instruction::Jump(i));
}
_ => panic!("Unsupported binary expression"),
}
OK
}
fn compile_identifier_expression(
code: &mut Function,
semantics: &Semantics,
t: TreeRef,
tree: &Tree,
) -> Option<()> {
fn compile_identifier_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> Option<()> {
let ident = tree.nth_token(0)?;
let environment = semantics.environment_of(t);
let environment = c.semantics.environment_of(t);
let declaration = environment.bind(ident)?;
let instruction = match declaration.location {
Location::Local => Instruction::LoadLocal(declaration.index),
Location::Argument => Instruction::LoadArgument(declaration.index),
Location::Local => {
if declaration.index >= c.function.locals {
c.function.locals = declaration.index + 1;
}
Instruction::LoadLocal(declaration.index)
}
Location::Argument => {
assert!(declaration.index < c.function.args);
Instruction::LoadArgument(declaration.index)
}
Location::Module => Instruction::LoadModule(declaration.index),
};
code.instructions.push(instruction);
c.push(instruction);
OK
}
fn compile_block_expression(code: &mut Function, semantics: &Semantics, tree: &Tree) -> Option<()> {
fn compile_block_expression(c: &mut Compiler, tree: &Tree) -> Option<()> {
let last_is_brace = tree.nth_token(tree.children.len() - 1).is_some();
let last_index = tree.children.len() - if last_is_brace { 2 } else { 1 };
for i in 1..last_index {
compile_statement(code, semantics, tree.nth_tree(i)?, false);
compile_statement(c, tree.nth_tree(i)?, false);
}
compile_statement(code, semantics, tree.nth_tree(last_index)?, true);
compile_statement(c, tree.nth_tree(last_index)?, true);
OK
}
pub fn compile_statement(code: &mut Function, semantics: &Semantics, t: TreeRef, gen_value: bool) {
let tree = &semantics.tree()[t];
fn compile_statement(c: &mut Compiler, t: TreeRef, gen_value: bool) {
let tree = &c.semantics.tree()[t];
let cr = match tree.kind {
TreeKind::FunctionDecl => compile_function_declaration(code, semantics, tree, gen_value),
TreeKind::LetStatement => compile_let_statement(code, semantics, t, tree, gen_value),
TreeKind::ExpressionStatement => {
compile_expression_statement(code, semantics, tree, gen_value)
}
TreeKind::IfStatement => compile_if_statement(code, semantics, tree, gen_value),
TreeKind::FunctionDecl => compile_function_declaration(c, tree, gen_value),
TreeKind::LetStatement => compile_let_statement(c, t, tree, gen_value),
TreeKind::ExpressionStatement => compile_expression_statement(c, tree, gen_value),
TreeKind::IfStatement => compile_if_statement(c, tree, gen_value),
_ => panic!("unsupported tree kind {:?}", tree.kind),
};
if matches!(cr, None) {
code.instructions.push(Instruction::Panic);
c.push(Instruction::Panic);
}
}
fn compile_if_statement(
code: &mut Function,
semantics: &Semantics,
tree: &Tree,
gen_value: bool,
) -> CR {
compile_expression(code, semantics, tree.nth_tree(0)?);
fn compile_if_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
compile_expression(c, tree.nth_tree(0)?);
if !gen_value {
code.instructions.push(Instruction::Discard);
c.push(Instruction::Discard);
}
OK
}
fn compile_expression_statement(
code: &mut Function,
semantics: &Semantics,
tree: &Tree,
gen_value: bool,
) -> CR {
compile_expression(code, semantics, tree.nth_tree(0)?);
fn compile_expression_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
compile_expression(c, tree.nth_tree(0)?);
if tree
.nth_token(1)
.is_some_and(|t| t.kind == TokenKind::Semicolon)
{
code.instructions.push(Instruction::Discard);
c.push(Instruction::Discard);
if gen_value {
code.instructions.push(Instruction::PushNothing);
c.push(Instruction::PushNothing);
}
} else if !gen_value {
code.instructions.push(Instruction::Discard);
c.push(Instruction::Discard);
}
OK
}
fn compile_let_statement(
code: &mut Function,
semantics: &Semantics,
t: TreeRef,
tree: &Tree,
gen_value: bool,
) -> CR {
compile_expression(code, semantics, tree.nth_tree(3)?);
let environment = semantics.environment_of(t);
fn compile_let_statement(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR {
compile_expression(c, tree.nth_tree(3)?);
let environment = c.semantics.environment_of(t);
let declaration = environment.bind(tree.nth_token(1)?)?;
// NOTE: Because this is a let statement I assume it's local!
assert!(matches!(declaration.location, Location::Local));
code.instructions
.push(Instruction::StoreLocal(declaration.index));
c.push(Instruction::StoreLocal(declaration.index));
if gen_value {
code.instructions.push(Instruction::PushNothing);
c.push(Instruction::PushNothing);
}
OK
}
fn compile_function_declaration(
_code: &mut Function,
_semantics: &Semantics,
_tree: &Tree,
_gen_value: bool,
) -> CR {
fn compile_function_declaration(_c: &mut Compiler, _tree: &Tree, _gen_value: bool) -> CR {
todo!()
}

View file

@ -138,6 +138,7 @@ pub enum TreeKind {
BinaryExpression,
IfStatement,
Identifier,
PrintStatement,
}
pub struct Tree<'a> {
@ -555,10 +556,32 @@ fn statement(p: &mut CParser) {
// require a semicolon at the end if it's all by itself.
TokenKind::If => statement_if(p),
TokenKind::Print => statement_print(p),
_ => statement_expression(p),
}
}
/// Parses a print statement into a `TreeKind::PrintStatement` node:
/// `print`, `(`, an optional expression, `)`, then a `;` that may be
/// omitted when the next token is `}`.
///
/// Panics if the parser is not positioned at a `print` token.
fn statement_print(p: &mut CParser) {
    assert!(p.at(TokenKind::Print));
    let marker = p.start();

    p.expect(
        TokenKind::Print,
        "expect 'print' to start a print statement",
    );
    p.expect(TokenKind::LeftParen, "expect '(' to start a print");

    // The argument is optional: `print()` parses with no expression child.
    let has_argument = !p.at(TokenKind::RightParen);
    if has_argument {
        expression(p);
    }
    p.expect(TokenKind::RightParen, "expect ')' after a print statement");

    // No semicolon is demanded when a closing brace follows immediately.
    let followed_by_brace = p.at(TokenKind::RightBrace);
    if !followed_by_brace {
        p.expect(TokenKind::Semicolon, "expect ';' to end a print statement");
    }

    p.end(marker, TreeKind::PrintStatement);
}
fn statement_if(p: &mut CParser) {
assert!(p.at(TokenKind::If));
let m = p.start();

View file

@ -4,6 +4,8 @@ use crate::{
};
use std::{cell::RefCell, collections::HashMap, fmt, rc::Rc};
// TODO: Unused variables?
// TODO: An error should have:
//
// - a start

View file

@ -18,14 +18,17 @@
// | LiteralExpression
// | Number:'"2"'
// | Semicolon:'";"'
// | ExpressionStatement
// | PrintStatement
// | Print:'"print"'
// | LeftParen:'"("'
// | Identifier
// | Identifier:'"y"'
// | RightParen:'")"'
// | Semicolon:'";"'
// |
let x = 23;
let y = x * 2;
y;
print(y);
// @type: 590 f64
// @type: 667 f64