From 2dbdbb39579a3f5a35f0cfa066790653de043129 Mon Sep 17 00:00:00 2001 From: John Doty Date: Sun, 11 Feb 2024 09:31:51 -0800 Subject: [PATCH] [fine] Lifetime garbage, big refactor So it turns out that I can't hold `&str` in token because it makes it impossible to encapsulate a source file in the larger context- self referential structure problems again. Everything gets rebuilt so that the source can be passed through. While we're at it, more things become Rc<> because, man..... life is too short. Semantics in particular has become a giant hub of the module state: we can basically just hold an Rc and have everything we could possibly want to know about a source file, computed lazily if necessary. --- fine/build.rs | 19 +- fine/src/compiler.rs | 133 ++++++------- fine/src/lib.rs | 25 ++- fine/src/parser.rs | 113 ++++++----- fine/src/semantics.rs | 387 +++++++++++++++++++++++------------- fine/src/tokens.rs | 100 ++++++---- fine/tests/example_tests.rs | 54 +++-- 7 files changed, 502 insertions(+), 329 deletions(-) diff --git a/fine/build.rs b/fine/build.rs index 5bafd965..25c12cc6 100644 --- a/fine/build.rs +++ b/fine/build.rs @@ -57,7 +57,7 @@ fn generate_test_for_file(path: PathBuf) -> String { } assertions.push(quote! { - crate::assert_concrete(&_tree, #concrete, #display_path); + crate::assert_concrete(source.clone(), _tree.clone(), #concrete, #display_path); }); } else if line == "@compiles-to:" { let mut compiled = String::new(); @@ -72,7 +72,7 @@ fn generate_test_for_file(path: PathBuf) -> String { } assertions.push(quote! { - crate::assert_compiles_to(&_tree, &_lines, #compiled, #display_path); + crate::assert_compiles_to(source.clone(), _tree.clone(), _lines.clone(), #compiled, #display_path); }); } else if let Some(line) = line.strip_prefix("@type:") { let (pos, expected) = line @@ -85,7 +85,7 @@ fn generate_test_for_file(path: PathBuf) -> String { .expect(&format!("Unable to parse position '{pos}'")); let expected = expected.trim(); assertions.push(quote! 
{ - crate::assert_type_at(&_tree, &_lines, #pos, #expected, #display_path); + crate::assert_type_at(source.clone(), _tree.clone(), _lines.clone(), #pos, #expected, #display_path); }); } else if let Some(line) = line.strip_prefix("@type-error:") { let (pos, expected) = line @@ -98,21 +98,21 @@ fn generate_test_for_file(path: PathBuf) -> String { .expect(&format!("Unable to parse position '{pos}'")); let expected = expected.trim(); assertions.push(quote! { - crate::assert_type_error_at(&_tree, &_lines, #pos, #expected, #display_path); + crate::assert_type_error_at(source.clone(), _tree.clone(), _lines.clone(), #pos, #expected, #display_path); }); } else if line == "@no-errors" { assertions.push(quote! { - crate::assert_no_errors(&_tree, &_lines); + crate::assert_no_errors(source.clone(), _tree.clone(), _lines.clone()); }); } else if let Some(line) = line.strip_prefix("@eval:") { let expected = line.trim(); assertions.push(quote! { - crate::assert_eval_ok(&_tree, &_lines, #expected); + crate::assert_eval_ok(source.clone(), _tree.clone(), _lines.clone(), #expected); }); } else if let Some(line) = line.strip_prefix("@check-error:") { let expected = line.trim(); assertions.push(quote! { - crate::assert_check_error(&_tree, &_lines, #expected); + crate::assert_check_error(source.clone(), _tree.clone(), _lines.clone(), #expected); }); } else if line == "@expect-errors:" { let mut errors = Vec::new(); @@ -127,7 +127,7 @@ fn generate_test_for_file(path: PathBuf) -> String { let errors = ExpectedErrors(errors); assertions.push(quote! { - crate::assert_errors(&_tree, &_lines, #errors); + crate::assert_errors(source.clone(), _tree.clone(), _lines.clone(), #errors); }); } else if line.starts_with("@") { panic!("Test file {display_path} has unknown directive: {line}"); @@ -138,7 +138,8 @@ fn generate_test_for_file(path: PathBuf) -> String { let test_method = quote! 
{ #disabled fn #name() { - let (_tree, _lines) = fine::parser::parse(#contents); + let source : std::rc::Rc = #contents.into(); + let (_tree, _lines) = fine::parser::parse(&source); #(#assertions)* } }; diff --git a/fine/src/compiler.rs b/fine/src/compiler.rs index 50173529..0fff52fc 100644 --- a/fine/src/compiler.rs +++ b/fine/src/compiler.rs @@ -148,8 +148,9 @@ struct FunctionKey { } struct Compiler<'a> { - semantics: &'a Semantics<'a>, - syntax: &'a SyntaxTree<'a>, + source: &'a str, + semantics: &'a Semantics, + syntax: &'a SyntaxTree, function_bindings: HashMap, pending_functions: Vec<(FunctionKey, usize, Function)>, @@ -183,8 +184,7 @@ macro_rules! compiler_assert_eq { let left = &$ll; let right = &$rr; if left != right { - let semantics = $compiler.semantics; - semantics.dump_compiler_state(Some($tr)); + $compiler.semantics.dump_compiler_state(Some($tr)); assert_eq!(left, right); } @@ -194,8 +194,7 @@ macro_rules! compiler_assert_eq { let left = &$ll; let right = &$rr; if left != right { - let semantics = $compiler.semantics; - semantics.dump_compiler_state(Some($tr)); + $compiler.semantics.dump_compiler_state(Some($tr)); assert_eq!(left, right, $($t)*); } @@ -205,8 +204,7 @@ macro_rules! compiler_assert_eq { macro_rules! compiler_assert { ($compiler:expr, $tr:expr, $cond:expr $(,)?) => {{ if !$cond { - let semantics = $compiler.semantics; - semantics.dump_compiler_state(Some($tr)); + $compiler.semantics.dump_compiler_state(Some($tr)); assert!($cond); } @@ -214,8 +212,7 @@ macro_rules! compiler_assert { ($compiler:expr, $tr:expr, $cond:expr, $($arg:tt)+) => {{ if !$cond { - let semantics = $compiler.semantics; - semantics.dump_compiler_state(Some($tr)); + $compiler.semantics.dump_compiler_state(Some($tr)); assert!($cond, $($arg)*); } @@ -223,9 +220,8 @@ macro_rules! compiler_assert { } macro_rules! 
ice { - ($compiler: expr, $tr:expr, $($t:tt)+) => {{ - let semantics = $compiler.semantics; - semantics.dump_compiler_state(Some($tr)); + ($compiler:expr, $tr:expr, $($t:tt)+) => {{ + $compiler.semantics.dump_compiler_state(Some($tr)); panic!($($t)*) }} } @@ -241,10 +237,15 @@ macro_rules! inst_panic { // ($compiler:expr, $tr:expr, $($t:tt)*) => {{}}; // } -pub fn compile(semantics: &Semantics) -> Rc { +pub fn compile(semantics: Rc) -> Rc { + let source = semantics.source(); + let syntax_tree = semantics.tree(); + let mut compiler = Compiler { - semantics, - syntax: semantics.tree(), + source: &source, + semantics: &semantics, + syntax: &syntax_tree, + function_bindings: HashMap::new(), pending_functions: Vec::new(), temp_functions: Vec::new(), @@ -328,14 +329,16 @@ fn compile_expression(c: &mut Compiler, t: TreeRef) { fn compile_literal(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { let tok = tr.nth_token(0)?; match c.semantics.type_of(t) { - Type::F64 => c.push(Instruction::PushFloat(tok.as_str().parse().unwrap())), + Type::F64 => c.push(Instruction::PushFloat( + tok.as_str(c.source).parse().unwrap(), + )), Type::Bool => c.push(if tok.kind == TokenKind::True { Instruction::PushTrue } else { Instruction::PushFalse }), Type::String => { - let result = string_constant_to_string(tok.as_str()); + let result = string_constant_to_string(tok.as_str(c.source)); let index = c.add_string(result); c.push(Instruction::PushString(index)) } @@ -534,24 +537,16 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { let declaration = match ltree.kind { // TODO: Assign to list access TreeKind::Identifier => { - let id = ltree.nth_token(0)?; + let id = ltree.nth_token(0)?.as_str(&c.source); environment = c.semantics.environment_of(lvalue); environment.bind(id)? 
} TreeKind::MemberAccess => { - let id = ltree.nth_token(2)?; - let typ = c.semantics.type_of(ltree.nth_tree(0)?); - environment = match &typ { - Type::Object(ct, _) => { - let class = c.semantics.class_of(*ct); - class.env.clone() - } - Type::Class(ct, _) => { - let class = c.semantics.class_of(*ct); - class.static_env.clone() - } - _ => return None, - }; + let id = ltree.nth_token(2)?.as_str(&c.source); + + let t = ltree.nth_tree(0)?; + let typ = c.semantics.type_of(t); + environment = c.semantics.member_environment(t, &typ); environment.bind(id)? } _ => return None, @@ -587,16 +582,22 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR { Declaration::ExternFunction { .. } => inst_panic!("store ext"), Declaration::Function { .. } => inst_panic!("store func"), Declaration::Class { .. } => inst_panic!("store class"), + Declaration::Import { .. } => inst_panic!("store import"), }; c.push(instruction); OK } - _ => ice!(c, t, "Unsupported binary expression '{op}'"), + _ => ice!( + c, + t, + "Unsupported binary expression '{}'", + op.as_str(&c.source) + ), } } fn compile_identifier_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> Option<()> { - let ident = tree.nth_token(0)?; + let ident = tree.nth_token(0)?.as_str(&c.source); let environment = c.semantics.environment_of(t); let declaration = environment.bind(ident)?; @@ -659,6 +660,8 @@ fn compile_load_declaration(c: &mut Compiler, t: TreeRef, declaration: &Declarat // Must be a static don't worry about it. Declaration::Class { .. } => return OK, + + Declaration::Import { .. } => todo!(), }; c.push(instruction); @@ -677,10 +680,10 @@ fn compile_pattern(c: &mut Compiler, t: TreeRef) -> Option<()> { // Let's *try* to generate good code in the presence of a wildcard pattern.... 
let is_wildcard = tree - .child_tree_of_kind(c.syntax, TreeKind::WildcardPattern) + .child_tree_of_kind(&c.syntax, TreeKind::WildcardPattern) .is_some(); - let type_expr = tree.child_tree_of_kind(c.syntax, TreeKind::TypeExpression); + let type_expr = tree.child_tree_of_kind(&c.syntax, TreeKind::TypeExpression); let and_index = tree.children.iter().position(|c| match c { Child::Token(t) => t.kind == TokenKind::And, @@ -688,10 +691,10 @@ fn compile_pattern(c: &mut Compiler, t: TreeRef) -> Option<()> { }); // If you have a binding, dup and store now, it is in scope. - if let Some(binding) = tree.child_tree_of_kind(c.syntax, TreeKind::VariableBinding) { + if let Some(binding) = tree.child_tree_of_kind(&c.syntax, TreeKind::VariableBinding) { if let Some(variable) = binding.nth_token(0) { let environment = c.semantics.environment_of(t); - let declaration = environment.bind(variable)?; + let declaration = environment.bind(variable.as_str(&c.source))?; let Declaration::Variable { location: Location::Local, @@ -765,8 +768,8 @@ fn compile_type_expr_eq(c: &mut Compiler, t: TreeRef) { } fn compile_type_identifier_eq(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { - let identifier = tree.nth_token(0)?; - match identifier.as_str() { + let identifier = tree.nth_token(0)?.as_str(&c.source); + match identifier { "f64" => { c.push(Instruction::IsFloat); } @@ -825,7 +828,7 @@ fn compile_type_alternate_eq(c: &mut Compiler, tree: &Tree) -> CR { } fn compile_call_expression(c: &mut Compiler, tree: &Tree) -> CR { - let arg_list = tree.child_tree_of_kind(c.syntax, TreeKind::ArgumentList)?; + let arg_list = tree.child_tree_of_kind(&c.syntax, TreeKind::ArgumentList)?; let mut args: Vec<_> = arg_list.child_trees().collect(); let arg_count = args.len(); @@ -876,12 +879,12 @@ fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> C }; let class = c.semantics.class_of(ct); - let field_list = tree.child_tree_of_kind(c.syntax, TreeKind::FieldList)?; + let field_list = 
tree.child_tree_of_kind(&c.syntax, TreeKind::FieldList)?; let mut field_bindings = HashMap::new(); - for field in field_list.children_of_kind(c.syntax, TreeKind::FieldValue) { + for field in field_list.children_of_kind(&c.syntax, TreeKind::FieldValue) { let f = &c.syntax[field]; let name = f.nth_token(0)?; - field_bindings.insert(name.as_str(), field); + field_bindings.insert(name.as_str(&c.source), field); } // The fields come in this order and since arguments are backwards @@ -894,8 +897,8 @@ fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> C // Fetch the correct constructor. // TODO: Binding this type should be done by semantics, and we should borrow it. - let type_reference = tree.child_tree_of_kind(c.syntax, TreeKind::TypeIdentifier)?; - let identifier = type_reference.nth_token(0)?; + let type_reference = tree.child_tree_of_kind(&c.syntax, TreeKind::TypeIdentifier)?; + let identifier = type_reference.nth_token(0)?.as_str(&c.source); let environment = c.semantics.environment_of(t); match environment.bind(identifier)? { Declaration::Class { declaration, .. } => { @@ -931,7 +934,7 @@ fn compile_field_value(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { // Form 2: { x, ... 
} let environment = c.semantics.environment_of(t); - let id = tree.nth_token(0)?; + let id = tree.nth_token(0)?.as_str(&c.source); let declaration = environment.bind(id)?; compile_load_declaration(c, t, declaration) @@ -944,7 +947,7 @@ fn compile_member_access(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR { compile_expression(c, tree.nth_tree(0)?); let typ = c.semantics.type_of(tree.nth_tree(0)?); - let ident = tree.nth_token(2)?; + let ident = tree.nth_token(2)?.as_str(&c.source); let environment = match &typ { Type::Object(ct, _) => { @@ -976,7 +979,7 @@ fn compile_self_reference(c: &mut Compiler) -> CR { fn compile_list_constructor(c: &mut Compiler, tree: &Tree) -> CR { let mut children: Vec<_> = tree - .children_of_kind(c.syntax, TreeKind::ListConstructorElement) + .children_of_kind(&c.syntax, TreeKind::ListConstructorElement) .collect(); children.reverse(); let count = children.len(); @@ -1046,7 +1049,7 @@ fn compile_expression_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) fn compile_let_statement(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR { compile_expression(c, tree.nth_tree(3)?); let environment = c.semantics.environment_of(t); - let declaration = environment.bind(tree.nth_token(1)?)?; + let declaration = environment.bind(tree.nth_token(1)?.as_str(&c.source))?; let Declaration::Variable { location, index, .. @@ -1087,19 +1090,16 @@ fn compile_function_declaration(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_v let fk = FunctionKey { tree: t }; if !c.function_bindings.contains_key(&fk) { // TODO: If this is a method the name should be different. 
- let name = tree.nth_token(1)?; + let name = tree.nth_token(1)?.as_str(&c.source); - let param_list = tree.child_tree_of_kind(c.syntax, TreeKind::ParamList)?; + let param_list = tree.child_tree_of_kind(&c.syntax, TreeKind::ParamList)?; let param_count = param_list.children.len() - 2; let function_index = c.temp_functions.len(); c.temp_functions.push(None); - c.pending_functions.push(( - fk.clone(), - function_index, - Function::new(name.as_str(), param_count), - )); + c.pending_functions + .push((fk.clone(), function_index, Function::new(name, param_count))); c.function_bindings.insert(fk, function_index); c.module .exports @@ -1118,18 +1118,15 @@ fn compile_class_declaration(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_valu // Classes get compiled as constructor functions which get called. let fk = FunctionKey { tree: t }; if !c.function_bindings.contains_key(&fk) { - let name = tree.nth_token(1)?; + let name = tree.nth_token(1)?.as_str(&c.source); let field_count = tree.children.len() - 2; let function_index = c.temp_functions.len(); c.temp_functions.push(None); - c.pending_functions.push(( - fk.clone(), - function_index, - Function::new(name.as_str(), field_count), - )); + c.pending_functions + .push((fk.clone(), function_index, Function::new(name, field_count))); c.function_bindings.insert(fk, function_index); c.module .exports @@ -1147,16 +1144,18 @@ fn compile_function(c: &mut Compiler, t: TreeRef) -> CR { let tree = &c.syntax[t]; match tree.kind { TreeKind::FunctionDecl => { - let block = tree.child_of_kind(c.syntax, TreeKind::Block)?; + let block = tree.child_of_kind(&c.syntax, TreeKind::Block)?; compile_expression(c, block); } TreeKind::ClassDecl => { - let count = tree.children_of_kind(c.syntax, TreeKind::FieldDecl).count(); + let count = tree + .children_of_kind(&c.syntax, TreeKind::FieldDecl) + .count(); for i in 0..count { c.push(Instruction::LoadArgument(count - 1 - i)); } - let name = tree.nth_token(1)?.as_str(); + let name = 
tree.nth_token(1)?.as_str(&c.source); let name_index = c.add_string(name.to_string()); c.push(Instruction::PushString(name_index)); c.push(Instruction::PushInt(t.index().try_into().unwrap())); @@ -1209,7 +1208,7 @@ fn compile_for_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR { // Figure out the variable. let vt = tree.nth_tree(1)?; let var = &c.syntax[vt]; - let id = var.nth_token(0)?; + let id = var.nth_token(0)?.as_str(&c.source); let body = tree.nth_tree(4)?; let env = c.semantics.environment_of(body); diff --git a/fine/src/lib.rs b/fine/src/lib.rs index 78808ee0..6d854f6e 100644 --- a/fine/src/lib.rs +++ b/fine/src/lib.rs @@ -1,4 +1,4 @@ -use std::fs; +use std::{fs, rc::Rc}; use compiler::compile; use parser::parse; @@ -11,6 +11,21 @@ pub mod semantics; pub mod tokens; pub mod vm; +// struct SourceModule { +// semantics: Rc, +// } + +// impl SourceModule { +// pub fn new(source: &str) -> Self { +// let source: Rc = source.into(); +// let (syntax, lines) = parse(&source); +// let semantics = Rc::new(Semantics::new(source, syntax, lines)); +// SourceModule { semantics } +// } +// } + +// struct Environment {} + pub fn process_file(file: &str) { let source = match fs::read_to_string(file) { Ok(c) => c, @@ -21,21 +36,21 @@ pub fn process_file(file: &str) { }; // What am I doing here? + let source: Rc = source.into(); let (tree, lines) = parse(&source); - let semantics = Semantics::new(&tree, &lines); + let semantics = Rc::new(Semantics::new(source, tree, lines)); check(&semantics); // OK now there might be errors. 
- let mut errors = semantics.snapshot_errors(); + let errors = semantics.snapshot_errors(); if errors.len() > 0 { - errors.reverse(); for e in errors { eprintln!("{file}: {}:{}: {}", e.start.0, e.start.1, e.message); } return; } - let module = compile(&semantics); + let module = compile(semantics); let main_function = module.functions[module.init].clone(); let mut context = Context::new(module.clone()); diff --git a/fine/src/parser.rs b/fine/src/parser.rs index 4527ebd5..313ad0dd 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -2,14 +2,15 @@ // https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html use crate::tokens::{Lines, Token, TokenKind, Tokens}; use std::fmt::Write as _; +use std::rc::Rc; use std::{cell::Cell, num::NonZeroU32}; -pub struct SyntaxTree<'a> { - trees: Vec>, +pub struct SyntaxTree { + trees: Vec, root: Option, } -impl<'a> SyntaxTree<'a> { +impl SyntaxTree { pub fn new() -> Self { SyntaxTree { trees: vec![], @@ -21,7 +22,7 @@ impl<'a> SyntaxTree<'a> { self.root } - pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef { + pub fn add_tree(&mut self, mut t: Tree) -> TreeRef { assert!(t.parent.is_none()); let tr = TreeRef::from_index(self.trees.len()); @@ -50,10 +51,10 @@ impl<'a> SyntaxTree<'a> { tr } - pub fn dump(&self, with_positions: bool) -> String { + pub fn dump(&self, source: &str, with_positions: bool) -> String { let mut output = String::new(); if let Some(r) = self.root { - self[r].dump(self, with_positions, &mut output); + self[r].dump(source, self, with_positions, &mut output); } output } @@ -102,15 +103,15 @@ impl<'a> SyntaxTree<'a> { } } -impl<'a> std::ops::Index for SyntaxTree<'a> { - type Output = Tree<'a>; +impl std::ops::Index for SyntaxTree { + type Output = Tree; fn index(&self, index: TreeRef) -> &Self::Output { &self.trees[index.index()] } } -impl<'a> std::ops::IndexMut for SyntaxTree<'a> { +impl std::ops::IndexMut for SyntaxTree { fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output { 
&mut self.trees[index.index()] } @@ -164,18 +165,20 @@ pub enum TreeKind { VariableBinding, WhileStatement, WildcardPattern, + + Import, } -pub struct Tree<'a> { +pub struct Tree { pub kind: TreeKind, pub parent: Option, // TODO: Do we actually need this? pub start_pos: usize, pub end_pos: usize, - pub children: Vec>, + pub children: Vec, } -impl<'a> Tree<'a> { - pub fn nth_token(&self, index: usize) -> Option<&Token<'a>> { +impl Tree { + pub fn nth_token(&self, index: usize) -> Option<&Token> { self.children .get(index) .map(|c| match c { @@ -215,27 +218,23 @@ impl<'a> Tree<'a> { self.children_of_kind(&s, kind).next() } - pub fn child_tree_of_kind<'b>( - &'b self, - s: &'b SyntaxTree<'a>, - kind: TreeKind, - ) -> Option<&'b Tree<'a>> { + pub fn child_tree_of_kind<'b>(&'b self, s: &'b SyntaxTree, kind: TreeKind) -> Option<&'b Tree> { self.child_of_kind(s, kind).map(|t| &s[t]) } - pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) { + pub fn dump(&self, source: &str, tree: &SyntaxTree, with_positions: bool, output: &mut String) { let _ = write!(output, "{:?}", self.kind); if with_positions { let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos); } let _ = write!(output, "\n"); for child in self.children.iter() { - child.dump_rec(2, tree, with_positions, output); + child.dump_rec(source, 2, tree, with_positions, output); } } } -impl<'a> std::fmt::Debug for Tree<'a> { +impl std::fmt::Debug for Tree { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:?} [{}-{})", self.kind, self.start_pos, self.end_pos) } @@ -256,16 +255,17 @@ impl TreeRef { } } -pub enum Child<'a> { - Token(Token<'a>), +pub enum Child { + Token(Token), Tree(TreeRef), } -impl<'a> Child<'a> { +impl Child { fn dump_rec( &self, + source: &str, indent: usize, - tree: &SyntaxTree<'a>, + tree: &SyntaxTree, with_positions: bool, output: &mut String, ) { @@ -274,9 +274,9 @@ impl<'a> Child<'a> { } match self { Child::Token(t) => { - 
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str()); + let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str(source)); if with_positions { - let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len()); + let _ = write!(output, " [{}, {})", t.start(), t.end()); } let _ = write!(output, "\n"); } @@ -289,7 +289,7 @@ impl<'a> Child<'a> { let _ = write!(output, "\n"); for child in t.children.iter() { - child.dump_rec(indent + 2, tree, with_positions, output); + child.dump_rec(source, indent + 2, tree, with_positions, output); } } } @@ -297,23 +297,23 @@ impl<'a> Child<'a> { pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize { match &self { - Child::Token(t) => t.start, + Child::Token(t) => t.start(), Child::Tree(t) => syntax_tree[*t].start_pos, } } pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize { match &self { - Child::Token(t) => t.start + t.as_str().len(), + Child::Token(t) => t.end(), Child::Tree(t) => syntax_tree[*t].end_pos, } } } -enum ParseEvent<'a> { +enum ParseEvent { Start { kind: TreeKind }, End, - Advance { token: Token<'a> }, + Advance { token: Token }, } struct MarkStarted { @@ -326,10 +326,10 @@ struct MarkClosed { struct CParser<'a> { tokens: Tokens<'a>, - current: Token<'a>, - next: Token<'a>, + current: Token, + next: Token, fuel: Cell, - events: Vec>, + events: Vec, panic: bool, } @@ -337,8 +337,8 @@ impl<'a> CParser<'a> { fn new(tokens: Tokens<'a>) -> Self { let mut parser = CParser { tokens, - current: Token::new(TokenKind::EOF, 0, ""), - next: Token::new(TokenKind::EOF, 0, ""), + current: Token::new(TokenKind::EOF, 0, 0), + next: Token::new(TokenKind::EOF, 0, 0), fuel: Cell::new(256), events: Vec::new(), panic: false, @@ -411,7 +411,8 @@ impl<'a> CParser<'a> { if self.fuel.get() == 0 { panic!( "parser is stuck at '{}' ({})!", - self.current, self.current.start + self.current.as_str(self.tokens.source()), + self.current.start() ); } self.fuel.set(self.fuel.get() - 1); @@ -422,7 +423,8 @@ impl<'a> 
CParser<'a> { if self.fuel.get() == 0 { panic!( "parser is stuck at '{}' ({})!", - self.current, self.current.start + self.current.as_str(self.tokens.source()), + self.current.start() ); } self.fuel.set(self.fuel.get() - 1); @@ -487,7 +489,7 @@ impl<'a> CParser<'a> { self.error_at(self.current.clone(), message) } - fn error_at(&mut self, token: Token<'a>, message: T) + fn error_at(&mut self, token: Token, message: T) where T: Into, { @@ -503,18 +505,18 @@ impl<'a> CParser<'a> { final_message.push_str("at end") } else if token.kind != TokenKind::Error { final_message.push_str("at '"); - final_message.push_str(token.as_str()); + final_message.push_str(token.as_str(self.tokens.source())); final_message.push_str("'"); } final_message.push_str(": "); final_message.push_str(&message); self.events.push(ParseEvent::Advance { - token: Token::error(token.start, final_message), + token: Token::error(token.start(), token.end(), final_message), }); } - fn build_tree(self) -> (SyntaxTree<'a>, Lines) { + fn build_tree(self) -> (Rc, Rc) { let mut events = self.events; let mut stack = Vec::new(); @@ -555,11 +557,11 @@ impl<'a> CParser<'a> { let root = result.add_tree(stack.pop().unwrap()); result.root = Some(root); - (result, self.tokens.lines()) + (Rc::new(result), Rc::new(self.tokens.lines())) } } -pub fn parse(source: &str) -> (SyntaxTree, Lines) { +pub fn parse(source: &str) -> (Rc, Rc) { let tokens = Tokens::new(source); let mut parser = CParser::new(tokens); @@ -572,6 +574,7 @@ fn file(p: &mut CParser) { let m = p.start(); while !p.eof() { match p.peek() { + TokenKind::Import => import(p), TokenKind::Class => class(p), TokenKind::RightBrace => { // An error parsing mismatched braces can leave me at an @@ -613,6 +616,26 @@ fn function(p: &mut CParser) { p.end(m, TreeKind::FunctionDecl); } +fn import(p: &mut CParser) { + let m = p.start(); + + p.expect_start(TokenKind::Import); + p.expect( + TokenKind::String, + "expected a string as the path of the module to import", + ); + 
p.expect( + TokenKind::As, + "expected `as` between the module string and identifier", + ); + p.expect( + TokenKind::Identifier, + "expected an identifier for the module", + ); + + p.end(m, TreeKind::Import); +} + fn class(p: &mut CParser) { let m = p.start(); diff --git a/fine/src/semantics.rs b/fine/src/semantics.rs index ea571030..0302204e 100644 --- a/fine/src/semantics.rs +++ b/fine/src/semantics.rs @@ -149,6 +149,9 @@ pub enum Type { // An alternate is one or another type. Alternate(Box<[Type]>), + + // A module of some kind. What module? + Module(Rc), } impl Type { @@ -163,19 +166,20 @@ impl Type { match self { Type::Error => 0, Type::Unreachable => 1, - Type::Assignment(_) => 2, - Type::TypeVariable(_) => 3, + Type::Assignment(..) => 2, + Type::TypeVariable(..) => 3, Type::Nothing => 4, Type::F64 => 5, Type::I64 => 6, Type::String => 7, Type::Bool => 8, - Type::Function(_, _) => 9, - Type::Method(_, _, _) => 10, - Type::List(_) => 11, - Type::Class(_, _) => 12, - Type::Object(_, _) => 13, - Type::Alternate(_) => 14, + Type::Function(..) => 9, + Type::Method(..) => 10, + Type::List(..) => 11, + Type::Class(..) => 12, + Type::Object(..) => 13, + Type::Alternate(..) => 14, + Type::Module(..) => 15, } } } @@ -242,6 +246,7 @@ impl fmt::Display for Type { } Ok(()) } + Module(name) => write!(f, "module {}", name), } } } @@ -335,6 +340,9 @@ pub enum Declaration { Class { declaration: TreeRef, //? 
}, + Import { + declaration: TreeRef, + }, } pub struct Environment { @@ -383,8 +391,8 @@ impl Environment { }) } - pub fn insert(&mut self, token: &Token, t: TreeRef) -> Option { - self.insert_name(token.as_str().into(), t) + pub fn insert(&mut self, token: &str, t: TreeRef) -> Option { + self.insert_name(token.into(), t) } pub fn insert_name(&mut self, name: Box, t: TreeRef) -> Option { @@ -400,14 +408,14 @@ impl Environment { result } - pub fn bind(&self, token: &Token) -> Option<&Declaration> { - if let Some(decl) = self.declarations.get(token.as_str()) { + pub fn bind(&self, token: &str) -> Option<&Declaration> { + if let Some(decl) = self.declarations.get(token) { return Some(decl); } let mut current = &self.parent; while let Some(env) = current { - if let Some(decl) = env.declarations.get(token.as_str()) { + if let Some(decl) = env.declarations.get(token) { return Some(decl); } current = &env.parent; @@ -597,11 +605,10 @@ enum Incremental { Complete(T), } -pub struct Semantics<'a> { - // TODO: Do I really want my own copy here? Should we standardize on Arc - // or Rc or some other nice sharing mechanism? - syntax_tree: &'a SyntaxTree<'a>, - lines: &'a Lines, +pub struct Semantics { + source: Rc, + syntax_tree: Rc, + lines: Rc, // Instead of physical parents, this is the set of *logical* parents. // This is what is used for binding. 
@@ -615,17 +622,18 @@ pub struct Semantics<'a> { classes: RefCell>>, } -impl<'a> Semantics<'a> { - pub fn new(tree: &'a SyntaxTree<'a>, lines: &'a Lines) -> Self { +impl Semantics { + pub fn new(source: Rc, tree: Rc, lines: Rc) -> Self { let mut logical_parents = vec![None; tree.len()]; if let Some(root) = tree.root() { - set_logical_parents(&mut logical_parents, tree, root, None); + set_logical_parents(&mut logical_parents, &tree, root, None); } let root_environment = Environment::new(None, Location::Module); let mut semantics = Semantics { - syntax_tree: tree, + source, + syntax_tree: tree.clone(), lines, logical_parents, errors: RefCell::new(vec![]), @@ -645,8 +653,16 @@ impl<'a> Semantics<'a> { semantics } - pub fn tree(&self) -> &SyntaxTree<'a> { - &self.syntax_tree + pub fn source(&self) -> Rc { + self.source.clone() + } + + pub fn tree(&self) -> Rc { + self.syntax_tree.clone() + } + + pub fn lines(&self) -> Rc { + self.lines.clone() } pub fn snapshot_errors(&self) -> Vec { @@ -666,16 +682,6 @@ impl<'a> Semantics<'a> { } } - fn report_error(&self, position: usize, error: T) - where - T: ToString, - { - let (line, col) = self.lines.position(position); - self.errors - .borrow_mut() - .push(Error::new(line, col, error.to_string())); - } - fn report_error_span(&self, start: usize, end: usize, error: T) where T: ToString, @@ -687,7 +693,7 @@ impl<'a> Semantics<'a> { .push(Error::new_spanned(start, end, error.to_string())); } - fn report_error_tree(&self, tree: &Tree<'a>, error: T) + fn report_error_tree(&self, tree: &Tree, error: T) where T: ToString, { @@ -702,9 +708,6 @@ impl<'a> Semantics<'a> { self.report_error_span(tree.start_pos, tree.end_pos, error) } - // pub fn lvalue_declaration(&self, t: TreeRef) -> Option<&Declaration> { - // } - fn gather_errors(&mut self, tree: TreeRef) { let mut stack = vec![tree]; while let Some(tr) = stack.pop() { @@ -713,7 +716,7 @@ impl<'a> Semantics<'a> { match child { Child::Token(t) => { if t.kind == TokenKind::Error { - 
self.report_error(t.start, t.as_str()); + self.report_error_span(t.start(), t.end(), t.as_str(&self.source)); } } Child::Tree(t) => stack.push(*t), @@ -779,13 +782,16 @@ impl<'a> Semantics<'a> { }; let existing = environment.declarations.insert( - name.as_str().into(), + name.as_str(&self.source).into(), Declaration::Function { declaration: *t }, ); if existing.is_some() { self.report_error_tree( ct, - format!("duplicate definition of function '{name}'"), + format!( + "duplicate definition of function '{}'", + name.as_str(&self.source) + ), ); } } @@ -799,43 +805,63 @@ impl<'a> Semantics<'a> { fn environment_of_file(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef { let mut environment = Environment::new(Some(parent), Location::Module); + for child in tree.children.iter() { - match child { - Child::Tree(t) => { - let ct = &self.syntax_tree[*t]; - let binding = match ct.kind { - TreeKind::FunctionDecl => { - let Some(name) = ct.nth_token(1) else { - continue; - }; + let Child::Tree(t) = child else { + continue; + }; - let declaration = Declaration::Function { declaration: *t }; - Some(("function", name, declaration)) - } - TreeKind::ClassDecl => { - let Some(name) = ct.nth_token(1) else { - continue; - }; - - let declaration = Declaration::Class { declaration: *t }; - Some(("class", name, declaration)) - } - _ => None, + let ct = &self.syntax_tree[*t]; + let binding = match ct.kind { + TreeKind::FunctionDecl => { + let Some(name) = ct.nth_token(1) else { + continue; }; - - if let Some((what, name, declaration)) = binding { - let existing = environment - .declarations - .insert(name.as_str().into(), declaration); - if existing.is_some() { - self.report_error_tree( - ct, - format!("duplicate definition of {what} '{name}'"), - ); - } + if name.kind != TokenKind::Identifier { + continue; } + + let declaration = Declaration::Function { declaration: *t }; + Some(("function", name, declaration)) + } + TreeKind::ClassDecl => { + let Some(name) = ct.nth_token(1) 
else { + continue; + }; + if name.kind != TokenKind::Identifier { + continue; + } + + let declaration = Declaration::Class { declaration: *t }; + Some(("class", name, declaration)) + } + TreeKind::Import => { + let Some(name) = ct.nth_token(3) else { + continue; + }; + if name.kind != TokenKind::Identifier { + continue; + } + + let declaration = Declaration::Import { declaration: *t }; + Some(("import", name, declaration)) + } + _ => None, + }; + + if let Some((what, name, declaration)) = binding { + let existing = environment + .declarations + .insert(name.as_str(&self.source).into(), declaration); + if existing.is_some() { + self.report_error_tree( + ct, + format!( + "duplicate definition of {what} '{}'", + name.as_str(&self.source) + ), + ); } - _ => {} } } @@ -861,7 +887,7 @@ impl<'a> Semantics<'a> { }; let mut environment = Environment::new(Some(parent), location); - environment.insert(name, declaration); + environment.insert(name.as_str(&self.source), declaration); EnvironmentRef::new(environment) } @@ -878,7 +904,10 @@ impl<'a> Semantics<'a> { match param.kind { TreeKind::SelfParameter => { let param_name = param.nth_token(0).unwrap(); - if environment.insert(param_name, *ct).is_some() { + if environment + .insert(param_name.as_str(&self.source), *ct) + .is_some() + { self.report_error_tree( param, format!("duplicate definition of self parameter"), @@ -895,10 +924,11 @@ impl<'a> Semantics<'a> { continue; }; - if environment.insert(param_name, *ct).is_some() { + let param_str = param_name.as_str(&self.source); + if environment.insert(param_str, *ct).is_some() { self.report_error_tree( param, - format!("duplicate definition of parameter '{param_name}'"), + format!("duplicate definition of parameter '{param_str}'"), ); } } @@ -920,7 +950,7 @@ impl<'a> Semantics<'a> { }; let mut environment = Environment::new(Some(parent), Location::Local); - environment.insert(id, it); + environment.insert(id.as_str(&self.source), it); EnvironmentRef::new(environment) } @@ 
-928,7 +958,7 @@ impl<'a> Semantics<'a> { assert_eq!(tree.kind, TreeKind::IsExpression); // The environment of an `is` expression is the environment produced by the pattern. - let Some(pattern) = tree.child_tree_of_kind(self.syntax_tree, TreeKind::Pattern) else { + let Some(pattern) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::Pattern) else { // Should really have a pattern in there; otherwise there was a // parse error, don't make more trouble. return Environment::error(); @@ -950,7 +980,7 @@ impl<'a> Semantics<'a> { assert_eq!(tree.kind, TreeKind::MatchArm); // The environment of a `match arm` expression is the environment produced by the pattern. - let Some(pattern) = tree.child_tree_of_kind(self.syntax_tree, TreeKind::Pattern) else { + let Some(pattern) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::Pattern) else { // Should really have a pattern in there; otherwise there was a // parse error, don't make more trouble. return Environment::error(); @@ -989,7 +1019,7 @@ impl<'a> Semantics<'a> { ) -> EnvironmentRef { assert_eq!(tree.kind, TreeKind::Pattern); - let Some(binding) = tree.child_tree_of_kind(self.syntax_tree, TreeKind::VariableBinding) + let Some(binding) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::VariableBinding) else { // No binding, no new environment. return parent; @@ -999,7 +1029,7 @@ impl<'a> Semantics<'a> { }; let is_wildcard = tree - .child_of_kind(self.syntax_tree, TreeKind::WildcardPattern) + .child_of_kind(&self.syntax_tree, TreeKind::WildcardPattern) .is_some(); let variable_decl = if is_wildcard { @@ -1010,7 +1040,7 @@ impl<'a> Semantics<'a> { } else { // Otherwise the binding is to the type expression which must // match for the variable to have a value. 
- let Some(type_expr) = tree.child_of_kind(self.syntax_tree, TreeKind::TypeExpression) + let Some(type_expr) = tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression) else { return Environment::error(); }; @@ -1020,7 +1050,7 @@ impl<'a> Semantics<'a> { // TODO: This binding should be un-assignable! Don't assign to this! let mut env = Environment::new(Some(parent), Location::Local); - env.insert(variable, variable_decl); + env.insert(variable.as_str(&self.source), variable_decl); EnvironmentRef::new(env) } @@ -1044,11 +1074,14 @@ impl<'a> Semantics<'a> { let tree = &self.syntax_tree[t]; assert_eq!(tree.kind, TreeKind::ClassDecl); - let name = tree.nth_token(1).map(|t| t.as_str()).unwrap_or(""); + let name = tree + .nth_token(1) + .map(|t| t.as_str(&self.source)) + .unwrap_or(""); // Fields let mut fields = Vec::new(); - for field in tree.children_of_kind(self.syntax_tree, TreeKind::FieldDecl) { + for field in tree.children_of_kind(&self.syntax_tree, TreeKind::FieldDecl) { let f = &self.syntax_tree[field]; if let Some(field_name) = f.nth_token(0) { let field_type = f @@ -1056,7 +1089,7 @@ impl<'a> Semantics<'a> { .map(|t| self.type_of(t)) .unwrap_or(Type::Error); fields.push(FieldDecl { - name: field_name.as_str().into(), + name: field_name.as_str(&self.source).into(), declaration: field, field_type, }); @@ -1065,7 +1098,7 @@ impl<'a> Semantics<'a> { // Methods let mut methods = Vec::new(); - for method in tree.children_of_kind(self.syntax_tree, TreeKind::FunctionDecl) { + for method in tree.children_of_kind(&self.syntax_tree, TreeKind::FunctionDecl) { let m = &self.syntax_tree[method]; if let Some(method_name) = m.nth_token(1) { // TODO: Check to see if it is actually a method, or if it is a static function. @@ -1073,7 +1106,7 @@ impl<'a> Semantics<'a> { match decl_type { Type::Method(..) 
=> { methods.push(MethodDecl { - name: method_name.as_str().into(), + name: method_name.as_str(&self.source).into(), decl_type, declaration: method, is_static: false, @@ -1082,7 +1115,7 @@ impl<'a> Semantics<'a> { _ => { // TODO: Default to method or static? methods.push(MethodDecl { - name: method_name.as_str().into(), + name: method_name.as_str(&self.source).into(), decl_type, declaration: method, is_static: true, @@ -1287,6 +1320,7 @@ impl<'a> Semantics<'a> { TreeKind::TypeParameter => self.type_of_type_parameter(tree), TreeKind::UnaryExpression => self.type_of_unary(tree), TreeKind::WhileStatement => self.type_of_while(tree), + TreeKind::Import => self.type_of_import(tree), _ => self.internal_compiler_error(Some(t), "asking for a nonsense type"), }; @@ -1313,8 +1347,9 @@ impl<'a> Semantics<'a> { // This is dumb and should be punished, probably. (_, Type::Unreachable) => { - self.report_error( - op.start, + self.report_error_span( + op.start(), + op.end(), "cannot apply a unary operator to something that doesn't yield a value", ); Some(Type::Error) @@ -1324,11 +1359,12 @@ impl<'a> Semantics<'a> { (_, Type::Error) => Some(Type::Error), (_, arg_type) => { - self.report_error( - op.start, + self.report_error_span( + op.start(), + op.end(), format!( "cannot apply unary operator '{}' to value of type {}", - op.as_str(), + op.as_str(&self.source), arg_type ), ); @@ -1372,16 +1408,24 @@ impl<'a> Semantics<'a> { // This is dumb and should be punished, probably. 
(_, _, Type::Unreachable) => { - self.report_error( - op.start, - format!("cannot apply '{op}' to an argument that doesn't yield a value (on the right)"), + self.report_error_span( + op.start(), + op.end(), + format!( + "cannot apply '{}' to an argument that doesn't yield a value (on the right)", + op.as_str(&self.source) + ), ); Some(Type::Error) } (_, Type::Unreachable, _) => { - self.report_error( - op.start, - format!("cannot apply '{op}' to an argument that doesn't yield a value (on the left)"), + self.report_error_span( + op.start(), + op.end(), + format!( + "cannot apply '{}' to an argument that doesn't yield a value (on the left)", + op.as_str(&self.source) + ), ); Some(Type::Error) } @@ -1395,9 +1439,13 @@ impl<'a> Semantics<'a> { // Missed the whole table, it must be an error. (_, left_type, right_type) => { - self.report_error( - op.start, - format!("cannot apply binary operator '{op}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)"), + self.report_error_span( + op.start(), + op.end(), + format!( + "cannot apply binary operator '{}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)", + op.as_str(&self.source) + ), ); Some(Type::Error) } @@ -1420,7 +1468,7 @@ impl<'a> Semantics<'a> { let declaration = match tree.kind { // TODO: Assign to list access TreeKind::Identifier => { - let id = tree.nth_token(0)?; + let id = tree.nth_token(0)?.as_str(&self.source); environment = self.environment_of(left_tree); match environment.bind(id) { Some(decl) => decl, @@ -1433,7 +1481,7 @@ impl<'a> Semantics<'a> { } } TreeKind::MemberAccess => { - let id = tree.nth_token(2)?; + let id = tree.nth_token(2)?.as_str(&self.source); let typ = self.type_of(tree.nth_tree(0)?); environment = self.member_environment(left_tree, &typ); match environment.bind(id) { @@ -1471,6 +1519,13 @@ impl<'a> Semantics<'a> { ); return Some(Type::Error); } + Declaration::Import { .. 
} => { + self.report_error_tree_ref( + left_tree, + "cannot assign a new value to an imported module", + ); + return Some(Type::Error); + } } let _ = environment; @@ -1489,8 +1544,9 @@ impl<'a> Semantics<'a> { } else if self.can_convert(&right_type, &left_type) { Some(Type::Assignment(Box::new(left_type))) } else { - self.report_error( - op.start, + self.report_error_span( + op.start(), + op.end(), format!("cannot assign a value of type '{right_type}' to type '{left_type}'"), ); Some(Type::Error) @@ -1506,15 +1562,15 @@ impl<'a> Semantics<'a> { assert_eq!(tree.kind, TreeKind::TypeIdentifier); // TODO: This will *clearly* need to get better. - let token = tree.nth_token(0)?; - match token.as_str() { + let token = tree.nth_token(0)?.as_str(&self.source); + match token { "f64" => Some(Type::F64), "string" => Some(Type::String), "bool" => Some(Type::Bool), "nothing" => Some(Type::Nothing), "list" => { let args = - tree.child_tree_of_kind(self.syntax_tree, TreeKind::TypeParameterList)?; + tree.child_tree_of_kind(&self.syntax_tree, TreeKind::TypeParameterList)?; let mut arg_types: Vec<_> = args.child_trees().map(|t| self.type_of(t)).collect(); if arg_types.len() != 1 { @@ -1544,6 +1600,13 @@ impl<'a> Semantics<'a> { ); Some(Type::Error) } + Some(Declaration::Import { .. 
}) => { + self.report_error_tree( + tree, + format!("'{token}' is an imported module and cannot be used as a type"), + ); + Some(Type::Error) + } None => { if !environment.is_error { self.report_error_tree(tree, format!("Unrecognized type: '{token}'")); @@ -1609,7 +1672,10 @@ impl<'a> Semantics<'a> { TokenKind::Number => Type::F64, TokenKind::String => Type::String, TokenKind::True | TokenKind::False => Type::Bool, - _ => panic!("the token {tok} doesn't have a type!"), + _ => panic!( + "the token {} doesn't have a type!", + tok.as_str(&self.source) + ), }; Some(pig) } @@ -1766,9 +1832,14 @@ impl<'a> Semantics<'a> { return Some(Type::Error); } - let Some(declaration) = env.bind(id) else { + let id_str = id.as_str(&self.source); + let Some(declaration) = env.bind(id_str) else { if !env.is_error { - self.report_error(id.start, format!("'{typ}' has no member {id}")); + self.report_error_span( + id.start(), + id.end(), + format!("'{typ}' has no member {id_str}"), + ); } return Some(Type::Error); }; @@ -1786,6 +1857,10 @@ impl<'a> Semantics<'a> { let class = self.class_of(*ct); class.static_env.clone() } + // Type::Module(_name) => { + // // Woof. Would like to bind this now. + // todo!(); + // } Type::Error => return Environment::error(), _ => { self.report_error_tree_ref(t, format!("cannot access members of '{typ}'")); @@ -1820,7 +1895,7 @@ impl<'a> Semantics<'a> { fn type_of_identifier(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::Identifier); - let id = tree.nth_token(0)?; + let id = tree.nth_token(0)?.as_str(&self.source); let environment = self.environment_of(t); if let Some(declaration) = environment.bind(id) { return Some(self.type_of_declaration(t, declaration)); @@ -1836,6 +1911,7 @@ impl<'a> Semantics<'a> { match declaration { Declaration::Variable { declaration, .. } => self.type_of(*declaration), Declaration::Function { declaration, .. } => self.type_of(*declaration), + Declaration::Import { declaration, .. 
} => self.type_of(*declaration), Declaration::ExternFunction { declaration_type, .. } => declaration_type.clone(), @@ -1867,7 +1943,7 @@ impl<'a> Semantics<'a> { fn type_of_self_reference(&self, t: TreeRef, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::SelfReference); - let id = tree.nth_token(0)?; + let id = tree.nth_token(0)?.as_str(&self.source); let environment = self.environment_of(t); if let Some(declaration) = environment.bind(id) { return Some(match declaration { @@ -1886,7 +1962,7 @@ impl<'a> Semantics<'a> { } fn type_of_function_decl(&self, tree: &Tree) -> Option { - let param_list = tree.child_tree_of_kind(self.syntax_tree, TreeKind::ParamList)?; + let param_list = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::ParamList)?; // NOTE: The methodness here is determined by the presence of a self // parameter, even if that parameter is incorrect (e.g., this @@ -1905,7 +1981,7 @@ impl<'a> Semantics<'a> { } } - let return_type = match tree.child_of_kind(self.syntax_tree, TreeKind::ReturnType) { + let return_type = match tree.child_of_kind(&self.syntax_tree, TreeKind::ReturnType) { Some(t) => self.type_of(t), None => Type::Nothing, }; @@ -1919,7 +1995,7 @@ impl<'a> Semantics<'a> { fn type_of_parameter(&self, tree: &Tree) -> Option { assert_eq!(tree.kind, TreeKind::Parameter); - match tree.child_of_kind(self.syntax_tree, TreeKind::TypeExpression) { + match tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression) { Some(t) => Some(self.type_of(t)), None => { self.report_error_tree(tree, format!("the parameter is missing a type")); @@ -1991,7 +2067,7 @@ impl<'a> Semantics<'a> { // The details of a class are computed lazily, but this is enough of // a belly-button. let name = tree.nth_token(1)?; - Some(Type::Object(t, name.as_str().into())) + Some(Type::Object(t, name.as_str(&self.source).into())) } fn type_of_field_decl(&self, tree: &Tree) -> Option { @@ -2013,7 +2089,7 @@ impl<'a> Semantics<'a> { // Form 2: { x, ... 
} let environment = self.environment_of(t); - let id = tree.nth_token(0)?; + let id = tree.nth_token(0)?.as_str(&self.source); let declaration = match environment.bind(id) { Some(d) => d, None => { @@ -2038,6 +2114,13 @@ impl<'a> Semantics<'a> { ); Some(Type::Error) } + Declaration::Import { .. } => { + self.report_error_tree( + tree, + format!("'{id}' is an imported module, and cannot be the value of a field"), + ); + Some(Type::Error) + } } } @@ -2063,12 +2146,12 @@ impl<'a> Semantics<'a> { } fn type_of_match_expression(&self, tree: &Tree) -> Option { - Some(self.type_of(tree.child_of_kind(self.syntax_tree, TreeKind::MatchBody)?)) + Some(self.type_of(tree.child_of_kind(&self.syntax_tree, TreeKind::MatchBody)?)) } fn type_of_match_body(&self, tree: &Tree) -> Option { let arms: Vec<_> = tree - .children_of_kind(self.syntax_tree, TreeKind::MatchArm) + .children_of_kind(&self.syntax_tree, TreeKind::MatchArm) .collect(); if arms.len() == 0 { @@ -2106,7 +2189,15 @@ impl<'a> Semantics<'a> { fn type_of_pattern(&self, tree: &Tree) -> Option { // We know that we have a type expression in here, that's what we're asking about. - Some(self.type_of(tree.child_of_kind(self.syntax_tree, TreeKind::TypeExpression)?)) + Some(self.type_of(tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression)?)) + } + + fn type_of_import(&self, tree: &Tree) -> Option { + let tok = tree.nth_token(1)?; + if tok.kind != TokenKind::String { + return Some(Type::Error); // Already reported as syntax error + } + Some(Type::Module(tok.as_str(&self.source).into())) } // TODO: Really want to TEST THIS also uh can we generate bytecode for functions and call it?? 
@@ -2118,10 +2209,10 @@ impl<'a> Semantics<'a> { TreeKind::LiteralExpression => { let tok = tree.nth_token(0)?; match self.type_of(t) { - Type::F64 => Some(StackValue::Float(tok.as_str().parse().unwrap())), + Type::F64 => Some(StackValue::Float(tok.as_str(&self.source).parse().unwrap())), Type::Bool => Some(StackValue::Bool(tok.kind == TokenKind::True)), Type::String => Some(StackValue::String( - string_constant_to_string(tok.as_str()).into(), + string_constant_to_string(tok.as_str(&self.source)).into(), )), Type::Nothing => Some(StackValue::Nothing), // ? _ => None, @@ -2132,7 +2223,7 @@ impl<'a> Semantics<'a> { let pt = tree.nth_tree(2)?; let pattern = &self.syntax_tree[pt]; if pattern - .child_of_kind(self.syntax_tree, TreeKind::WildcardPattern) + .child_of_kind(&self.syntax_tree, TreeKind::WildcardPattern) .is_some() { Some(StackValue::Bool(true)) @@ -2240,7 +2331,7 @@ impl<'a> Semantics<'a> { pub fn dump_compiler_state(&self, tr: Option) { eprintln!("Parsed the tree as:"); - eprintln!("\n{}", self.syntax_tree.dump(true)); + eprintln!("\n{}", self.syntax_tree.dump(&self.source, true)); { let errors = self.snapshot_errors(); @@ -2288,6 +2379,9 @@ impl<'a> Semantics<'a> { Declaration::Class { declaration, .. } => { eprintln!(" (class {declaration:?})"); } + Declaration::Import { declaration, .. 
} => { + eprintln!(" (imported module {declaration:?})"); + } }; } environment = env.parent.clone(); @@ -2376,6 +2470,10 @@ pub fn check(s: &Semantics) { TreeKind::MatchExpression => {} TreeKind::WhileStatement => check_while_statement(s, tree), + + TreeKind::Import => { + // TODO: Check Import Statement + } } } } @@ -2399,12 +2497,12 @@ fn check_function_decl(s: &Semantics, t: TreeRef, tree: &Tree) { assert_eq!(tree.kind, TreeKind::FunctionDecl); let _ = s.environment_of(t); - let return_type_tree = tree.child_of_kind(s.syntax_tree, TreeKind::ReturnType); + let return_type_tree = tree.child_of_kind(&s.syntax_tree, TreeKind::ReturnType); let return_type = return_type_tree .map(|t| s.type_of(t)) .unwrap_or(Type::Nothing); - if let Some(body) = tree.child_of_kind(s.syntax_tree, TreeKind::Block) { + if let Some(body) = tree.child_of_kind(&s.syntax_tree, TreeKind::Block) { let body_type = s.type_of(body); if !s.can_convert(&body_type, &return_type) { // Just work very hard to get an appropriate error span. @@ -2418,8 +2516,7 @@ fn check_function_decl(s: &Semantics, t: TreeRef, tree: &Tree) { let end_tok = tree .nth_token(1) .unwrap_or_else(|| tree.nth_token(0).unwrap()); - let end_pos = end_tok.start + end_tok.as_str().len(); - (start, end_pos) + (start, end_tok.end()) }); s.report_error_span(start, end, format!("the body of this function yields a value of type '{body_type}', but callers expect this function to produce a '{return_type}'")); @@ -2434,9 +2531,11 @@ fn check_let(s: &Semantics, tree: &Tree) { let Some(expr) = tree.nth_tree(3) else { return }; if let Type::Method(..) 
= s.type_of(expr) { - let start = name.start; - let end = name.start + name.as_str().len(); - s.report_error_span(start, end, "methods cannot be assigned to variables"); + s.report_error_span( + name.start(), + name.end(), + "methods cannot be assigned to variables", + ); } } @@ -2491,7 +2590,7 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) { let Some(type_expression) = tree.nth_tree(1) else { return; }; - let Some(field_list) = tree.child_tree_of_kind(s.syntax_tree, TreeKind::FieldList) else { + let Some(field_list) = tree.child_tree_of_kind(&s.syntax_tree, TreeKind::FieldList) else { return; }; @@ -2502,11 +2601,11 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) { let mut any_errors = false; let mut field_bindings = HashMap::new(); - for field in field_list.children_of_kind(s.syntax_tree, TreeKind::FieldValue) { + for field in field_list.children_of_kind(&s.syntax_tree, TreeKind::FieldValue) { let f = &s.syntax_tree[field]; if let Some(name) = f.nth_token(0) { let field_type = s.type_of(field); - field_bindings.insert(name.as_str(), (field, field_type)); + field_bindings.insert(name.as_str(&s.source), (field, field_type)); } else { any_errors = true; } @@ -2552,12 +2651,13 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) { fn check_class_declaration(s: &Semantics, tree: &Tree) { let mut fields = HashMap::new(); - for field in tree.children_of_kind(s.syntax_tree, TreeKind::FieldDecl) { + for field in tree.children_of_kind(&s.syntax_tree, TreeKind::FieldDecl) { let f = &s.syntax_tree[field]; let Some(name) = f.nth_token(0) else { continue; }; - match fields.insert(name.as_str(), field) { + let name = name.as_str(&s.source); + match fields.insert(name, field) { Some(_) => { s.report_error_tree(f, format!("duplicate definition of field '{name}'")); } @@ -2595,7 +2695,7 @@ fn check_match_body(s: &Semantics, t: TreeRef, _tree: &Tree) { // 
https://doc.rust-lang.org/nightly/nightly-rustc/rustc_pattern_analysis/usefulness/index.html // let arms: Vec<_> = tree - // .children_of_kind(s.syntax_tree, TreeKind::MatchArm) + // .children_of_kind(&s.syntax_tree, TreeKind::MatchArm) // .collect(); // if arms.len() > 0 { @@ -2626,8 +2726,9 @@ mod tests { #[test] #[should_panic(expected = "INTERNAL COMPILER ERROR: oh no")] pub fn ice() { - let (tree, lines) = parse("1 + 1"); - let semantics = Semantics::new(&tree, &lines); + let source: Rc = "1+1".into(); + let (tree, lines) = parse(&source); + let semantics = Semantics::new(source, tree.clone(), lines); semantics.internal_compiler_error(tree.root(), "oh no"); } } diff --git a/fine/src/tokens.rs b/fine/src/tokens.rs index 87271bf0..08d28c1c 100644 --- a/fine/src/tokens.rs +++ b/fine/src/tokens.rs @@ -37,6 +37,7 @@ pub enum TokenKind { Number, And, + As, Async, Await, Class, @@ -63,53 +64,60 @@ pub enum TokenKind { } // NOTE: Tokens are kinda big (like 40 bytes?) and AFAICT the only way to go -// smaller would be to stop using string pointers and use smaller -// sizes/offsets instead, e.g., 32b for offset and 32b for size, and -// stop tracking the position independently from the start, and then -// require the source text when converting to line/col. I'm unwilling to -// give up the ergonomics of &str and String right now, so we're just -// not doing it. +// smaller would be to find some other way to represent the error in an +// error token, but I'm kinda unwilling to do that. 
+// #[derive(Debug, PartialEq, Eq, Clone)] -pub struct Token<'a> { +pub struct Token { pub kind: TokenKind, - pub start: usize, - value: Result<&'a str, Box>, + start: usize, + end: usize, + error: Option>, } -impl<'a> Token<'a> { - pub fn new(kind: TokenKind, start: usize, value: &'a str) -> Self { +impl Token { + pub fn new(kind: TokenKind, start: usize, end: usize) -> Self { Token { kind, start, - value: Ok(value), + end, + error: None, } } - pub fn error(start: usize, message: String) -> Self { + pub fn error(start: usize, end: usize, message: String) -> Self { Token { kind: TokenKind::Error, start, - value: Err(message.into()), + end, + error: Some(message.into()), } } - pub fn as_str<'b>(&'b self) -> &'a str + pub fn start(&self) -> usize { + self.start + } + + pub fn end(&self) -> usize { + self.end + } + + pub fn len(&self) -> usize { + self.end() - self.start() + } + + pub fn as_str<'a, 'b>(&'a self, source: &'b str) -> &'a str where 'b: 'a, { - match &self.value { - Ok(v) => v, - Err(e) => &e, + if let Some(error) = &self.error { + &error + } else { + &source[self.start()..self.end()] } } } -impl<'a> std::fmt::Display for Token<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - pub struct Lines { newlines: Vec, } @@ -169,6 +177,10 @@ impl<'a> Tokens<'a> { result } + pub fn source(&self) -> &'a str { + self.source + } + pub fn lines(self) -> Lines { self.lines } @@ -179,12 +191,11 @@ impl<'a> Tokens<'a> { self.lines.token_position(token) } - fn token(&self, start: usize, kind: TokenKind) -> Token<'a> { - let value = &self.source[start..self.pos()]; - Token::new(kind, start, value) + fn token(&self, start: usize, kind: TokenKind) -> Token { + Token::new(kind, start, self.pos()) } - fn number(&mut self, start: usize) -> Token<'a> { + fn number(&mut self, start: usize) -> Token { // First, the main part. 
loop { if !self.matches_digit() { @@ -225,6 +236,7 @@ impl<'a> Tokens<'a> { let slice = &self.source[start..self.pos()]; return Token::error( start, + self.pos(), format!("Invalid floating-point literal: {slice}"), ); } @@ -238,10 +250,14 @@ impl<'a> Tokens<'a> { self.token(start, TokenKind::Number) } - fn string(&mut self, start: usize, delimiter: char) -> Token<'a> { + fn string(&mut self, start: usize, delimiter: char) -> Token { while !self.matches(delimiter) { if self.eof() { - return Token::error(start, "Unterminated string constant".to_string()); + return Token::error( + start, + self.pos(), + "Unterminated string constant".to_string(), + ); } if self.matches('\\') { self.advance(); @@ -259,6 +275,9 @@ impl<'a> Tokens<'a> { if ident == "and" { return TokenKind::And; } + if ident == "as" { + return TokenKind::As; + } if ident == "async" { return TokenKind::Async; } @@ -363,7 +382,7 @@ impl<'a> Tokens<'a> { TokenKind::Identifier } - fn identifier(&mut self, start: usize) -> Token<'a> { + fn identifier(&mut self, start: usize) -> Token { loop { // TODO: Use unicode identifier classes instead if !self.matches_next(|c| c.is_ascii_alphanumeric() || c == '_') { @@ -373,7 +392,7 @@ impl<'a> Tokens<'a> { let ident = &self.source[start..self.pos()]; let kind = Self::identifier_token_kind(ident); - Token::new(kind, start, ident) + Token::new(kind, start, self.pos()) } fn matches(&mut self, ch: char) -> bool { @@ -420,7 +439,7 @@ impl<'a> Tokens<'a> { self.next_char.is_none() } - fn whitespace(&mut self, pos: usize) -> Token<'a> { + fn whitespace(&mut self, pos: usize) -> Token { while let Some((pos, ch)) = self.next_char { if ch == '\n' { self.lines.add_line(pos); @@ -432,7 +451,7 @@ impl<'a> Tokens<'a> { self.token(pos, TokenKind::Whitespace) } - fn comment(&mut self, pos: usize) -> Token<'a> { + fn comment(&mut self, pos: usize) -> Token { while let Some((_, ch)) = self.next_char { if ch == '\n' { break; @@ -442,7 +461,7 @@ impl<'a> Tokens<'a> { self.token(pos, 
TokenKind::Comment) } - pub fn next(&mut self) -> Token<'a> { + pub fn next(&mut self) -> Token { let (pos, c) = match self.advance() { Some((p, c)) => (p, c), None => return self.token(self.source.len(), TokenKind::EOF), @@ -516,7 +535,7 @@ impl<'a> Tokens<'a> { } else if c.is_ascii_alphabetic() || c == '_' { self.identifier(pos) } else { - Token::error(pos, format!("Unexpected character '{c}'")) + Token::error(pos, self.pos(), format!("Unexpected character '{c}'")) } } } @@ -552,9 +571,9 @@ mod tests { let mut expected: Vec = (vec![$($s),*]) .into_iter() - .map(|t| Token::new(t.1, t.0, t.2)) + .map(|t| Token::new(t.1, t.0, t.0 + t.2.len())) .collect(); - expected.push(Token::new(TokenKind::EOF, $input.len(), "")); + expected.push(Token::new(TokenKind::EOF, $input.len(), $input.len())); test_tokens_impl($input, expected); } @@ -611,11 +630,12 @@ mod tests { test_tokens!( more_more_keywords, - "in is match _", + "in is match _ as", (0, In, "in"), (3, Is, "is"), (6, Match, "match"), - (12, Underscore, "_") + (12, Underscore, "_"), + (14, As, "as") ); test_tokens!( diff --git a/fine/tests/example_tests.rs b/fine/tests/example_tests.rs index 3f56156a..298e0884 100644 --- a/fine/tests/example_tests.rs +++ b/fine/tests/example_tests.rs @@ -5,6 +5,7 @@ use fine::tokens::Lines; use fine::vm::{eval_export_fn, Context}; use pretty_assertions::assert_eq; use std::fmt::Write as _; +use std::rc::Rc; fn rebase_section(source_path: &str, section: &str, value: &str) { let contents = std::fs::read_to_string(source_path) @@ -83,8 +84,8 @@ fn should_rebase() -> bool { } } -fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) { - let dump = tree.dump(false); +fn assert_concrete(source: Rc, tree: Rc, expected: &str, source_path: &str) { + let dump = tree.dump(&source, false); if dump != expected { if should_rebase() { rebase_section(source_path, "concrete", &dump) @@ -128,13 +129,14 @@ macro_rules! 
semantic_assert_eq { } fn assert_type_at( - tree: &SyntaxTree, - lines: &Lines, + source: Rc, + tree: Rc, + lines: Rc, pos: usize, expected: &str, _source_path: &str, ) { - let semantics = Semantics::new(tree, lines); + let semantics = Semantics::new(source, tree.clone(), lines); let tree_ref = match tree.find_tree_at(pos) { Some(t) => t, None => semantic_panic!( @@ -156,13 +158,14 @@ fn assert_type_at( } fn assert_type_error_at( - tree: &SyntaxTree, - lines: &Lines, + source: Rc, + tree: Rc, + lines: Rc, pos: usize, expected: &str, _source_path: &str, ) { - let semantics = Semantics::new(tree, lines); + let semantics = Semantics::new(source, tree.clone(), lines); let tree_ref = match tree.find_tree_at(pos) { Some(t) => t, None => semantic_panic!( @@ -222,9 +225,15 @@ fn dump_module(out: &mut String, module: &Module) -> std::fmt::Result { Ok(()) } -fn assert_compiles_to(tree: &SyntaxTree, lines: &Lines, expected: &str, source_path: &str) { - let semantics = Semantics::new(tree, lines); - let module = compile(&semantics); +fn assert_compiles_to( + source: Rc, + tree: Rc, + lines: Rc, + expected: &str, + source_path: &str, +) { + let semantics = Rc::new(Semantics::new(source, tree, lines)); + let module = compile(semantics.clone()); let mut actual = String::new(); dump_module(&mut actual, &module).expect("no dumping?"); @@ -244,8 +253,8 @@ fn assert_compiles_to(tree: &SyntaxTree, lines: &Lines, expected: &str, source_p } } -fn assert_no_errors(tree: &SyntaxTree, lines: &Lines) { - let semantics = Semantics::new(tree, lines); +fn assert_no_errors(source: Rc, tree: Rc, lines: Rc) { + let semantics = Semantics::new(source, tree, lines); check(&semantics); let expected_errors: Vec = Vec::new(); @@ -259,10 +268,10 @@ fn assert_no_errors(tree: &SyntaxTree, lines: &Lines) { ); } -fn assert_eval_ok(tree: &SyntaxTree, lines: &Lines, expected: &str) { - let semantics = Semantics::new(tree, lines); +fn assert_eval_ok(source: Rc, tree: Rc, lines: Rc, expected: &str) { + let 
semantics = Rc::new(Semantics::new(source, tree, lines)); - let module = compile(&semantics); + let module = compile(semantics.clone()); let mut context = Context::new(module.clone()); context.init().expect("Unable to initialize module"); @@ -301,8 +310,13 @@ fn assert_eval_ok(tree: &SyntaxTree, lines: &Lines, expected: &str) { } } -fn assert_errors(tree: &SyntaxTree, lines: &Lines, expected_errors: Vec<&str>) { - let semantics = Semantics::new(tree, lines); +fn assert_errors( + source: Rc, + tree: Rc, + lines: Rc, + expected_errors: Vec<&str>, +) { + let semantics = Semantics::new(source, tree, lines); check(&semantics); let errors: Vec = semantics @@ -320,8 +334,8 @@ fn assert_errors(tree: &SyntaxTree, lines: &Lines, expected_errors: Vec<&str>) { ); } -fn assert_check_error(tree: &SyntaxTree, lines: &Lines, expected: &str) { - let semantics = Semantics::new(tree, lines); +fn assert_check_error(source: Rc, tree: Rc, lines: Rc, expected: &str) { + let semantics = Semantics::new(source, tree, lines); check(&semantics); let errors = semantics.snapshot_errors();