From 52e1879ef4475c242b1924ff5b5037bf2ccda451 Mon Sep 17 00:00:00 2001 From: John Doty Date: Sat, 6 Apr 2024 10:00:22 -0700 Subject: [PATCH] [fine] Loader takes Option, global module map Just some hacking on modules --- fine/TODO | 1 - fine/src/program.rs | 99 +++++++++++++++++++++++++++---------- fine/src/semantics.rs | 79 ++++++++++++++++------------- fine/tests/example_tests.rs | 7 ++- 4 files changed, 121 insertions(+), 65 deletions(-) diff --git a/fine/TODO b/fine/TODO index 33d3eccf..81e2620e 100644 --- a/fine/TODO +++ b/fine/TODO @@ -1,6 +1,5 @@ - The Export enum is stupid I think, for runtime modules. Why do we even have them? We should just put all the names in, like Declaration {} but for runtime. -- Module IDs must be globally resolvable from within a given semantics object - When adding PANIC instructions, push a diagnostic that I find if I can find one instead of a hard-coded string. - runtime should have `new` with 0 args and `with_loader` that does the boxing, and `new` should just make the standard one \ No newline at end of file diff --git a/fine/src/program.rs b/fine/src/program.rs index 48d0a913..88a97c1e 100644 --- a/fine/src/program.rs +++ b/fine/src/program.rs @@ -1,34 +1,65 @@ use std::{collections::HashMap, fs, path::PathBuf, rc::Rc}; use crate::parser::parse; -use crate::semantics::{check, Error, ImportRecord, ModuleId, Semantics}; +use crate::semantics::{check, Error, ModuleId, ModuleTable, Semantics}; +/// The "source" of a module. The idea is that eventually different module +/// loaders could, like, provide us "external" modules or something. +/// +/// For now we're only dealing with source code-based modules, though. pub enum ModuleSource { + /// This module works based on source text which needs to be parsed and + /// analyzed and whatnot. SourceText(String), } #[derive(Debug)] pub enum ModuleLoadError { + /// Some IO error occurred while loading the module. IO(String, std::io::Error), } +/// `ModuleLoader` is the trait to implement if you can load modules. Loading +/// modules has two parts: first, to resolve an import into a full, canonical +/// module name, and second, to load a module based on its full, canonical +/// module name. +/// +/// A full, canonical module name can be whatever you want it to be. By +/// default it's the canonical path of a file on disk, and +/// `normalize_module_name` resolves relative module names into canonical +/// paths based on the name of the module doing the importing. pub trait ModuleLoader { - fn normalize_module_name(&self, source: &str, name: String) -> String; + /// Convert a module name as seen in a fine program to a full, canonical + /// module name, whatever that means to you. The full, canonical name of + /// the module that contains the import is provided to you, so that you + /// can (for example) use it to resolve relative paths into absolute + /// ones. (The source name is `None` if this is some kind of root + /// module.) + fn normalize_module_name(&self, source: Option<&str>, name: String) -> String; + + /// Load a module based on the full, canonical name of a module. (You + /// provided us with this name in a previous call to + /// normalize_module_name, so don't pretend you don't understand it.) fn load_module(&self, name: &String) -> Result; } +/// The standard implementation of a module loader, which loads files from +/// the file system. pub struct StandardModuleLoader { base_path: PathBuf, } impl StandardModuleLoader { + /// Construct a new standard module loader that loads files relative to + /// the provided path. pub fn new(base_path: PathBuf) -> Self { StandardModuleLoader { base_path } } } impl ModuleLoader for StandardModuleLoader { - fn normalize_module_name(&self, source: &str, name: String) -> String { + fn normalize_module_name(&self, source: Option<&str>, name: String) -> String { + let source = source.unwrap_or(""); let p = self.base_path.join(source).join(name.clone()); let result = match std::fs::canonicalize(&p) { Ok(p) => match p.into_os_string().into_string() { @@ -63,12 +94,6 @@ impl Module { } } -struct PendingModule { - mid: ModuleId, - imports: Vec<(String, String)>, // (raw, normalized) - semantics: Rc, -} - pub struct Program { next_module_id: u64, modules: HashMap>, @@ -98,9 +123,15 @@ impl Program { &mut self, name: &str, ) -> Result<(Vec>, Rc), ModuleLoadError> { + struct PendingModule { + mid: ModuleId, + imports: Vec<(String, String)>, // (raw, normalized) + semantics: Rc, + } + let mut init_pending = HashMap::new(); let mut names = Vec::new(); - let name = self.loader.normalize_module_name("", name.to_string()); + let name = self.loader.normalize_module_name(None, name.to_string()); names.push(name.clone()); let mut id_assign = self.next_module_id; @@ -129,7 +160,9 @@ impl Program { let mut imports = Vec::new(); for import in semantics.imports() { - let normalized = self.loader.normalize_module_name(&name, import.clone()); + let normalized = self + .loader + .normalize_module_name(Some(&name), import.clone()); names.push(normalized.clone()); imports.push((import, normalized)); @@ -150,25 +183,18 @@ impl Program { for (_, pending) in init_pending.iter() { let mut import_table = HashMap::new(); for (import, normalized) in pending.imports.iter() { - // NOTE: We look up the load(ed|ing) module here by normalized name, because that's how - // we track it... + // NOTE: We look up the load(ed|ing) module here by + // normalized name, because that's how we track it... let target = if let Some(module) = self.modules.get(&*normalized) { - ImportRecord { - name: normalized.clone(), - module_id: module.id(), - semantics: Rc::downgrade(&module.semantics), - } + module.id } else { let other = init_pending.get(&*normalized).unwrap(); - ImportRecord { - name: normalized.clone(), - module_id: other.mid, - semantics: Rc::downgrade(&other.semantics), - } + other.mid }; // ...but we set it into the import table here with the name - // that the source code used, for more better binding. + // that the source code used, because that's how the + // semantics needs to find it. import_table.insert(import.clone(), target); } @@ -176,10 +202,9 @@ impl Program { pending.semantics.set_imports(import_table); } - let mut errors = Vec::new(); + let mut to_check = Vec::new(); for (name, pending) in init_pending.into_iter() { - check(&pending.semantics); - errors.append(&mut pending.semantics.snapshot_errors()); + to_check.push(pending.semantics.clone()); let module = Rc::new(Module { id: pending.mid, @@ -190,6 +215,26 @@ impl Program { } self.next_module_id = id_assign; + // Rebuild the module map for everybody. + let mut module_table = ModuleTable::new(); + for m in self.modules.values() { + // NOTE: self.modules keeps all the semantics alive; but to avoid + // cycles the module table itself contains weak pointers. + module_table.set_module(m.id, Rc::downgrade(&m.semantics)); + } + let module_table = Rc::new(module_table); + for m in self.modules.values() { + m.semantics.set_module_table(module_table.clone()); + } + + // Check and report errors. (After the module map is set, so imports + // can be resolved correctly!) + let mut errors = Vec::new(); + for semantics in to_check { + check(&semantics); + errors.append(&mut semantics.snapshot_errors()); + } + let result = self.modules.get(&name).unwrap().clone(); Ok((errors, result)) } diff --git a/fine/src/semantics.rs b/fine/src/semantics.rs index 976fd202..6571c397 100644 --- a/fine/src/semantics.rs +++ b/fine/src/semantics.rs @@ -122,11 +122,28 @@ impl fmt::Display for ModuleId { } } -#[derive(Clone, Debug)] -pub struct ImportRecord { - pub name: String, - pub module_id: ModuleId, - pub semantics: Weak, +pub struct ModuleTable { + modules: HashMap>, +} + +impl ModuleTable { + pub fn new() -> ModuleTable { + ModuleTable { + modules: HashMap::new(), + } + } + + pub fn set_module(&mut self, id: ModuleId, semantics: Weak) { + self.modules.insert(id, semantics); + } + + pub fn get_module(&self, id: &ModuleId) -> Option> { + self.modules.get(id).map(|s| s.upgrade()).flatten() + } + + pub fn iter(&self) -> std::collections::hash_map::Iter<'_, ModuleId, Weak> { + self.modules.iter() + } } #[derive(Clone)] @@ -181,7 +198,7 @@ pub enum Type { Alternate(Box<[Type]>), // A module of some kind. What module? - Module(Rc, ImportRecord), + Module(Rc, ModuleId), } impl Type { @@ -670,12 +687,6 @@ enum Incremental { Complete(T), } -#[derive(Debug)] -struct ImportMap { - by_name: HashMap, - by_id: HashMap, -} - pub struct Semantics { mid: ModuleId, file: Rc, @@ -683,7 +694,8 @@ pub struct Semantics { syntax_tree: Rc, lines: Rc, - import_map: OnceCell, + module_table: RefCell>, + import_map: OnceCell>, // Instead of physical parents, this is the set of *logical* parents. // This is what is used for binding. @@ -734,6 +746,7 @@ impl Semantics { source, syntax_tree: tree.clone(), lines, + module_table: RefCell::new(Rc::new(ModuleTable::new())), import_map: OnceCell::new(), logical_parents, function_count, @@ -755,31 +768,21 @@ impl Semantics { semantics } - pub fn set_imports(&self, imports: HashMap) { - let mut by_id = HashMap::new(); - for (_, v) in imports.iter() { - by_id.insert(v.module_id, v.clone()); - } - let imports = ImportMap { - by_name: imports, - by_id, - }; - + pub fn set_imports(&self, imports: HashMap) { self.import_map.set(imports).expect("imports already set"); } + pub fn set_module_table(&self, table: Rc) { + self.module_table.replace(table); + } + pub fn import_ids(&self) -> Vec { - // TODO: Pull from by_name when we go global let import_map = self.import_map.get().unwrap(); - import_map.by_id.keys().map(|id| *id).collect() + import_map.values().map(|id| *id).collect() } pub fn import_by_id(&self, mid: ModuleId) -> Option> { - // TODO: ACTUALLY THIS IS WRONG, WE NEED THE GLOBAL MAP HERE, NOT THE LOCAL ONE. - let import_map = self.import_map.get()?; - let record = import_map.by_id.get(&mid)?; - - record.semantics.upgrade() + self.module_table.borrow().get_module(&mid) } pub fn source(&self) -> Rc { @@ -2192,7 +2195,7 @@ impl Semantics { } Type::Module(_, import) => { // TODO: Cache this somehow, man. - let Some(other) = import.semantics.upgrade() else { + let Some(other) = self.import_by_id(*import) else { self.internal_compiler_error(Some(t), "Unable to bind module"); }; @@ -2601,8 +2604,8 @@ impl Semantics { self.internal_compiler_error(None, "import map not initialized"); }; - match import_map.by_name.get(&name) { - Some(import) => Some(Type::Module(name.into(), import.clone())), + match import_map.get(&name) { + Some(import) => Some(Type::Module(name.into(), *import)), None => { let error = self.report_error_tree(tree, format!("unable to resolve module import {name}")); @@ -2766,8 +2769,8 @@ impl Semantics { match self.import_map.get() { Some(m) => { eprintln!("Import map:"); - for (k, b) in m.by_name.iter() { - eprintln!(" {k} => {} ({:?})", b.name, b.module_id); + for (k, b) in m.iter() { + eprintln!(" {k} => {:?}", b); } eprintln!(); } @@ -2775,6 +2778,12 @@ impl Semantics { eprintln!("The import map is not set.\n") } } + + eprintln!("Module Table:"); + for (id, _sem) in self.module_table.borrow().iter() { + eprintln!(" {:?} => ??", id); + } + eprintln!(); } if let Some(tr) = tr { diff --git a/fine/tests/example_tests.rs b/fine/tests/example_tests.rs index eede64c1..849cc1d5 100644 --- a/fine/tests/example_tests.rs +++ b/fine/tests/example_tests.rs @@ -153,11 +153,14 @@ impl TestLoader { } impl ModuleLoader for TestLoader { - fn normalize_module_name(&self, base: &str, name: String) -> String { + fn normalize_module_name(&self, base: Option<&str>, name: String) -> String { if name == "__test__" { name } else { - let base = if base == "__test__" { "" } else { base }; + let base = match base { + Some("__test__") => None, + _ => base, + }; self.base.normalize_module_name(base, name) } }