[fine] Loader takes Option<base>, global module map

Just some hacking on modules
This commit is contained in:
John Doty 2024-04-06 10:00:22 -07:00
parent a3d4c24f11
commit 52e1879ef4
4 changed files with 121 additions and 65 deletions

View file

@ -1,6 +1,5 @@
- The Export enum is stupid I think, for runtime modules. Why do we even have them? We should just put all the names in, like Declaration {} but for runtime.
- Module IDs must be globally resolvable from within a given semantics object
- When adding PANIC instructions, push a diagnostic (if I can find one) instead of a hard-coded string.
- runtime should have `new` with 0 args and `with_loader<T : ModuleLoader>` that does the boxing, and `new` should just make the standard one

View file

@ -1,34 +1,65 @@
use std::{collections::HashMap, fs, path::PathBuf, rc::Rc};
use crate::parser::parse;
use crate::semantics::{check, Error, ImportRecord, ModuleId, Semantics};
use crate::semantics::{check, Error, ModuleId, ModuleTable, Semantics};
/// The "source" of a module. The idea is that eventually different module
/// loaders could provide us "external" modules or something similar.
///
/// For now we're only dealing with source code-based modules, though.
///
/// `Debug` is derived so that values (and `Result`s carrying them) can be
/// printed in diagnostics and test failures, matching `ModuleLoadError`.
#[derive(Debug)]
pub enum ModuleSource {
    /// This module works based on source text which needs to be parsed and
    /// analyzed before it can be used.
    SourceText(String),
}
/// The ways in which loading a module can fail.
#[derive(Debug)]
pub enum ModuleLoadError {
/// Some IO error occurred while loading the module.
///
/// NOTE(review): the `String` is presumably the name or path of the module
/// that could not be loaded -- confirm against the code that constructs
/// this variant.
IO(String, std::io::Error),
}
/// `ModuleLoader` is the trait to implement if you can load modules. Loading
/// modules has two parts: first, to resolve an import into a full, canonical
/// module name, and second, to load a module based on its full, canonical
/// module name.
///
/// A full, canonical module name can be whatever you want it to be. By
/// default it's the canonical path of a file on disk, and
/// `normalize_module_name` resolves relative module names into canonical
/// paths based on the name of the module doing the importing.
pub trait ModuleLoader {
fn normalize_module_name(&self, source: &str, name: String) -> String;
/// Convert a module name as seen in a fine program to a full, canonical
/// module name, whatever that means to you. The full, canonical name of
/// the module that contains the import is provided to you, so that you
/// can (for example) use it to resolve relative paths into absolute
/// ones. (The source name is `None` if this is some kind of root
/// module.)
fn normalize_module_name(&self, source: Option<&str>, name: String) -> String;
/// Load a module based on the full, canonical name of a module. (You
/// provided us with this name in a previous call to
/// normalize_module_name, so don't pretend you don't understand it.)
fn load_module(&self, name: &String) -> Result<ModuleSource, ModuleLoadError>;
}
/// The stock module loader: modules are files that live on the file system.
pub struct StandardModuleLoader {
    /// Directory that module lookups are anchored at.
    base_path: PathBuf,
}

impl StandardModuleLoader {
    /// Build a standard module loader whose files are found relative to
    /// `base_path`.
    pub fn new(base_path: PathBuf) -> Self {
        Self { base_path }
    }
}
impl ModuleLoader for StandardModuleLoader {
fn normalize_module_name(&self, source: &str, name: String) -> String {
fn normalize_module_name(&self, source: Option<&str>, name: String) -> String {
let source = source.unwrap_or("");
let p = self.base_path.join(source).join(name.clone());
let result = match std::fs::canonicalize(&p) {
Ok(p) => match p.into_os_string().into_string() {
@ -63,12 +94,6 @@ impl Module {
}
}
struct PendingModule {
mid: ModuleId,
imports: Vec<(String, String)>, // (raw, normalized)
semantics: Rc<Semantics>,
}
pub struct Program {
next_module_id: u64,
modules: HashMap<String, Rc<Module>>,
@ -98,9 +123,15 @@ impl Program {
&mut self,
name: &str,
) -> Result<(Vec<Rc<Error>>, Rc<Module>), ModuleLoadError> {
struct PendingModule {
mid: ModuleId,
imports: Vec<(String, String)>, // (raw, normalized)
semantics: Rc<Semantics>,
}
let mut init_pending = HashMap::new();
let mut names = Vec::new();
let name = self.loader.normalize_module_name("", name.to_string());
let name = self.loader.normalize_module_name(None, name.to_string());
names.push(name.clone());
let mut id_assign = self.next_module_id;
@ -129,7 +160,9 @@ impl Program {
let mut imports = Vec::new();
for import in semantics.imports() {
let normalized = self.loader.normalize_module_name(&name, import.clone());
let normalized = self
.loader
.normalize_module_name(Some(&name), import.clone());
names.push(normalized.clone());
imports.push((import, normalized));
@ -150,25 +183,18 @@ impl Program {
for (_, pending) in init_pending.iter() {
let mut import_table = HashMap::new();
for (import, normalized) in pending.imports.iter() {
// NOTE: We look up the load(ed|ing) module here by normalized name, because that's how
// we track it...
// NOTE: We look up the load(ed|ing) module here by
// normalized name, because that's how we track it...
let target = if let Some(module) = self.modules.get(&*normalized) {
ImportRecord {
name: normalized.clone(),
module_id: module.id(),
semantics: Rc::downgrade(&module.semantics),
}
module.id
} else {
let other = init_pending.get(&*normalized).unwrap();
ImportRecord {
name: normalized.clone(),
module_id: other.mid,
semantics: Rc::downgrade(&other.semantics),
}
other.mid
};
// ...but we set it into the import table here with the name
// that the source code used, for more better binding.
// that the source code used, because that's how the
// semantics needs to find it.
import_table.insert(import.clone(), target);
}
@ -176,10 +202,9 @@ impl Program {
pending.semantics.set_imports(import_table);
}
let mut errors = Vec::new();
let mut to_check = Vec::new();
for (name, pending) in init_pending.into_iter() {
check(&pending.semantics);
errors.append(&mut pending.semantics.snapshot_errors());
to_check.push(pending.semantics.clone());
let module = Rc::new(Module {
id: pending.mid,
@ -190,6 +215,26 @@ impl Program {
}
self.next_module_id = id_assign;
// Rebuild the module map for everybody.
let mut module_table = ModuleTable::new();
for m in self.modules.values() {
// NOTE: self.modules keeps all the semantics alive; but to avoid
// cycles the module table itself contains weak pointers.
module_table.set_module(m.id, Rc::downgrade(&m.semantics));
}
let module_table = Rc::new(module_table);
for m in self.modules.values() {
m.semantics.set_module_table(module_table.clone());
}
// Check and report errors. (After the module map is set, so imports
// can be resolved correctly!)
let mut errors = Vec::new();
for semantics in to_check {
check(&semantics);
errors.append(&mut semantics.snapshot_errors());
}
let result = self.modules.get(&name).unwrap().clone();
Ok((errors, result))
}

View file

@ -122,11 +122,28 @@ impl fmt::Display for ModuleId {
}
}
#[derive(Clone, Debug)]
pub struct ImportRecord {
pub name: String,
pub module_id: ModuleId,
pub semantics: Weak<Semantics>,
/// A lookup table from a module's id to that module's semantics.
///
/// Entries are stored as `Weak` pointers, so the table does not keep any
/// `Semantics` alive by itself; whoever owns the strong references decides
/// how long each module lives.
#[derive(Debug, Default)]
pub struct ModuleTable {
    modules: HashMap<ModuleId, Weak<Semantics>>,
}

impl ModuleTable {
    /// Create an empty module table.
    pub fn new() -> ModuleTable {
        ModuleTable::default()
    }

    /// Register `semantics` as the semantics for module `id`, replacing any
    /// entry that was previously stored under the same id.
    pub fn set_module(&mut self, id: ModuleId, semantics: Weak<Semantics>) {
        self.modules.insert(id, semantics);
    }

    /// Look up the semantics for module `id`.
    ///
    /// Returns `None` when the id has no entry in the table, or when the
    /// entry exists but the `Semantics` it pointed at has already been
    /// dropped.
    pub fn get_module(&self, id: &ModuleId) -> Option<Rc<Semantics>> {
        // `and_then` collapses "missing entry" and "dead weak pointer" into
        // a single `None` without building a nested `Option`.
        self.modules.get(id).and_then(Weak::upgrade)
    }

    /// Iterate over every `(id, weak semantics)` entry in the table, in the
    /// underlying hash map's (unspecified) order.
    pub fn iter(&self) -> std::collections::hash_map::Iter<'_, ModuleId, Weak<Semantics>> {
        self.modules.iter()
    }
}
#[derive(Clone)]
@ -181,7 +198,7 @@ pub enum Type {
Alternate(Box<[Type]>),
// A module of some kind. What module?
Module(Rc<str>, ImportRecord),
Module(Rc<str>, ModuleId),
}
impl Type {
@ -670,12 +687,6 @@ enum Incremental<T> {
Complete(T),
}
#[derive(Debug)]
struct ImportMap {
by_name: HashMap<String, ImportRecord>,
by_id: HashMap<ModuleId, ImportRecord>,
}
pub struct Semantics {
mid: ModuleId,
file: Rc<str>,
@ -683,7 +694,8 @@ pub struct Semantics {
syntax_tree: Rc<SyntaxTree>,
lines: Rc<Lines>,
import_map: OnceCell<ImportMap>,
module_table: RefCell<Rc<ModuleTable>>,
import_map: OnceCell<HashMap<String, ModuleId>>,
// Instead of physical parents, this is the set of *logical* parents.
// This is what is used for binding.
@ -734,6 +746,7 @@ impl Semantics {
source,
syntax_tree: tree.clone(),
lines,
module_table: RefCell::new(Rc::new(ModuleTable::new())),
import_map: OnceCell::new(),
logical_parents,
function_count,
@ -755,31 +768,21 @@ impl Semantics {
semantics
}
pub fn set_imports(&self, imports: HashMap<String, ImportRecord>) {
let mut by_id = HashMap::new();
for (_, v) in imports.iter() {
by_id.insert(v.module_id, v.clone());
}
let imports = ImportMap {
by_name: imports,
by_id,
};
/// Install this module's import map: each key is an import name and each
/// value is the id of the module that import resolved to.
///
/// # Panics
///
/// Panics with "imports already set" if an import map has already been
/// installed, since the underlying cell can only be set once.
pub fn set_imports(&self, imports: HashMap<String, ModuleId>) {
self.import_map.set(imports).expect("imports already set");
}
/// Point this module at a new shared module table, releasing its reference
/// to whichever table it held before.
pub fn set_module_table(&self, table: Rc<ModuleTable>) {
    let previous = self.module_table.replace(table);
    drop(previous);
}
pub fn import_ids(&self) -> Vec<ModuleId> {
// TODO: Pull from by_name when we go global
let import_map = self.import_map.get().unwrap();
import_map.by_id.keys().map(|id| *id).collect()
import_map.values().map(|id| *id).collect()
}
pub fn import_by_id(&self, mid: ModuleId) -> Option<Rc<Semantics>> {
// TODO: ACTUALLY THIS IS WRONG, WE NEED THE GLOBAL MAP HERE, NOT THE LOCAL ONE.
let import_map = self.import_map.get()?;
let record = import_map.by_id.get(&mid)?;
record.semantics.upgrade()
self.module_table.borrow().get_module(&mid)
}
pub fn source(&self) -> Rc<str> {
@ -2192,7 +2195,7 @@ impl Semantics {
}
Type::Module(_, import) => {
// TODO: Cache this somehow, man.
let Some(other) = import.semantics.upgrade() else {
let Some(other) = self.import_by_id(*import) else {
self.internal_compiler_error(Some(t), "Unable to bind module");
};
@ -2601,8 +2604,8 @@ impl Semantics {
self.internal_compiler_error(None, "import map not initialized");
};
match import_map.by_name.get(&name) {
Some(import) => Some(Type::Module(name.into(), import.clone())),
match import_map.get(&name) {
Some(import) => Some(Type::Module(name.into(), *import)),
None => {
let error =
self.report_error_tree(tree, format!("unable to resolve module import {name}"));
@ -2766,8 +2769,8 @@ impl Semantics {
match self.import_map.get() {
Some(m) => {
eprintln!("Import map:");
for (k, b) in m.by_name.iter() {
eprintln!(" {k} => {} ({:?})", b.name, b.module_id);
for (k, b) in m.iter() {
eprintln!(" {k} => {:?}", b);
}
eprintln!();
}
@ -2775,6 +2778,12 @@ impl Semantics {
eprintln!("The import map is not set.\n")
}
}
eprintln!("Module Table:");
for (id, _sem) in self.module_table.borrow().iter() {
eprintln!(" {:?} => ??", id);
}
eprintln!();
}
if let Some(tr) = tr {

View file

@ -153,11 +153,14 @@ impl TestLoader {
}
impl ModuleLoader for TestLoader {
fn normalize_module_name(&self, base: &str, name: String) -> String {
fn normalize_module_name(&self, base: Option<&str>, name: String) -> String {
if name == "__test__" {
name
} else {
let base = if base == "__test__" { "" } else { base };
let base = match base {
Some("__test__") => None,
_ => base,
};
self.base.normalize_module_name(base, name)
}
}