[fine] Lifetime garbage, big refactor

So it turns out that I can't hold `&str` in a token because it makes it
impossible to encapsulate a source file in the larger context:
self-referential structure problems again. Everything gets rebuilt so that
the source can be passed through. While we're at it, more things
become Rc<> because, man... life is too short.

Semantics in particular has become a giant hub of the module state: we
can basically just hold an Rc<Semantics> and have everything we could
possibly want to know about a source file, computed lazily if
necessary.
This commit is contained in:
John Doty 2024-02-11 09:31:51 -08:00
parent d5059dd450
commit 2dbdbb3957
7 changed files with 502 additions and 329 deletions

View file

@ -57,7 +57,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
}
assertions.push(quote! {
crate::assert_concrete(&_tree, #concrete, #display_path);
crate::assert_concrete(source.clone(), _tree.clone(), #concrete, #display_path);
});
} else if line == "@compiles-to:" {
let mut compiled = String::new();
@ -72,7 +72,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
}
assertions.push(quote! {
crate::assert_compiles_to(&_tree, &_lines, #compiled, #display_path);
crate::assert_compiles_to(source.clone(), _tree.clone(), _lines.clone(), #compiled, #display_path);
});
} else if let Some(line) = line.strip_prefix("@type:") {
let (pos, expected) = line
@ -85,7 +85,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
.expect(&format!("Unable to parse position '{pos}'"));
let expected = expected.trim();
assertions.push(quote! {
crate::assert_type_at(&_tree, &_lines, #pos, #expected, #display_path);
crate::assert_type_at(source.clone(), _tree.clone(), _lines.clone(), #pos, #expected, #display_path);
});
} else if let Some(line) = line.strip_prefix("@type-error:") {
let (pos, expected) = line
@ -98,21 +98,21 @@ fn generate_test_for_file(path: PathBuf) -> String {
.expect(&format!("Unable to parse position '{pos}'"));
let expected = expected.trim();
assertions.push(quote! {
crate::assert_type_error_at(&_tree, &_lines, #pos, #expected, #display_path);
crate::assert_type_error_at(source.clone(), _tree.clone(), _lines.clone(), #pos, #expected, #display_path);
});
} else if line == "@no-errors" {
assertions.push(quote! {
crate::assert_no_errors(&_tree, &_lines);
crate::assert_no_errors(source.clone(), _tree.clone(), _lines.clone());
});
} else if let Some(line) = line.strip_prefix("@eval:") {
let expected = line.trim();
assertions.push(quote! {
crate::assert_eval_ok(&_tree, &_lines, #expected);
crate::assert_eval_ok(source.clone(), _tree.clone(), _lines.clone(), #expected);
});
} else if let Some(line) = line.strip_prefix("@check-error:") {
let expected = line.trim();
assertions.push(quote! {
crate::assert_check_error(&_tree, &_lines, #expected);
crate::assert_check_error(source.clone(), _tree.clone(), _lines.clone(), #expected);
});
} else if line == "@expect-errors:" {
let mut errors = Vec::new();
@ -127,7 +127,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
let errors = ExpectedErrors(errors);
assertions.push(quote! {
crate::assert_errors(&_tree, &_lines, #errors);
crate::assert_errors(source.clone(), _tree.clone(), _lines.clone(), #errors);
});
} else if line.starts_with("@") {
panic!("Test file {display_path} has unknown directive: {line}");
@ -138,7 +138,8 @@ fn generate_test_for_file(path: PathBuf) -> String {
let test_method = quote! {
#disabled
fn #name() {
let (_tree, _lines) = fine::parser::parse(#contents);
let source : std::rc::Rc<str> = #contents.into();
let (_tree, _lines) = fine::parser::parse(&source);
#(#assertions)*
}
};

View file

@ -148,8 +148,9 @@ struct FunctionKey {
}
struct Compiler<'a> {
semantics: &'a Semantics<'a>,
syntax: &'a SyntaxTree<'a>,
source: &'a str,
semantics: &'a Semantics,
syntax: &'a SyntaxTree,
function_bindings: HashMap<FunctionKey, usize>,
pending_functions: Vec<(FunctionKey, usize, Function)>,
@ -183,8 +184,7 @@ macro_rules! compiler_assert_eq {
let left = &$ll;
let right = &$rr;
if left != right {
let semantics = $compiler.semantics;
semantics.dump_compiler_state(Some($tr));
$compiler.semantics.dump_compiler_state(Some($tr));
assert_eq!(left, right);
}
@ -194,8 +194,7 @@ macro_rules! compiler_assert_eq {
let left = &$ll;
let right = &$rr;
if left != right {
let semantics = $compiler.semantics;
semantics.dump_compiler_state(Some($tr));
$compiler.semantics.dump_compiler_state(Some($tr));
assert_eq!(left, right, $($t)*);
}
@ -205,8 +204,7 @@ macro_rules! compiler_assert_eq {
macro_rules! compiler_assert {
($compiler:expr, $tr:expr, $cond:expr $(,)?) => {{
if !$cond {
let semantics = $compiler.semantics;
semantics.dump_compiler_state(Some($tr));
$compiler.semantics.dump_compiler_state(Some($tr));
assert!($cond);
}
@ -214,8 +212,7 @@ macro_rules! compiler_assert {
($compiler:expr, $tr:expr, $cond:expr, $($arg:tt)+) => {{
if !$cond {
let semantics = $compiler.semantics;
semantics.dump_compiler_state(Some($tr));
$compiler.semantics.dump_compiler_state(Some($tr));
assert!($cond, $($arg)*);
}
@ -224,8 +221,7 @@ macro_rules! compiler_assert {
macro_rules! ice {
($compiler:expr, $tr:expr, $($t:tt)+) => {{
let semantics = $compiler.semantics;
semantics.dump_compiler_state(Some($tr));
$compiler.semantics.dump_compiler_state(Some($tr));
panic!($($t)*)
}}
}
@ -241,10 +237,15 @@ macro_rules! inst_panic {
// ($compiler:expr, $tr:expr, $($t:tt)*) => {{}};
// }
pub fn compile(semantics: &Semantics) -> Rc<Module> {
pub fn compile(semantics: Rc<Semantics>) -> Rc<Module> {
let source = semantics.source();
let syntax_tree = semantics.tree();
let mut compiler = Compiler {
semantics,
syntax: semantics.tree(),
source: &source,
semantics: &semantics,
syntax: &syntax_tree,
function_bindings: HashMap::new(),
pending_functions: Vec::new(),
temp_functions: Vec::new(),
@ -328,14 +329,16 @@ fn compile_expression(c: &mut Compiler, t: TreeRef) {
fn compile_literal(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR {
let tok = tr.nth_token(0)?;
match c.semantics.type_of(t) {
Type::F64 => c.push(Instruction::PushFloat(tok.as_str().parse().unwrap())),
Type::F64 => c.push(Instruction::PushFloat(
tok.as_str(c.source).parse().unwrap(),
)),
Type::Bool => c.push(if tok.kind == TokenKind::True {
Instruction::PushTrue
} else {
Instruction::PushFalse
}),
Type::String => {
let result = string_constant_to_string(tok.as_str());
let result = string_constant_to_string(tok.as_str(c.source));
let index = c.add_string(result);
c.push(Instruction::PushString(index))
}
@ -534,24 +537,16 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR {
let declaration = match ltree.kind {
// TODO: Assign to list access
TreeKind::Identifier => {
let id = ltree.nth_token(0)?;
let id = ltree.nth_token(0)?.as_str(&c.source);
environment = c.semantics.environment_of(lvalue);
environment.bind(id)?
}
TreeKind::MemberAccess => {
let id = ltree.nth_token(2)?;
let typ = c.semantics.type_of(ltree.nth_tree(0)?);
environment = match &typ {
Type::Object(ct, _) => {
let class = c.semantics.class_of(*ct);
class.env.clone()
}
Type::Class(ct, _) => {
let class = c.semantics.class_of(*ct);
class.static_env.clone()
}
_ => return None,
};
let id = ltree.nth_token(2)?.as_str(&c.source);
let t = ltree.nth_tree(0)?;
let typ = c.semantics.type_of(t);
environment = c.semantics.member_environment(t, &typ);
environment.bind(id)?
}
_ => return None,
@ -587,16 +582,22 @@ fn compile_binary_expression(c: &mut Compiler, t: TreeRef, tr: &Tree) -> CR {
Declaration::ExternFunction { .. } => inst_panic!("store ext"),
Declaration::Function { .. } => inst_panic!("store func"),
Declaration::Class { .. } => inst_panic!("store class"),
Declaration::Import { .. } => inst_panic!("store import"),
};
c.push(instruction);
OK
}
_ => ice!(c, t, "Unsupported binary expression '{op}'"),
_ => ice!(
c,
t,
"Unsupported binary expression '{}'",
op.as_str(&c.source)
),
}
}
fn compile_identifier_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> Option<()> {
let ident = tree.nth_token(0)?;
let ident = tree.nth_token(0)?.as_str(&c.source);
let environment = c.semantics.environment_of(t);
let declaration = environment.bind(ident)?;
@ -659,6 +660,8 @@ fn compile_load_declaration(c: &mut Compiler, t: TreeRef, declaration: &Declarat
// Must be a static don't worry about it.
Declaration::Class { .. } => return OK,
Declaration::Import { .. } => todo!(),
};
c.push(instruction);
@ -677,10 +680,10 @@ fn compile_pattern(c: &mut Compiler, t: TreeRef) -> Option<()> {
// Let's *try* to generate good code in the presence of a wildcard pattern....
let is_wildcard = tree
.child_tree_of_kind(c.syntax, TreeKind::WildcardPattern)
.child_tree_of_kind(&c.syntax, TreeKind::WildcardPattern)
.is_some();
let type_expr = tree.child_tree_of_kind(c.syntax, TreeKind::TypeExpression);
let type_expr = tree.child_tree_of_kind(&c.syntax, TreeKind::TypeExpression);
let and_index = tree.children.iter().position(|c| match c {
Child::Token(t) => t.kind == TokenKind::And,
@ -688,10 +691,10 @@ fn compile_pattern(c: &mut Compiler, t: TreeRef) -> Option<()> {
});
// If you have a binding, dup and store now, it is in scope.
if let Some(binding) = tree.child_tree_of_kind(c.syntax, TreeKind::VariableBinding) {
if let Some(binding) = tree.child_tree_of_kind(&c.syntax, TreeKind::VariableBinding) {
if let Some(variable) = binding.nth_token(0) {
let environment = c.semantics.environment_of(t);
let declaration = environment.bind(variable)?;
let declaration = environment.bind(variable.as_str(&c.source))?;
let Declaration::Variable {
location: Location::Local,
@ -765,8 +768,8 @@ fn compile_type_expr_eq(c: &mut Compiler, t: TreeRef) {
}
fn compile_type_identifier_eq(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
let identifier = tree.nth_token(0)?;
match identifier.as_str() {
let identifier = tree.nth_token(0)?.as_str(&c.source);
match identifier {
"f64" => {
c.push(Instruction::IsFloat);
}
@ -825,7 +828,7 @@ fn compile_type_alternate_eq(c: &mut Compiler, tree: &Tree) -> CR {
}
fn compile_call_expression(c: &mut Compiler, tree: &Tree) -> CR {
let arg_list = tree.child_tree_of_kind(c.syntax, TreeKind::ArgumentList)?;
let arg_list = tree.child_tree_of_kind(&c.syntax, TreeKind::ArgumentList)?;
let mut args: Vec<_> = arg_list.child_trees().collect();
let arg_count = args.len();
@ -876,12 +879,12 @@ fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> C
};
let class = c.semantics.class_of(ct);
let field_list = tree.child_tree_of_kind(c.syntax, TreeKind::FieldList)?;
let field_list = tree.child_tree_of_kind(&c.syntax, TreeKind::FieldList)?;
let mut field_bindings = HashMap::new();
for field in field_list.children_of_kind(c.syntax, TreeKind::FieldValue) {
for field in field_list.children_of_kind(&c.syntax, TreeKind::FieldValue) {
let f = &c.syntax[field];
let name = f.nth_token(0)?;
field_bindings.insert(name.as_str(), field);
field_bindings.insert(name.as_str(&c.source), field);
}
// The fields come in this order and since arguments are backwards
@ -894,8 +897,8 @@ fn compile_new_object_expression(c: &mut Compiler, t: TreeRef, tree: &Tree) -> C
// Fetch the correct constructor.
// TODO: Binding this type should be done by semantics, and we should borrow it.
let type_reference = tree.child_tree_of_kind(c.syntax, TreeKind::TypeIdentifier)?;
let identifier = type_reference.nth_token(0)?;
let type_reference = tree.child_tree_of_kind(&c.syntax, TreeKind::TypeIdentifier)?;
let identifier = type_reference.nth_token(0)?.as_str(&c.source);
let environment = c.semantics.environment_of(t);
match environment.bind(identifier)? {
Declaration::Class { declaration, .. } => {
@ -931,7 +934,7 @@ fn compile_field_value(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
// Form 2: { x, ... }
let environment = c.semantics.environment_of(t);
let id = tree.nth_token(0)?;
let id = tree.nth_token(0)?.as_str(&c.source);
let declaration = environment.bind(id)?;
compile_load_declaration(c, t, declaration)
@ -944,7 +947,7 @@ fn compile_member_access(c: &mut Compiler, t: TreeRef, tree: &Tree) -> CR {
compile_expression(c, tree.nth_tree(0)?);
let typ = c.semantics.type_of(tree.nth_tree(0)?);
let ident = tree.nth_token(2)?;
let ident = tree.nth_token(2)?.as_str(&c.source);
let environment = match &typ {
Type::Object(ct, _) => {
@ -976,7 +979,7 @@ fn compile_self_reference(c: &mut Compiler) -> CR {
fn compile_list_constructor(c: &mut Compiler, tree: &Tree) -> CR {
let mut children: Vec<_> = tree
.children_of_kind(c.syntax, TreeKind::ListConstructorElement)
.children_of_kind(&c.syntax, TreeKind::ListConstructorElement)
.collect();
children.reverse();
let count = children.len();
@ -1046,7 +1049,7 @@ fn compile_expression_statement(c: &mut Compiler, tree: &Tree, gen_value: bool)
fn compile_let_statement(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_value: bool) -> CR {
compile_expression(c, tree.nth_tree(3)?);
let environment = c.semantics.environment_of(t);
let declaration = environment.bind(tree.nth_token(1)?)?;
let declaration = environment.bind(tree.nth_token(1)?.as_str(&c.source))?;
let Declaration::Variable {
location, index, ..
@ -1087,19 +1090,16 @@ fn compile_function_declaration(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_v
let fk = FunctionKey { tree: t };
if !c.function_bindings.contains_key(&fk) {
// TODO: If this is a method the name should be different.
let name = tree.nth_token(1)?;
let name = tree.nth_token(1)?.as_str(&c.source);
let param_list = tree.child_tree_of_kind(c.syntax, TreeKind::ParamList)?;
let param_list = tree.child_tree_of_kind(&c.syntax, TreeKind::ParamList)?;
let param_count = param_list.children.len() - 2;
let function_index = c.temp_functions.len();
c.temp_functions.push(None);
c.pending_functions.push((
fk.clone(),
function_index,
Function::new(name.as_str(), param_count),
));
c.pending_functions
.push((fk.clone(), function_index, Function::new(name, param_count)));
c.function_bindings.insert(fk, function_index);
c.module
.exports
@ -1118,18 +1118,15 @@ fn compile_class_declaration(c: &mut Compiler, t: TreeRef, tree: &Tree, gen_valu
// Classes get compiled as constructor functions which get called.
let fk = FunctionKey { tree: t };
if !c.function_bindings.contains_key(&fk) {
let name = tree.nth_token(1)?;
let name = tree.nth_token(1)?.as_str(&c.source);
let field_count = tree.children.len() - 2;
let function_index = c.temp_functions.len();
c.temp_functions.push(None);
c.pending_functions.push((
fk.clone(),
function_index,
Function::new(name.as_str(), field_count),
));
c.pending_functions
.push((fk.clone(), function_index, Function::new(name, field_count)));
c.function_bindings.insert(fk, function_index);
c.module
.exports
@ -1147,16 +1144,18 @@ fn compile_function(c: &mut Compiler, t: TreeRef) -> CR {
let tree = &c.syntax[t];
match tree.kind {
TreeKind::FunctionDecl => {
let block = tree.child_of_kind(c.syntax, TreeKind::Block)?;
let block = tree.child_of_kind(&c.syntax, TreeKind::Block)?;
compile_expression(c, block);
}
TreeKind::ClassDecl => {
let count = tree.children_of_kind(c.syntax, TreeKind::FieldDecl).count();
let count = tree
.children_of_kind(&c.syntax, TreeKind::FieldDecl)
.count();
for i in 0..count {
c.push(Instruction::LoadArgument(count - 1 - i));
}
let name = tree.nth_token(1)?.as_str();
let name = tree.nth_token(1)?.as_str(&c.source);
let name_index = c.add_string(name.to_string());
c.push(Instruction::PushString(name_index));
c.push(Instruction::PushInt(t.index().try_into().unwrap()));
@ -1209,7 +1208,7 @@ fn compile_for_statement(c: &mut Compiler, tree: &Tree, gen_value: bool) -> CR {
// Figure out the variable.
let vt = tree.nth_tree(1)?;
let var = &c.syntax[vt];
let id = var.nth_token(0)?;
let id = var.nth_token(0)?.as_str(&c.source);
let body = tree.nth_tree(4)?;
let env = c.semantics.environment_of(body);

View file

@ -1,4 +1,4 @@
use std::fs;
use std::{fs, rc::Rc};
use compiler::compile;
use parser::parse;
@ -11,6 +11,21 @@ pub mod semantics;
pub mod tokens;
pub mod vm;
// struct SourceModule {
// semantics: Rc<Semantics>,
// }
// impl SourceModule {
// pub fn new(source: &str) -> Self {
// let source: Rc<str> = source.into();
// let (syntax, lines) = parse(&source);
// let semantics = Rc::new(Semantics::new(source, syntax, lines));
// SourceModule { semantics }
// }
// }
// struct Environment {}
pub fn process_file(file: &str) {
let source = match fs::read_to_string(file) {
Ok(c) => c,
@ -21,21 +36,21 @@ pub fn process_file(file: &str) {
};
// What am I doing here?
let source: Rc<str> = source.into();
let (tree, lines) = parse(&source);
let semantics = Semantics::new(&tree, &lines);
let semantics = Rc::new(Semantics::new(source, tree, lines));
check(&semantics);
// OK now there might be errors.
let mut errors = semantics.snapshot_errors();
let errors = semantics.snapshot_errors();
if errors.len() > 0 {
errors.reverse();
for e in errors {
eprintln!("{file}: {}:{}: {}", e.start.0, e.start.1, e.message);
}
return;
}
let module = compile(&semantics);
let module = compile(semantics);
let main_function = module.functions[module.init].clone();
let mut context = Context::new(module.clone());

View file

@ -2,14 +2,15 @@
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt::Write as _;
use std::rc::Rc;
use std::{cell::Cell, num::NonZeroU32};
pub struct SyntaxTree<'a> {
trees: Vec<Tree<'a>>,
pub struct SyntaxTree {
trees: Vec<Tree>,
root: Option<TreeRef>,
}
impl<'a> SyntaxTree<'a> {
impl SyntaxTree {
pub fn new() -> Self {
SyntaxTree {
trees: vec![],
@ -21,7 +22,7 @@ impl<'a> SyntaxTree<'a> {
self.root
}
pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef {
pub fn add_tree(&mut self, mut t: Tree) -> TreeRef {
assert!(t.parent.is_none());
let tr = TreeRef::from_index(self.trees.len());
@ -50,10 +51,10 @@ impl<'a> SyntaxTree<'a> {
tr
}
pub fn dump(&self, with_positions: bool) -> String {
pub fn dump(&self, source: &str, with_positions: bool) -> String {
let mut output = String::new();
if let Some(r) = self.root {
self[r].dump(self, with_positions, &mut output);
self[r].dump(source, self, with_positions, &mut output);
}
output
}
@ -102,15 +103,15 @@ impl<'a> SyntaxTree<'a> {
}
}
impl<'a> std::ops::Index<TreeRef> for SyntaxTree<'a> {
type Output = Tree<'a>;
impl std::ops::Index<TreeRef> for SyntaxTree {
type Output = Tree;
fn index(&self, index: TreeRef) -> &Self::Output {
&self.trees[index.index()]
}
}
impl<'a> std::ops::IndexMut<TreeRef> for SyntaxTree<'a> {
impl std::ops::IndexMut<TreeRef> for SyntaxTree {
fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output {
&mut self.trees[index.index()]
}
@ -164,18 +165,20 @@ pub enum TreeKind {
VariableBinding,
WhileStatement,
WildcardPattern,
Import,
}
pub struct Tree<'a> {
pub struct Tree {
pub kind: TreeKind,
pub parent: Option<TreeRef>, // TODO: Do we actually need this?
pub start_pos: usize,
pub end_pos: usize,
pub children: Vec<Child<'a>>,
pub children: Vec<Child>,
}
impl<'a> Tree<'a> {
pub fn nth_token(&self, index: usize) -> Option<&Token<'a>> {
impl Tree {
pub fn nth_token(&self, index: usize) -> Option<&Token> {
self.children
.get(index)
.map(|c| match c {
@ -215,27 +218,23 @@ impl<'a> Tree<'a> {
self.children_of_kind(&s, kind).next()
}
pub fn child_tree_of_kind<'b>(
&'b self,
s: &'b SyntaxTree<'a>,
kind: TreeKind,
) -> Option<&'b Tree<'a>> {
pub fn child_tree_of_kind<'b>(&'b self, s: &'b SyntaxTree, kind: TreeKind) -> Option<&'b Tree> {
self.child_of_kind(s, kind).map(|t| &s[t])
}
pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) {
pub fn dump(&self, source: &str, tree: &SyntaxTree, with_positions: bool, output: &mut String) {
let _ = write!(output, "{:?}", self.kind);
if with_positions {
let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos);
}
let _ = write!(output, "\n");
for child in self.children.iter() {
child.dump_rec(2, tree, with_positions, output);
child.dump_rec(source, 2, tree, with_positions, output);
}
}
}
impl<'a> std::fmt::Debug for Tree<'a> {
impl std::fmt::Debug for Tree {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?} [{}-{})", self.kind, self.start_pos, self.end_pos)
}
@ -256,16 +255,17 @@ impl TreeRef {
}
}
pub enum Child<'a> {
Token(Token<'a>),
pub enum Child {
Token(Token),
Tree(TreeRef),
}
impl<'a> Child<'a> {
impl Child {
fn dump_rec(
&self,
source: &str,
indent: usize,
tree: &SyntaxTree<'a>,
tree: &SyntaxTree,
with_positions: bool,
output: &mut String,
) {
@ -274,9 +274,9 @@ impl<'a> Child<'a> {
}
match self {
Child::Token(t) => {
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str());
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str(source));
if with_positions {
let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len());
let _ = write!(output, " [{}, {})", t.start(), t.end());
}
let _ = write!(output, "\n");
}
@ -289,7 +289,7 @@ impl<'a> Child<'a> {
let _ = write!(output, "\n");
for child in t.children.iter() {
child.dump_rec(indent + 2, tree, with_positions, output);
child.dump_rec(source, indent + 2, tree, with_positions, output);
}
}
}
@ -297,23 +297,23 @@ impl<'a> Child<'a> {
pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize {
match &self {
Child::Token(t) => t.start,
Child::Token(t) => t.start(),
Child::Tree(t) => syntax_tree[*t].start_pos,
}
}
pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize {
match &self {
Child::Token(t) => t.start + t.as_str().len(),
Child::Token(t) => t.end(),
Child::Tree(t) => syntax_tree[*t].end_pos,
}
}
}
enum ParseEvent<'a> {
enum ParseEvent {
Start { kind: TreeKind },
End,
Advance { token: Token<'a> },
Advance { token: Token },
}
struct MarkStarted {
@ -326,10 +326,10 @@ struct MarkClosed {
struct CParser<'a> {
tokens: Tokens<'a>,
current: Token<'a>,
next: Token<'a>,
current: Token,
next: Token,
fuel: Cell<u32>,
events: Vec<ParseEvent<'a>>,
events: Vec<ParseEvent>,
panic: bool,
}
@ -337,8 +337,8 @@ impl<'a> CParser<'a> {
fn new(tokens: Tokens<'a>) -> Self {
let mut parser = CParser {
tokens,
current: Token::new(TokenKind::EOF, 0, ""),
next: Token::new(TokenKind::EOF, 0, ""),
current: Token::new(TokenKind::EOF, 0, 0),
next: Token::new(TokenKind::EOF, 0, 0),
fuel: Cell::new(256),
events: Vec::new(),
panic: false,
@ -411,7 +411,8 @@ impl<'a> CParser<'a> {
if self.fuel.get() == 0 {
panic!(
"parser is stuck at '{}' ({})!",
self.current, self.current.start
self.current.as_str(self.tokens.source()),
self.current.start()
);
}
self.fuel.set(self.fuel.get() - 1);
@ -422,7 +423,8 @@ impl<'a> CParser<'a> {
if self.fuel.get() == 0 {
panic!(
"parser is stuck at '{}' ({})!",
self.current, self.current.start
self.current.as_str(self.tokens.source()),
self.current.start()
);
}
self.fuel.set(self.fuel.get() - 1);
@ -487,7 +489,7 @@ impl<'a> CParser<'a> {
self.error_at(self.current.clone(), message)
}
fn error_at<T>(&mut self, token: Token<'a>, message: T)
fn error_at<T>(&mut self, token: Token, message: T)
where
T: Into<String>,
{
@ -503,18 +505,18 @@ impl<'a> CParser<'a> {
final_message.push_str("at end")
} else if token.kind != TokenKind::Error {
final_message.push_str("at '");
final_message.push_str(token.as_str());
final_message.push_str(token.as_str(self.tokens.source()));
final_message.push_str("'");
}
final_message.push_str(": ");
final_message.push_str(&message);
self.events.push(ParseEvent::Advance {
token: Token::error(token.start, final_message),
token: Token::error(token.start(), token.end(), final_message),
});
}
fn build_tree(self) -> (SyntaxTree<'a>, Lines) {
fn build_tree(self) -> (Rc<SyntaxTree>, Rc<Lines>) {
let mut events = self.events;
let mut stack = Vec::new();
@ -555,11 +557,11 @@ impl<'a> CParser<'a> {
let root = result.add_tree(stack.pop().unwrap());
result.root = Some(root);
(result, self.tokens.lines())
(Rc::new(result), Rc::new(self.tokens.lines()))
}
}
pub fn parse(source: &str) -> (SyntaxTree, Lines) {
pub fn parse(source: &str) -> (Rc<SyntaxTree>, Rc<Lines>) {
let tokens = Tokens::new(source);
let mut parser = CParser::new(tokens);
@ -572,6 +574,7 @@ fn file(p: &mut CParser) {
let m = p.start();
while !p.eof() {
match p.peek() {
TokenKind::Import => import(p),
TokenKind::Class => class(p),
TokenKind::RightBrace => {
// An error parsing mismatched braces can leave me at an
@ -613,6 +616,26 @@ fn function(p: &mut CParser) {
p.end(m, TreeKind::FunctionDecl);
}
/// Parses an import declaration of the form `import "<path>" as <name>`.
///
/// Opens a tree marker, consumes the `import` keyword, then expects, in
/// order, a string token (the module path), the `as` keyword, and an
/// identifier (the local name for the module). The marker is then closed as
/// a `TreeKind::Import` node.
///
/// The string passed to each `expect` call is presumably the message
/// reported when that token is missing -- confirm against `CParser::expect`.
fn import(p: &mut CParser) {
let m = p.start();
// `file` only dispatches here after peeking `TokenKind::Import`.
p.expect_start(TokenKind::Import);
p.expect(
TokenKind::String,
"expected a string as the path of the module to import",
);
p.expect(
TokenKind::As,
"expected `as` between the module string and identifier",
);
// Child order matters: semantics reads the bound name with `nth_token(3)`,
// i.e. the fourth child of the `Import` tree.
p.expect(
TokenKind::Identifier,
"expected an identifier for the module",
);
p.end(m, TreeKind::Import);
}
fn class(p: &mut CParser) {
let m = p.start();

View file

@ -149,6 +149,9 @@ pub enum Type {
// An alternate is one or another type.
Alternate(Box<[Type]>),
// A module of some kind. What module?
Module(Rc<str>),
}
impl Type {
@ -163,19 +166,20 @@ impl Type {
match self {
Type::Error => 0,
Type::Unreachable => 1,
Type::Assignment(_) => 2,
Type::TypeVariable(_) => 3,
Type::Assignment(..) => 2,
Type::TypeVariable(..) => 3,
Type::Nothing => 4,
Type::F64 => 5,
Type::I64 => 6,
Type::String => 7,
Type::Bool => 8,
Type::Function(_, _) => 9,
Type::Method(_, _, _) => 10,
Type::List(_) => 11,
Type::Class(_, _) => 12,
Type::Object(_, _) => 13,
Type::Alternate(_) => 14,
Type::Function(..) => 9,
Type::Method(..) => 10,
Type::List(..) => 11,
Type::Class(..) => 12,
Type::Object(..) => 13,
Type::Alternate(..) => 14,
Type::Module(..) => 15,
}
}
}
@ -242,6 +246,7 @@ impl fmt::Display for Type {
}
Ok(())
}
Module(name) => write!(f, "module {}", name),
}
}
}
@ -335,6 +340,9 @@ pub enum Declaration {
Class {
declaration: TreeRef, //?
},
Import {
declaration: TreeRef,
},
}
pub struct Environment {
@ -383,8 +391,8 @@ impl Environment {
})
}
pub fn insert(&mut self, token: &Token, t: TreeRef) -> Option<Declaration> {
self.insert_name(token.as_str().into(), t)
pub fn insert(&mut self, token: &str, t: TreeRef) -> Option<Declaration> {
self.insert_name(token.into(), t)
}
pub fn insert_name(&mut self, name: Box<str>, t: TreeRef) -> Option<Declaration> {
@ -400,14 +408,14 @@ impl Environment {
result
}
pub fn bind(&self, token: &Token) -> Option<&Declaration> {
if let Some(decl) = self.declarations.get(token.as_str()) {
pub fn bind(&self, token: &str) -> Option<&Declaration> {
if let Some(decl) = self.declarations.get(token) {
return Some(decl);
}
let mut current = &self.parent;
while let Some(env) = current {
if let Some(decl) = env.declarations.get(token.as_str()) {
if let Some(decl) = env.declarations.get(token) {
return Some(decl);
}
current = &env.parent;
@ -597,11 +605,10 @@ enum Incremental<T> {
Complete(T),
}
pub struct Semantics<'a> {
// TODO: Do I really want my own copy here? Should we standardize on Arc
// or Rc or some other nice sharing mechanism?
syntax_tree: &'a SyntaxTree<'a>,
lines: &'a Lines,
pub struct Semantics {
source: Rc<str>,
syntax_tree: Rc<SyntaxTree>,
lines: Rc<Lines>,
// Instead of physical parents, this is the set of *logical* parents.
// This is what is used for binding.
@ -615,17 +622,18 @@ pub struct Semantics<'a> {
classes: RefCell<Vec<Incremental<ClassRef>>>,
}
impl<'a> Semantics<'a> {
pub fn new(tree: &'a SyntaxTree<'a>, lines: &'a Lines) -> Self {
impl Semantics {
pub fn new(source: Rc<str>, tree: Rc<SyntaxTree>, lines: Rc<Lines>) -> Self {
let mut logical_parents = vec![None; tree.len()];
if let Some(root) = tree.root() {
set_logical_parents(&mut logical_parents, tree, root, None);
set_logical_parents(&mut logical_parents, &tree, root, None);
}
let root_environment = Environment::new(None, Location::Module);
let mut semantics = Semantics {
syntax_tree: tree,
source,
syntax_tree: tree.clone(),
lines,
logical_parents,
errors: RefCell::new(vec![]),
@ -645,8 +653,16 @@ impl<'a> Semantics<'a> {
semantics
}
pub fn tree(&self) -> &SyntaxTree<'a> {
&self.syntax_tree
pub fn source(&self) -> Rc<str> {
self.source.clone()
}
pub fn tree(&self) -> Rc<SyntaxTree> {
self.syntax_tree.clone()
}
pub fn lines(&self) -> Rc<Lines> {
self.lines.clone()
}
pub fn snapshot_errors(&self) -> Vec<Error> {
@ -666,16 +682,6 @@ impl<'a> Semantics<'a> {
}
}
fn report_error<T>(&self, position: usize, error: T)
where
T: ToString,
{
let (line, col) = self.lines.position(position);
self.errors
.borrow_mut()
.push(Error::new(line, col, error.to_string()));
}
fn report_error_span<T>(&self, start: usize, end: usize, error: T)
where
T: ToString,
@ -687,7 +693,7 @@ impl<'a> Semantics<'a> {
.push(Error::new_spanned(start, end, error.to_string()));
}
fn report_error_tree<T>(&self, tree: &Tree<'a>, error: T)
fn report_error_tree<T>(&self, tree: &Tree, error: T)
where
T: ToString,
{
@ -702,9 +708,6 @@ impl<'a> Semantics<'a> {
self.report_error_span(tree.start_pos, tree.end_pos, error)
}
// pub fn lvalue_declaration(&self, t: TreeRef) -> Option<&Declaration> {
// }
fn gather_errors(&mut self, tree: TreeRef) {
let mut stack = vec![tree];
while let Some(tr) = stack.pop() {
@ -713,7 +716,7 @@ impl<'a> Semantics<'a> {
match child {
Child::Token(t) => {
if t.kind == TokenKind::Error {
self.report_error(t.start, t.as_str());
self.report_error_span(t.start(), t.end(), t.as_str(&self.source));
}
}
Child::Tree(t) => stack.push(*t),
@ -779,13 +782,16 @@ impl<'a> Semantics<'a> {
};
let existing = environment.declarations.insert(
name.as_str().into(),
name.as_str(&self.source).into(),
Declaration::Function { declaration: *t },
);
if existing.is_some() {
self.report_error_tree(
ct,
format!("duplicate definition of function '{name}'"),
format!(
"duplicate definition of function '{}'",
name.as_str(&self.source)
),
);
}
}
@ -799,15 +805,21 @@ impl<'a> Semantics<'a> {
fn environment_of_file(&self, parent: EnvironmentRef, tree: &Tree) -> EnvironmentRef {
let mut environment = Environment::new(Some(parent), Location::Module);
for child in tree.children.iter() {
match child {
Child::Tree(t) => {
let Child::Tree(t) = child else {
continue;
};
let ct = &self.syntax_tree[*t];
let binding = match ct.kind {
TreeKind::FunctionDecl => {
let Some(name) = ct.nth_token(1) else {
continue;
};
if name.kind != TokenKind::Identifier {
continue;
}
let declaration = Declaration::Function { declaration: *t };
Some(("function", name, declaration))
@ -816,28 +828,42 @@ impl<'a> Semantics<'a> {
let Some(name) = ct.nth_token(1) else {
continue;
};
if name.kind != TokenKind::Identifier {
continue;
}
let declaration = Declaration::Class { declaration: *t };
Some(("class", name, declaration))
}
TreeKind::Import => {
let Some(name) = ct.nth_token(3) else {
continue;
};
if name.kind != TokenKind::Identifier {
continue;
}
let declaration = Declaration::Import { declaration: *t };
Some(("import", name, declaration))
}
_ => None,
};
if let Some((what, name, declaration)) = binding {
let existing = environment
.declarations
.insert(name.as_str().into(), declaration);
.insert(name.as_str(&self.source).into(), declaration);
if existing.is_some() {
self.report_error_tree(
ct,
format!("duplicate definition of {what} '{name}'"),
format!(
"duplicate definition of {what} '{}'",
name.as_str(&self.source)
),
);
}
}
}
_ => {}
}
}
EnvironmentRef::new(environment)
}
@ -861,7 +887,7 @@ impl<'a> Semantics<'a> {
};
let mut environment = Environment::new(Some(parent), location);
environment.insert(name, declaration);
environment.insert(name.as_str(&self.source), declaration);
EnvironmentRef::new(environment)
}
@ -878,7 +904,10 @@ impl<'a> Semantics<'a> {
match param.kind {
TreeKind::SelfParameter => {
let param_name = param.nth_token(0).unwrap();
if environment.insert(param_name, *ct).is_some() {
if environment
.insert(param_name.as_str(&self.source), *ct)
.is_some()
{
self.report_error_tree(
param,
format!("duplicate definition of self parameter"),
@ -895,10 +924,11 @@ impl<'a> Semantics<'a> {
continue;
};
if environment.insert(param_name, *ct).is_some() {
let param_str = param_name.as_str(&self.source);
if environment.insert(param_str, *ct).is_some() {
self.report_error_tree(
param,
format!("duplicate definition of parameter '{param_name}'"),
format!("duplicate definition of parameter '{param_str}'"),
);
}
}
@ -920,7 +950,7 @@ impl<'a> Semantics<'a> {
};
let mut environment = Environment::new(Some(parent), Location::Local);
environment.insert(id, it);
environment.insert(id.as_str(&self.source), it);
EnvironmentRef::new(environment)
}
@ -928,7 +958,7 @@ impl<'a> Semantics<'a> {
assert_eq!(tree.kind, TreeKind::IsExpression);
// The environment of an `is` expression is the environment produced by the pattern.
let Some(pattern) = tree.child_tree_of_kind(self.syntax_tree, TreeKind::Pattern) else {
let Some(pattern) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::Pattern) else {
// Should really have a pattern in there; otherwise there was a
// parse error, don't make more trouble.
return Environment::error();
@ -950,7 +980,7 @@ impl<'a> Semantics<'a> {
assert_eq!(tree.kind, TreeKind::MatchArm);
// The environment of a `match arm` expression is the environment produced by the pattern.
let Some(pattern) = tree.child_tree_of_kind(self.syntax_tree, TreeKind::Pattern) else {
let Some(pattern) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::Pattern) else {
// Should really have a pattern in there; otherwise there was a
// parse error, don't make more trouble.
return Environment::error();
@ -989,7 +1019,7 @@ impl<'a> Semantics<'a> {
) -> EnvironmentRef {
assert_eq!(tree.kind, TreeKind::Pattern);
let Some(binding) = tree.child_tree_of_kind(self.syntax_tree, TreeKind::VariableBinding)
let Some(binding) = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::VariableBinding)
else {
// No binding, no new environment.
return parent;
@ -999,7 +1029,7 @@ impl<'a> Semantics<'a> {
};
let is_wildcard = tree
.child_of_kind(self.syntax_tree, TreeKind::WildcardPattern)
.child_of_kind(&self.syntax_tree, TreeKind::WildcardPattern)
.is_some();
let variable_decl = if is_wildcard {
@ -1010,7 +1040,7 @@ impl<'a> Semantics<'a> {
} else {
// Otherwise the binding is to the type expression which must
// match for the variable to have a value.
let Some(type_expr) = tree.child_of_kind(self.syntax_tree, TreeKind::TypeExpression)
let Some(type_expr) = tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression)
else {
return Environment::error();
};
@ -1020,7 +1050,7 @@ impl<'a> Semantics<'a> {
// TODO: This binding should be un-assignable! Don't assign to this!
let mut env = Environment::new(Some(parent), Location::Local);
env.insert(variable, variable_decl);
env.insert(variable.as_str(&self.source), variable_decl);
EnvironmentRef::new(env)
}
@ -1044,11 +1074,14 @@ impl<'a> Semantics<'a> {
let tree = &self.syntax_tree[t];
assert_eq!(tree.kind, TreeKind::ClassDecl);
let name = tree.nth_token(1).map(|t| t.as_str()).unwrap_or("<??>");
let name = tree
.nth_token(1)
.map(|t| t.as_str(&self.source))
.unwrap_or("<??>");
// Fields
let mut fields = Vec::new();
for field in tree.children_of_kind(self.syntax_tree, TreeKind::FieldDecl) {
for field in tree.children_of_kind(&self.syntax_tree, TreeKind::FieldDecl) {
let f = &self.syntax_tree[field];
if let Some(field_name) = f.nth_token(0) {
let field_type = f
@ -1056,7 +1089,7 @@ impl<'a> Semantics<'a> {
.map(|t| self.type_of(t))
.unwrap_or(Type::Error);
fields.push(FieldDecl {
name: field_name.as_str().into(),
name: field_name.as_str(&self.source).into(),
declaration: field,
field_type,
});
@ -1065,7 +1098,7 @@ impl<'a> Semantics<'a> {
// Methods
let mut methods = Vec::new();
for method in tree.children_of_kind(self.syntax_tree, TreeKind::FunctionDecl) {
for method in tree.children_of_kind(&self.syntax_tree, TreeKind::FunctionDecl) {
let m = &self.syntax_tree[method];
if let Some(method_name) = m.nth_token(1) {
// TODO: Check to see if it is actually a method, or if it is a static function.
@ -1073,7 +1106,7 @@ impl<'a> Semantics<'a> {
match decl_type {
Type::Method(..) => {
methods.push(MethodDecl {
name: method_name.as_str().into(),
name: method_name.as_str(&self.source).into(),
decl_type,
declaration: method,
is_static: false,
@ -1082,7 +1115,7 @@ impl<'a> Semantics<'a> {
_ => {
// TODO: Default to method or static?
methods.push(MethodDecl {
name: method_name.as_str().into(),
name: method_name.as_str(&self.source).into(),
decl_type,
declaration: method,
is_static: true,
@ -1287,6 +1320,7 @@ impl<'a> Semantics<'a> {
TreeKind::TypeParameter => self.type_of_type_parameter(tree),
TreeKind::UnaryExpression => self.type_of_unary(tree),
TreeKind::WhileStatement => self.type_of_while(tree),
TreeKind::Import => self.type_of_import(tree),
_ => self.internal_compiler_error(Some(t), "asking for a nonsense type"),
};
@ -1313,8 +1347,9 @@ impl<'a> Semantics<'a> {
// This is dumb and should be punished, probably.
(_, Type::Unreachable) => {
self.report_error(
op.start,
self.report_error_span(
op.start(),
op.end(),
"cannot apply a unary operator to something that doesn't yield a value",
);
Some(Type::Error)
@ -1324,11 +1359,12 @@ impl<'a> Semantics<'a> {
(_, Type::Error) => Some(Type::Error),
(_, arg_type) => {
self.report_error(
op.start,
self.report_error_span(
op.start(),
op.end(),
format!(
"cannot apply unary operator '{}' to value of type {}",
op.as_str(),
op.as_str(&self.source),
arg_type
),
);
@ -1372,16 +1408,24 @@ impl<'a> Semantics<'a> {
// This is dumb and should be punished, probably.
(_, _, Type::Unreachable) => {
self.report_error(
op.start,
format!("cannot apply '{op}' to an argument that doesn't yield a value (on the right)"),
self.report_error_span(
op.start(),
op.end(),
format!(
"cannot apply '{}' to an argument that doesn't yield a value (on the right)",
op.as_str(&self.source)
),
);
Some(Type::Error)
}
(_, Type::Unreachable, _) => {
self.report_error(
op.start,
format!("cannot apply '{op}' to an argument that doesn't yield a value (on the left)"),
self.report_error_span(
op.start(),
op.end(),
format!(
"cannot apply '{}' to an argument that doesn't yield a value (on the left)",
op.as_str(&self.source)
),
);
Some(Type::Error)
}
@ -1395,9 +1439,13 @@ impl<'a> Semantics<'a> {
// Missed the whole table, it must be an error.
(_, left_type, right_type) => {
self.report_error(
op.start,
format!("cannot apply binary operator '{op}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)"),
self.report_error_span(
op.start(),
op.end(),
format!(
"cannot apply binary operator '{}' to expressions of type '{left_type}' (on the left) and '{right_type}' (on the right)",
op.as_str(&self.source)
),
);
Some(Type::Error)
}
@ -1420,7 +1468,7 @@ impl<'a> Semantics<'a> {
let declaration = match tree.kind {
// TODO: Assign to list access
TreeKind::Identifier => {
let id = tree.nth_token(0)?;
let id = tree.nth_token(0)?.as_str(&self.source);
environment = self.environment_of(left_tree);
match environment.bind(id) {
Some(decl) => decl,
@ -1433,7 +1481,7 @@ impl<'a> Semantics<'a> {
}
}
TreeKind::MemberAccess => {
let id = tree.nth_token(2)?;
let id = tree.nth_token(2)?.as_str(&self.source);
let typ = self.type_of(tree.nth_tree(0)?);
environment = self.member_environment(left_tree, &typ);
match environment.bind(id) {
@ -1471,6 +1519,13 @@ impl<'a> Semantics<'a> {
);
return Some(Type::Error);
}
Declaration::Import { .. } => {
self.report_error_tree_ref(
left_tree,
"cannot assign a new value to an imported module",
);
return Some(Type::Error);
}
}
let _ = environment;
@ -1489,8 +1544,9 @@ impl<'a> Semantics<'a> {
} else if self.can_convert(&right_type, &left_type) {
Some(Type::Assignment(Box::new(left_type)))
} else {
self.report_error(
op.start,
self.report_error_span(
op.start(),
op.end(),
format!("cannot assign a value of type '{right_type}' to type '{left_type}'"),
);
Some(Type::Error)
@ -1506,15 +1562,15 @@ impl<'a> Semantics<'a> {
assert_eq!(tree.kind, TreeKind::TypeIdentifier);
// TODO: This will *clearly* need to get better.
let token = tree.nth_token(0)?;
match token.as_str() {
let token = tree.nth_token(0)?.as_str(&self.source);
match token {
"f64" => Some(Type::F64),
"string" => Some(Type::String),
"bool" => Some(Type::Bool),
"nothing" => Some(Type::Nothing),
"list" => {
let args =
tree.child_tree_of_kind(self.syntax_tree, TreeKind::TypeParameterList)?;
tree.child_tree_of_kind(&self.syntax_tree, TreeKind::TypeParameterList)?;
let mut arg_types: Vec<_> = args.child_trees().map(|t| self.type_of(t)).collect();
if arg_types.len() != 1 {
@ -1544,6 +1600,13 @@ impl<'a> Semantics<'a> {
);
Some(Type::Error)
}
Some(Declaration::Import { .. }) => {
self.report_error_tree(
tree,
format!("'{token}' is an imported module and cannot be used as a type"),
);
Some(Type::Error)
}
None => {
if !environment.is_error {
self.report_error_tree(tree, format!("Unrecognized type: '{token}'"));
@ -1609,7 +1672,10 @@ impl<'a> Semantics<'a> {
TokenKind::Number => Type::F64,
TokenKind::String => Type::String,
TokenKind::True | TokenKind::False => Type::Bool,
_ => panic!("the token {tok} doesn't have a type!"),
_ => panic!(
"the token {} doesn't have a type!",
tok.as_str(&self.source)
),
};
Some(pig)
}
@ -1766,9 +1832,14 @@ impl<'a> Semantics<'a> {
return Some(Type::Error);
}
let Some(declaration) = env.bind(id) else {
let id_str = id.as_str(&self.source);
let Some(declaration) = env.bind(id_str) else {
if !env.is_error {
self.report_error(id.start, format!("'{typ}' has no member {id}"));
self.report_error_span(
id.start(),
id.end(),
format!("'{typ}' has no member {id_str}"),
);
}
return Some(Type::Error);
};
@ -1786,6 +1857,10 @@ impl<'a> Semantics<'a> {
let class = self.class_of(*ct);
class.static_env.clone()
}
// Type::Module(_name) => {
// // Woof. Would like to bind this now.
// todo!();
// }
Type::Error => return Environment::error(),
_ => {
self.report_error_tree_ref(t, format!("cannot access members of '{typ}'"));
@ -1820,7 +1895,7 @@ impl<'a> Semantics<'a> {
fn type_of_identifier(&self, t: TreeRef, tree: &Tree) -> Option<Type> {
assert_eq!(tree.kind, TreeKind::Identifier);
let id = tree.nth_token(0)?;
let id = tree.nth_token(0)?.as_str(&self.source);
let environment = self.environment_of(t);
if let Some(declaration) = environment.bind(id) {
return Some(self.type_of_declaration(t, declaration));
@ -1836,6 +1911,7 @@ impl<'a> Semantics<'a> {
match declaration {
Declaration::Variable { declaration, .. } => self.type_of(*declaration),
Declaration::Function { declaration, .. } => self.type_of(*declaration),
Declaration::Import { declaration, .. } => self.type_of(*declaration),
Declaration::ExternFunction {
declaration_type, ..
} => declaration_type.clone(),
@ -1867,7 +1943,7 @@ impl<'a> Semantics<'a> {
fn type_of_self_reference(&self, t: TreeRef, tree: &Tree) -> Option<Type> {
assert_eq!(tree.kind, TreeKind::SelfReference);
let id = tree.nth_token(0)?;
let id = tree.nth_token(0)?.as_str(&self.source);
let environment = self.environment_of(t);
if let Some(declaration) = environment.bind(id) {
return Some(match declaration {
@ -1886,7 +1962,7 @@ impl<'a> Semantics<'a> {
}
fn type_of_function_decl(&self, tree: &Tree) -> Option<Type> {
let param_list = tree.child_tree_of_kind(self.syntax_tree, TreeKind::ParamList)?;
let param_list = tree.child_tree_of_kind(&self.syntax_tree, TreeKind::ParamList)?;
// NOTE: The methodness here is determined by the presence of a self
// parameter, even if that parameter is incorrect (e.g., this
@ -1905,7 +1981,7 @@ impl<'a> Semantics<'a> {
}
}
let return_type = match tree.child_of_kind(self.syntax_tree, TreeKind::ReturnType) {
let return_type = match tree.child_of_kind(&self.syntax_tree, TreeKind::ReturnType) {
Some(t) => self.type_of(t),
None => Type::Nothing,
};
@ -1919,7 +1995,7 @@ impl<'a> Semantics<'a> {
fn type_of_parameter(&self, tree: &Tree) -> Option<Type> {
assert_eq!(tree.kind, TreeKind::Parameter);
match tree.child_of_kind(self.syntax_tree, TreeKind::TypeExpression) {
match tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression) {
Some(t) => Some(self.type_of(t)),
None => {
self.report_error_tree(tree, format!("the parameter is missing a type"));
@ -1991,7 +2067,7 @@ impl<'a> Semantics<'a> {
// The details of a class are computed lazily, but this is enough of
// a belly-button.
let name = tree.nth_token(1)?;
Some(Type::Object(t, name.as_str().into()))
Some(Type::Object(t, name.as_str(&self.source).into()))
}
fn type_of_field_decl(&self, tree: &Tree) -> Option<Type> {
@ -2013,7 +2089,7 @@ impl<'a> Semantics<'a> {
// Form 2: { x, ... }
let environment = self.environment_of(t);
let id = tree.nth_token(0)?;
let id = tree.nth_token(0)?.as_str(&self.source);
let declaration = match environment.bind(id) {
Some(d) => d,
None => {
@ -2038,6 +2114,13 @@ impl<'a> Semantics<'a> {
);
Some(Type::Error)
}
Declaration::Import { .. } => {
self.report_error_tree(
tree,
format!("'{id}' is an imported module, and cannot be the value of a field"),
);
Some(Type::Error)
}
}
}
@ -2063,12 +2146,12 @@ impl<'a> Semantics<'a> {
}
fn type_of_match_expression(&self, tree: &Tree) -> Option<Type> {
Some(self.type_of(tree.child_of_kind(self.syntax_tree, TreeKind::MatchBody)?))
Some(self.type_of(tree.child_of_kind(&self.syntax_tree, TreeKind::MatchBody)?))
}
fn type_of_match_body(&self, tree: &Tree) -> Option<Type> {
let arms: Vec<_> = tree
.children_of_kind(self.syntax_tree, TreeKind::MatchArm)
.children_of_kind(&self.syntax_tree, TreeKind::MatchArm)
.collect();
if arms.len() == 0 {
@ -2106,7 +2189,15 @@ impl<'a> Semantics<'a> {
fn type_of_pattern(&self, tree: &Tree) -> Option<Type> {
// We know that we have a type expression in here, that's what we're asking about.
Some(self.type_of(tree.child_of_kind(self.syntax_tree, TreeKind::TypeExpression)?))
Some(self.type_of(tree.child_of_kind(&self.syntax_tree, TreeKind::TypeExpression)?))
}
fn type_of_import(&self, tree: &Tree) -> Option<Type> {
let tok = tree.nth_token(1)?;
if tok.kind != TokenKind::String {
return Some(Type::Error); // Already reported as syntax error
}
Some(Type::Module(tok.as_str(&self.source).into()))
}
// TODO: Really want to TEST THIS also uh can we generate bytecode for functions and call it??
@ -2118,10 +2209,10 @@ impl<'a> Semantics<'a> {
TreeKind::LiteralExpression => {
let tok = tree.nth_token(0)?;
match self.type_of(t) {
Type::F64 => Some(StackValue::Float(tok.as_str().parse().unwrap())),
Type::F64 => Some(StackValue::Float(tok.as_str(&self.source).parse().unwrap())),
Type::Bool => Some(StackValue::Bool(tok.kind == TokenKind::True)),
Type::String => Some(StackValue::String(
string_constant_to_string(tok.as_str()).into(),
string_constant_to_string(tok.as_str(&self.source)).into(),
)),
Type::Nothing => Some(StackValue::Nothing), // ?
_ => None,
@ -2132,7 +2223,7 @@ impl<'a> Semantics<'a> {
let pt = tree.nth_tree(2)?;
let pattern = &self.syntax_tree[pt];
if pattern
.child_of_kind(self.syntax_tree, TreeKind::WildcardPattern)
.child_of_kind(&self.syntax_tree, TreeKind::WildcardPattern)
.is_some()
{
Some(StackValue::Bool(true))
@ -2240,7 +2331,7 @@ impl<'a> Semantics<'a> {
pub fn dump_compiler_state(&self, tr: Option<TreeRef>) {
eprintln!("Parsed the tree as:");
eprintln!("\n{}", self.syntax_tree.dump(true));
eprintln!("\n{}", self.syntax_tree.dump(&self.source, true));
{
let errors = self.snapshot_errors();
@ -2288,6 +2379,9 @@ impl<'a> Semantics<'a> {
Declaration::Class { declaration, .. } => {
eprintln!(" (class {declaration:?})");
}
Declaration::Import { declaration, .. } => {
eprintln!(" (imported module {declaration:?})");
}
};
}
environment = env.parent.clone();
@ -2376,6 +2470,10 @@ pub fn check(s: &Semantics) {
TreeKind::MatchExpression => {}
TreeKind::WhileStatement => check_while_statement(s, tree),
TreeKind::Import => {
// TODO: Check Import Statement
}
}
}
}
@ -2399,12 +2497,12 @@ fn check_function_decl(s: &Semantics, t: TreeRef, tree: &Tree) {
assert_eq!(tree.kind, TreeKind::FunctionDecl);
let _ = s.environment_of(t);
let return_type_tree = tree.child_of_kind(s.syntax_tree, TreeKind::ReturnType);
let return_type_tree = tree.child_of_kind(&s.syntax_tree, TreeKind::ReturnType);
let return_type = return_type_tree
.map(|t| s.type_of(t))
.unwrap_or(Type::Nothing);
if let Some(body) = tree.child_of_kind(s.syntax_tree, TreeKind::Block) {
if let Some(body) = tree.child_of_kind(&s.syntax_tree, TreeKind::Block) {
let body_type = s.type_of(body);
if !s.can_convert(&body_type, &return_type) {
// Just work very hard to get an appropriate error span.
@ -2418,8 +2516,7 @@ fn check_function_decl(s: &Semantics, t: TreeRef, tree: &Tree) {
let end_tok = tree
.nth_token(1)
.unwrap_or_else(|| tree.nth_token(0).unwrap());
let end_pos = end_tok.start + end_tok.as_str().len();
(start, end_pos)
(start, end_tok.end())
});
s.report_error_span(start, end, format!("the body of this function yields a value of type '{body_type}', but callers expect this function to produce a '{return_type}'"));
@ -2434,9 +2531,11 @@ fn check_let(s: &Semantics, tree: &Tree) {
let Some(expr) = tree.nth_tree(3) else { return };
if let Type::Method(..) = s.type_of(expr) {
let start = name.start;
let end = name.start + name.as_str().len();
s.report_error_span(start, end, "methods cannot be assigned to variables");
s.report_error_span(
name.start(),
name.end(),
"methods cannot be assigned to variables",
);
}
}
@ -2491,7 +2590,7 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) {
let Some(type_expression) = tree.nth_tree(1) else {
return;
};
let Some(field_list) = tree.child_tree_of_kind(s.syntax_tree, TreeKind::FieldList) else {
let Some(field_list) = tree.child_tree_of_kind(&s.syntax_tree, TreeKind::FieldList) else {
return;
};
@ -2502,11 +2601,11 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) {
let mut any_errors = false;
let mut field_bindings = HashMap::new();
for field in field_list.children_of_kind(s.syntax_tree, TreeKind::FieldValue) {
for field in field_list.children_of_kind(&s.syntax_tree, TreeKind::FieldValue) {
let f = &s.syntax_tree[field];
if let Some(name) = f.nth_token(0) {
let field_type = s.type_of(field);
field_bindings.insert(name.as_str(), (field, field_type));
field_bindings.insert(name.as_str(&s.source), (field, field_type));
} else {
any_errors = true;
}
@ -2552,12 +2651,13 @@ fn check_new_object_expression(s: &Semantics, tree: &Tree) {
fn check_class_declaration(s: &Semantics, tree: &Tree) {
let mut fields = HashMap::new();
for field in tree.children_of_kind(s.syntax_tree, TreeKind::FieldDecl) {
for field in tree.children_of_kind(&s.syntax_tree, TreeKind::FieldDecl) {
let f = &s.syntax_tree[field];
let Some(name) = f.nth_token(0) else {
continue;
};
match fields.insert(name.as_str(), field) {
let name = name.as_str(&s.source);
match fields.insert(name, field) {
Some(_) => {
s.report_error_tree(f, format!("duplicate definition of field '{name}'"));
}
@ -2595,7 +2695,7 @@ fn check_match_body(s: &Semantics, t: TreeRef, _tree: &Tree) {
// https://doc.rust-lang.org/nightly/nightly-rustc/rustc_pattern_analysis/usefulness/index.html
// let arms: Vec<_> = tree
// .children_of_kind(s.syntax_tree, TreeKind::MatchArm)
// .children_of_kind(&s.syntax_tree, TreeKind::MatchArm)
// .collect();
// if arms.len() > 0 {
@ -2626,8 +2726,9 @@ mod tests {
#[test]
#[should_panic(expected = "INTERNAL COMPILER ERROR: oh no")]
pub fn ice() {
let (tree, lines) = parse("1 + 1");
let semantics = Semantics::new(&tree, &lines);
let source: Rc<str> = "1+1".into();
let (tree, lines) = parse(&source);
let semantics = Semantics::new(source, tree.clone(), lines);
semantics.internal_compiler_error(tree.root(), "oh no");
}
}

View file

@ -37,6 +37,7 @@ pub enum TokenKind {
Number,
And,
As,
Async,
Await,
Class,
@ -63,53 +64,60 @@ pub enum TokenKind {
}
// NOTE: Tokens are kinda big (like 40 bytes?) and AFAICT the only way to go
// smaller would be to stop using string pointers and use smaller
// sizes/offsets instead, e.g., 32b for offset and 32b for size, and
// stop tracking the position independently from the start, and then
// require the source text when converting to line/col. I'm unwilling to
// give up the ergonomics of &str and String right now, so we're just
// not doing it.
// smaller would be to find some other way to represent the error in an
// error token, but I'm kinda unwilling to do that.
//
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Token<'a> {
pub struct Token {
pub kind: TokenKind,
pub start: usize,
value: Result<&'a str, Box<str>>,
start: usize,
end: usize,
error: Option<Box<str>>,
}
impl<'a> Token<'a> {
pub fn new(kind: TokenKind, start: usize, value: &'a str) -> Self {
impl Token {
pub fn new(kind: TokenKind, start: usize, end: usize) -> Self {
Token {
kind,
start,
value: Ok(value),
end,
error: None,
}
}
pub fn error(start: usize, message: String) -> Self {
pub fn error(start: usize, end: usize, message: String) -> Self {
Token {
kind: TokenKind::Error,
start,
value: Err(message.into()),
end,
error: Some(message.into()),
}
}
pub fn as_str<'b>(&'b self) -> &'a str
/// Returns the offset into the source text at which this token begins.
///
/// Together with [`Token::end`] this forms the range used to slice the
/// token's text out of the source string.
pub fn start(&self) -> usize {
self.start
}
/// Returns the offset into the source text just past this token.
///
/// This is the exclusive upper bound of the `start..end` range used to
/// slice the token's text out of the source string.
pub fn end(&self) -> usize {
self.end
}
/// Returns the length of the token's span in the source text, computed as
/// `end() - start()`.
///
/// This is the span length even for error tokens, whose `as_str` returns
/// the error message rather than the source slice.
pub fn len(&self) -> usize {
self.end() - self.start()
}
pub fn as_str<'a, 'b>(&'a self, source: &'b str) -> &'a str
where
'b: 'a,
{
match &self.value {
Ok(v) => v,
Err(e) => &e,
if let Some(error) = &self.error {
&error
} else {
&source[self.start()..self.end()]
}
}
}
impl<'a> std::fmt::Display for Token<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.as_str())
}
}
pub struct Lines {
newlines: Vec<usize>,
}
@ -169,6 +177,10 @@ impl<'a> Tokens<'a> {
result
}
/// Returns the source text this tokenizer is reading from.
///
/// The returned slice carries the tokenizer's `'a` lifetime rather than
/// the lifetime of the `&self` borrow.
pub fn source(&self) -> &'a str {
self.source
}
/// Consumes the tokenizer and returns the line table it has built.
///
/// Takes `self` by value, so the tokenizer cannot be used afterwards.
pub fn lines(self) -> Lines {
self.lines
}
@ -179,12 +191,11 @@ impl<'a> Tokens<'a> {
self.lines.token_position(token)
}
fn token(&self, start: usize, kind: TokenKind) -> Token<'a> {
let value = &self.source[start..self.pos()];
Token::new(kind, start, value)
fn token(&self, start: usize, kind: TokenKind) -> Token {
Token::new(kind, start, self.pos())
}
fn number(&mut self, start: usize) -> Token<'a> {
fn number(&mut self, start: usize) -> Token {
// First, the main part.
loop {
if !self.matches_digit() {
@ -225,6 +236,7 @@ impl<'a> Tokens<'a> {
let slice = &self.source[start..self.pos()];
return Token::error(
start,
self.pos(),
format!("Invalid floating-point literal: {slice}"),
);
}
@ -238,10 +250,14 @@ impl<'a> Tokens<'a> {
self.token(start, TokenKind::Number)
}
fn string(&mut self, start: usize, delimiter: char) -> Token<'a> {
fn string(&mut self, start: usize, delimiter: char) -> Token {
while !self.matches(delimiter) {
if self.eof() {
return Token::error(start, "Unterminated string constant".to_string());
return Token::error(
start,
self.pos(),
"Unterminated string constant".to_string(),
);
}
if self.matches('\\') {
self.advance();
@ -259,6 +275,9 @@ impl<'a> Tokens<'a> {
if ident == "and" {
return TokenKind::And;
}
if ident == "as" {
return TokenKind::As;
}
if ident == "async" {
return TokenKind::Async;
}
@ -363,7 +382,7 @@ impl<'a> Tokens<'a> {
TokenKind::Identifier
}
fn identifier(&mut self, start: usize) -> Token<'a> {
fn identifier(&mut self, start: usize) -> Token {
loop {
// TODO: Use unicode identifier classes instead
if !self.matches_next(|c| c.is_ascii_alphanumeric() || c == '_') {
@ -373,7 +392,7 @@ impl<'a> Tokens<'a> {
let ident = &self.source[start..self.pos()];
let kind = Self::identifier_token_kind(ident);
Token::new(kind, start, ident)
Token::new(kind, start, self.pos())
}
fn matches(&mut self, ch: char) -> bool {
@ -420,7 +439,7 @@ impl<'a> Tokens<'a> {
self.next_char.is_none()
}
fn whitespace(&mut self, pos: usize) -> Token<'a> {
fn whitespace(&mut self, pos: usize) -> Token {
while let Some((pos, ch)) = self.next_char {
if ch == '\n' {
self.lines.add_line(pos);
@ -432,7 +451,7 @@ impl<'a> Tokens<'a> {
self.token(pos, TokenKind::Whitespace)
}
fn comment(&mut self, pos: usize) -> Token<'a> {
fn comment(&mut self, pos: usize) -> Token {
while let Some((_, ch)) = self.next_char {
if ch == '\n' {
break;
@ -442,7 +461,7 @@ impl<'a> Tokens<'a> {
self.token(pos, TokenKind::Comment)
}
pub fn next(&mut self) -> Token<'a> {
pub fn next(&mut self) -> Token {
let (pos, c) = match self.advance() {
Some((p, c)) => (p, c),
None => return self.token(self.source.len(), TokenKind::EOF),
@ -516,7 +535,7 @@ impl<'a> Tokens<'a> {
} else if c.is_ascii_alphabetic() || c == '_' {
self.identifier(pos)
} else {
Token::error(pos, format!("Unexpected character '{c}'"))
Token::error(pos, self.pos(), format!("Unexpected character '{c}'"))
}
}
}
@ -552,9 +571,9 @@ mod tests {
let mut expected: Vec<Token> = (vec![$($s),*])
.into_iter()
.map(|t| Token::new(t.1, t.0, t.2))
.map(|t| Token::new(t.1, t.0, t.0 + t.2.len()))
.collect();
expected.push(Token::new(TokenKind::EOF, $input.len(), ""));
expected.push(Token::new(TokenKind::EOF, $input.len(), $input.len()));
test_tokens_impl($input, expected);
}
@ -611,11 +630,12 @@ mod tests {
test_tokens!(
more_more_keywords,
"in is match _",
"in is match _ as",
(0, In, "in"),
(3, Is, "is"),
(6, Match, "match"),
(12, Underscore, "_")
(12, Underscore, "_"),
(14, As, "as")
);
test_tokens!(

View file

@ -5,6 +5,7 @@ use fine::tokens::Lines;
use fine::vm::{eval_export_fn, Context};
use pretty_assertions::assert_eq;
use std::fmt::Write as _;
use std::rc::Rc;
fn rebase_section(source_path: &str, section: &str, value: &str) {
let contents = std::fs::read_to_string(source_path)
@ -83,8 +84,8 @@ fn should_rebase() -> bool {
}
}
fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) {
let dump = tree.dump(false);
fn assert_concrete(source: Rc<str>, tree: Rc<SyntaxTree>, expected: &str, source_path: &str) {
let dump = tree.dump(&source, false);
if dump != expected {
if should_rebase() {
rebase_section(source_path, "concrete", &dump)
@ -128,13 +129,14 @@ macro_rules! semantic_assert_eq {
}
fn assert_type_at(
tree: &SyntaxTree,
lines: &Lines,
source: Rc<str>,
tree: Rc<SyntaxTree>,
lines: Rc<Lines>,
pos: usize,
expected: &str,
_source_path: &str,
) {
let semantics = Semantics::new(tree, lines);
let semantics = Semantics::new(source, tree.clone(), lines);
let tree_ref = match tree.find_tree_at(pos) {
Some(t) => t,
None => semantic_panic!(
@ -156,13 +158,14 @@ fn assert_type_at(
}
fn assert_type_error_at(
tree: &SyntaxTree,
lines: &Lines,
source: Rc<str>,
tree: Rc<SyntaxTree>,
lines: Rc<Lines>,
pos: usize,
expected: &str,
_source_path: &str,
) {
let semantics = Semantics::new(tree, lines);
let semantics = Semantics::new(source, tree.clone(), lines);
let tree_ref = match tree.find_tree_at(pos) {
Some(t) => t,
None => semantic_panic!(
@ -222,9 +225,15 @@ fn dump_module(out: &mut String, module: &Module) -> std::fmt::Result {
Ok(())
}
fn assert_compiles_to(tree: &SyntaxTree, lines: &Lines, expected: &str, source_path: &str) {
let semantics = Semantics::new(tree, lines);
let module = compile(&semantics);
fn assert_compiles_to(
source: Rc<str>,
tree: Rc<SyntaxTree>,
lines: Rc<Lines>,
expected: &str,
source_path: &str,
) {
let semantics = Rc::new(Semantics::new(source, tree, lines));
let module = compile(semantics.clone());
let mut actual = String::new();
dump_module(&mut actual, &module).expect("no dumping?");
@ -244,8 +253,8 @@ fn assert_compiles_to(tree: &SyntaxTree, lines: &Lines, expected: &str, source_p
}
}
fn assert_no_errors(tree: &SyntaxTree, lines: &Lines) {
let semantics = Semantics::new(tree, lines);
fn assert_no_errors(source: Rc<str>, tree: Rc<SyntaxTree>, lines: Rc<Lines>) {
let semantics = Semantics::new(source, tree, lines);
check(&semantics);
let expected_errors: Vec<Error> = Vec::new();
@ -259,10 +268,10 @@ fn assert_no_errors(tree: &SyntaxTree, lines: &Lines) {
);
}
fn assert_eval_ok(tree: &SyntaxTree, lines: &Lines, expected: &str) {
let semantics = Semantics::new(tree, lines);
fn assert_eval_ok(source: Rc<str>, tree: Rc<SyntaxTree>, lines: Rc<Lines>, expected: &str) {
let semantics = Rc::new(Semantics::new(source, tree, lines));
let module = compile(&semantics);
let module = compile(semantics.clone());
let mut context = Context::new(module.clone());
context.init().expect("Unable to initialize module");
@ -301,8 +310,13 @@ fn assert_eval_ok(tree: &SyntaxTree, lines: &Lines, expected: &str) {
}
}
fn assert_errors(tree: &SyntaxTree, lines: &Lines, expected_errors: Vec<&str>) {
let semantics = Semantics::new(tree, lines);
fn assert_errors(
source: Rc<str>,
tree: Rc<SyntaxTree>,
lines: Rc<Lines>,
expected_errors: Vec<&str>,
) {
let semantics = Semantics::new(source, tree, lines);
check(&semantics);
let errors: Vec<String> = semantics
@ -320,8 +334,8 @@ fn assert_errors(tree: &SyntaxTree, lines: &Lines, expected_errors: Vec<&str>) {
);
}
fn assert_check_error(tree: &SyntaxTree, lines: &Lines, expected: &str) {
let semantics = Semantics::new(tree, lines);
fn assert_check_error(source: Rc<str>, tree: Rc<SyntaxTree>, lines: Rc<Lines>, expected: &str) {
let semantics = Semantics::new(source, tree, lines);
check(&semantics);
let errors = semantics.snapshot_errors();