diff --git a/fine/build.rs b/fine/build.rs index 0038fbb0..d6b2c298 100644 --- a/fine/build.rs +++ b/fine/build.rs @@ -15,7 +15,7 @@ fn generate_test_for_file(path: PathBuf) -> String { while let Some(line) = lines.next() { let line = match line.strip_prefix("//") { Some(line) => line, - None => break, + None => continue, }; let line = line.trim(); diff --git a/fine/src/parser.rs b/fine/src/parser.rs index 56e13684..d2a8fc61 100644 --- a/fine/src/parser.rs +++ b/fine/src/parser.rs @@ -1,6 +1,7 @@ // NOTE: much of this parser structure derived from // https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html use crate::tokens::{Lines, Token, TokenKind, Tokens}; +use std::fmt::Write as _; use std::{cell::Cell, num::NonZeroU32}; pub mod old; // Until I decide to delete it. @@ -22,10 +23,22 @@ impl<'a> SyntaxTree<'a> { self.root } - pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef { + pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef { assert!(t.parent.is_none()); let tr = TreeRef::from_index(self.trees.len()); + t.start_pos = t + .children + .first() + .map(|c| c.start_position(&self)) + .unwrap_or(0); + + t.end_pos = t + .children + .last() + .map(|c| c.end_position(&self)) + .unwrap_or(t.start_pos); + // NOTE: Because of the difficulty of holding multiple mutable // references it's this is our best chance to patch up parent // pointers. @@ -34,23 +47,52 @@ impl<'a> SyntaxTree<'a> { self[*ct].parent = Some(tr); } } + self.trees.push(t); tr } - pub fn dump(&self) -> String { - match self.root { - Some(r) => self[r].dump(self), - None => String::new(), + pub fn dump(&self, with_positions: bool) -> String { + let mut output = String::new(); + if let Some(r) = self.root { + self[r].dump(self, with_positions, &mut output); } + output } - pub fn start_position(&self, t: TreeRef) -> Option { - self[t].start_position(&self) + pub fn start_position(&self, t: TreeRef) -> usize { + self[t].start_pos } - pub fn end_position(&self, t: TreeRef) -> Option { - self[t].end_position(&self) + pub fn end_position(&self, t: TreeRef) -> usize { + self[t].end_pos + } + + pub fn find_tree_at(&self, pos: usize) -> Option { + let mut current = self.root?; + let mut tree = &self[current]; + if pos < tree.start_pos || pos >= tree.end_pos { + return None; + } + + loop { + let mut found = false; + for child in &tree.children { + if let Child::Tree(next) = child { + let next_tree = &self[*next]; + if pos >= next_tree.start_pos && pos < next_tree.end_pos { + found = true; + current = *next; + tree = next_tree; + break; + } + } + } + + if !found { + return Some(current); + } + } } } @@ -95,6 +137,8 @@ pub enum TreeKind { pub struct Tree<'a> { pub kind: TreeKind, pub parent: Option, + pub start_pos: usize, + pub end_pos: usize, pub children: Vec>, } @@ -118,41 +162,9 @@ impl<'a> Tree<'a> { }) .flatten() } - - pub fn start_position(&self, tree: &SyntaxTree<'a>) -> Option { - for child in &self.children { - let start = match child { - Child::Tree(tr) => tree.start_position(*tr), - Child::Token(tok) => Some(tok.start), - }; - - if let Some(start) = start { - return Some(start); - } - } - - // Fundamentally no tokens in this tree. This seems *broken*. - None - } - - pub fn end_position(&self, tree: &SyntaxTree<'a>) -> Option { - for child in self.children.iter().rev() { - let end = match child { - Child::Tree(tr) => tree.end_position(*tr), - Child::Token(tok) => Some(tok.start + tok.as_str().len()), - }; - - if let Some(start) = end { - return Some(start); - } - } - - // Fundamentally no tokens in this tree. This seems *broken*. - None - } } -#[derive(Copy, Clone, Eq, PartialEq, Hash)] +#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)] pub struct TreeRef(NonZeroU32); impl TreeRef { @@ -168,13 +180,15 @@ impl TreeRef { } impl<'a> Tree<'a> { - pub fn dump(&self, tree: &SyntaxTree<'a>) -> String { - let mut output = String::new(); - output.push_str(&format!("{:?}\n", self.kind)); - for child in self.children.iter() { - child.dump_rec(2, tree, &mut output); + pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) { + let _ = write!(output, "{:?}", self.kind); + if with_positions { + let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos); + } + let _ = write!(output, "\n"); + for child in self.children.iter() { + child.dump_rec(2, tree, with_positions, output); } - output } } @@ -184,21 +198,52 @@ pub enum Child<'a> { } impl<'a> Child<'a> { - fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) { + fn dump_rec( + &self, + indent: usize, + tree: &SyntaxTree<'a>, + with_positions: bool, + output: &mut String, + ) { for _ in 0..indent { - output.push(' '); + let _ = write!(output, " "); } match self { - Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())), + Child::Token(t) => { + let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str()); + if with_positions { + let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len()); + } + let _ = write!(output, "\n"); + } Child::Tree(t) => { let t = &tree[*t]; - output.push_str(&format!("{:?}\n", t.kind)); + let _ = write!(output, "{:?}", t.kind); + if with_positions { + let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos); + } + let _ = write!(output, "\n"); + for child in t.children.iter() { - child.dump_rec(indent + 2, tree, output); + child.dump_rec(indent + 2, tree, with_positions, output); } } } } + + pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize { + match &self { + Child::Token(t) => t.start, + Child::Tree(t) => syntax_tree[*t].start_pos, + } + } + + pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize { + match &self { + Child::Token(t) => t.start + t.as_str().len(), + Child::Tree(t) => syntax_tree[*t].end_pos, + } + } } enum ParseEvent<'a> { @@ -373,6 +418,8 @@ impl<'a> CParser<'a> { ParseEvent::Start { kind } => stack.push(Tree { kind, parent: None, + start_pos: 0, + end_pos: 0, children: Vec::new(), }), @@ -726,6 +773,10 @@ mod tests { fn tree_ref_size() { // What's the point of doing all that work if the tree ref isn't nice // and "small"? + // + // TODO: This is a dumb optimization because tokens are + // huge so Child is huge no matter what we do. If we retain + // tokens out of line then we can re-visit this optimization. assert_eq!(4, std::mem::size_of::>()); } } diff --git a/fine/src/semantics.rs b/fine/src/semantics.rs index 93d074ee..3f0ced21 100644 --- a/fine/src/semantics.rs +++ b/fine/src/semantics.rs @@ -51,7 +51,7 @@ impl fmt::Debug for Error { impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message) + write!(f, "{}:{}: {}", self.start.0, self.start.1, self.message) } } @@ -123,14 +123,14 @@ impl fmt::Display for Type { pub struct Semantics<'a> { // TODO: Do I really want my own copy here? Should we standardize on Arc // or Rc or some other nice sharing mechanism? - syntax_tree: SyntaxTree<'a>, - lines: Lines, + syntax_tree: &'a SyntaxTree<'a>, + lines: &'a Lines, errors: RefCell>, types: RefCell>, } impl<'a> Semantics<'a> { - pub fn new(tree: SyntaxTree<'a>, lines: Lines) -> Self { + pub fn new(tree: &'a SyntaxTree<'a>, lines: &'a Lines) -> Self { let mut semantics = Semantics { syntax_tree: tree, lines, @@ -148,11 +148,7 @@ impl<'a> Semantics<'a> { semantics } - pub fn syntax(&self) -> &SyntaxTree<'a> { - &self.syntax_tree - } - - pub fn errors(&self) -> Vec { + pub fn snapshot_errors(&self) -> Vec { (*self.errors.borrow()).clone() } @@ -181,18 +177,15 @@ impl<'a> Semantics<'a> { where T: ToString, { - let start = tree.start_position(&self.syntax_tree).unwrap(); - let end = tree.start_position(&self.syntax_tree).unwrap(); - self.report_error_span(start, end, error) + self.report_error_span(tree.start_pos, tree.end_pos, error) } fn report_error_tree_ref(&self, tree: TreeRef, error: T) where T: ToString, { - let start = self.syntax_tree.start_position(tree).unwrap(); - let end = self.syntax_tree.end_position(tree).unwrap(); - self.report_error_span(start, end, error) + let tree = &self.syntax_tree[tree]; + self.report_error_span(tree.start_pos, tree.end_pos, error) } fn gather_errors(&mut self, tree: TreeRef) { @@ -259,7 +252,7 @@ impl<'a> Semantics<'a> { match (op.kind, argument_type) { (TokenKind::Plus, Type::F64) => Some(Type::F64), (TokenKind::Minus, Type::F64) => Some(Type::F64), - (TokenKind::Bang, Type::Bool) => Some(Type::F64), + (TokenKind::Bang, Type::Bool) => Some(Type::Bool), // This is dumb and should be punished, probably. (_, Type::Unreachable) => { diff --git a/fine/src/tokens.rs b/fine/src/tokens.rs index dedbf864..c634d7c3 100644 --- a/fine/src/tokens.rs +++ b/fine/src/tokens.rs @@ -429,7 +429,11 @@ impl<'a> Tokens<'a> { }; match c { - ' ' | '\t' | '\r' | '\n' => self.whitespace(pos), + ' ' | '\t' | '\r' => self.whitespace(pos), + '\n' => { + self.lines.add_line(pos); + self.whitespace(pos) + } '{' => self.token(pos, TokenKind::LeftBrace), '}' => self.token(pos, TokenKind::RightBrace), '[' => self.token(pos, TokenKind::LeftBracket), diff --git a/fine/tests/example_tests.rs b/fine/tests/example_tests.rs index 36efaff4..a80b057b 100644 --- a/fine/tests/example_tests.rs +++ b/fine/tests/example_tests.rs @@ -1,4 +1,5 @@ use fine::parser::SyntaxTree; +use fine::semantics::{Semantics, Type}; use fine::tokens::Lines; use pretty_assertions::assert_eq; @@ -70,7 +71,7 @@ fn rebase_concrete(source_path: &str, dump: &str) { } fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) { - let dump = tree.dump(); + let dump = tree.dump(false); let rebase = std::env::var("FINE_TEST_REBASE") .unwrap_or(String::new()) .to_lowercase(); @@ -85,12 +86,52 @@ fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) { } fn assert_type_at( - _tree: &SyntaxTree, - _lines: &Lines, - _pos: usize, - _expected: &str, + tree: &SyntaxTree, + lines: &Lines, + pos: usize, + expected: &str, _source_path: &str, ) { + let tree_ref = match tree.find_tree_at(pos) { + Some(t) => t, + None => { + println!("Unable to find the subtee at position {pos}! Parsed the tree as:"); + println!("\n{}", tree.dump(true)); + panic!("Cannot find tree at position {pos}"); + } + }; + + let semantics = Semantics::new(tree, lines); + let tree_type = semantics.type_of(tree_ref, true); + + let actual = format!("{}", tree_type.unwrap_or(Type::Error)); + if actual != expected { + println!( + "The type of the {:?} tree at position {pos} had the wrong type! Parsed the tree as:", + tree[tree_ref].kind + ); + println!("\n{}", tree.dump(true)); + + let errors = semantics.snapshot_errors(); + if errors.len() == 0 { + println!("There were no errors reported during type checking.\n"); + } else { + println!( + "{} error{} reported during type checking:", + errors.len(), + if errors.len() == 1 { "" } else { "s" } + ); + for error in &errors { + println!(" Error: {error}"); + } + println!(); + } + + assert_eq!( + expected, actual, + "The type of the tree at position {pos} was incorrect" + ); + } } include!(concat!(env!("OUT_DIR"), "/generated_tests.rs")); diff --git a/fine/tests/expression/arithmetic.fine b/fine/tests/expression/arithmetic.fine index fe61663a..f5ac71e3 100644 --- a/fine/tests/expression/arithmetic.fine +++ b/fine/tests/expression/arithmetic.fine @@ -20,3 +20,5 @@ // | Semicolon:'";"' // 1 * 2 + -3 * 4; + +// type: 532 f64 \ No newline at end of file diff --git a/fine/tests/expression/boolean.fine b/fine/tests/expression/boolean.fine index 340b9e65..01f570f1 100644 --- a/fine/tests/expression/boolean.fine +++ b/fine/tests/expression/boolean.fine @@ -20,3 +20,5 @@ // | Semicolon:'";"' // true and false or false and !true; + +// type: 549 bool diff --git a/fine/tests/expression/conditional.fine b/fine/tests/expression/conditional.fine index a3224eb1..a8c03cd7 100644 --- a/fine/tests/expression/conditional.fine +++ b/fine/tests/expression/conditional.fine @@ -24,3 +24,21 @@ // | RightBrace:'"}"' // if true { "discarded"; 23 } else { 45 } + +// Here come some type probes! +// (type of the condition) +// type: 667 bool +// +// (the discarded expression) +// type: 674 string +// +// (the "then" clause) +// type: 686 f64 +// type: 689 f64 +// +// (the "else" clause) +// type: 696 f64 +// type: 699 f64 +// +// (the overall expression) +// type: 664 f64 \ No newline at end of file diff --git a/fine/tests/expression/number.fine b/fine/tests/expression/number.fine index 08274c06..ae01ea54 100644 --- a/fine/tests/expression/number.fine +++ b/fine/tests/expression/number.fine @@ -4,6 +4,7 @@ // | LiteralExpression // | Number:'"42"' // | Semicolon:'";"' -// -// type: 138 Number + 42; + +// type: 129 f64 diff --git a/fine/tests/expression/strings.fine b/fine/tests/expression/strings.fine index eab2d5ea..808ff8ff 100644 --- a/fine/tests/expression/strings.fine +++ b/fine/tests/expression/strings.fine @@ -10,3 +10,5 @@ // | Semicolon:'";"' // "Hello " + 'world!'; + +// type: 261 string \ No newline at end of file