[fine] Type testing with probes and reporting

I'm proud of the test harness here actually. Also fix a bug in
checking!
This commit is contained in:
John Doty 2024-01-05 17:10:15 -08:00
parent c0f40aa512
commit 618e0028d3
10 changed files with 192 additions and 78 deletions

View file

@ -15,7 +15,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
while let Some(line) = lines.next() { while let Some(line) = lines.next() {
let line = match line.strip_prefix("//") { let line = match line.strip_prefix("//") {
Some(line) => line, Some(line) => line,
None => break, None => continue,
}; };
let line = line.trim(); let line = line.trim();

View file

@ -1,6 +1,7 @@
// NOTE: much of this parser structure derived from // NOTE: much of this parser structure derived from
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html // https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
use crate::tokens::{Lines, Token, TokenKind, Tokens}; use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt::Write as _;
use std::{cell::Cell, num::NonZeroU32}; use std::{cell::Cell, num::NonZeroU32};
pub mod old; // Until I decide to delete it. pub mod old; // Until I decide to delete it.
@ -22,10 +23,22 @@ impl<'a> SyntaxTree<'a> {
self.root self.root
} }
pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef { pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef {
assert!(t.parent.is_none()); assert!(t.parent.is_none());
let tr = TreeRef::from_index(self.trees.len()); let tr = TreeRef::from_index(self.trees.len());
t.start_pos = t
.children
.first()
.map(|c| c.start_position(&self))
.unwrap_or(0);
t.end_pos = t
.children
.last()
.map(|c| c.end_position(&self))
.unwrap_or(t.start_pos);
// NOTE: Because of the difficulty of holding multiple mutable // NOTE: Because of the difficulty of holding multiple mutable
// references, this is our best chance to patch up parent // references, this is our best chance to patch up parent
// pointers. // pointers.
@ -34,23 +47,52 @@ impl<'a> SyntaxTree<'a> {
self[*ct].parent = Some(tr); self[*ct].parent = Some(tr);
} }
} }
self.trees.push(t); self.trees.push(t);
tr tr
} }
pub fn dump(&self) -> String { pub fn dump(&self, with_positions: bool) -> String {
match self.root { let mut output = String::new();
Some(r) => self[r].dump(self), if let Some(r) = self.root {
None => String::new(), self[r].dump(self, with_positions, &mut output);
}
output
}
/// Returns the `start_pos` stored on the tree node referenced by `t`.
pub fn start_position(&self, t: TreeRef) -> usize {
    let node = &self[t];
    node.start_pos
}
/// Returns the `end_pos` stored on the tree node referenced by `t`.
pub fn end_position(&self, t: TreeRef) -> usize {
    let node = &self[t];
    node.end_pos
}
pub fn find_tree_at(&self, pos: usize) -> Option<TreeRef> {
let mut current = self.root?;
let mut tree = &self[current];
if pos < tree.start_pos || pos >= tree.end_pos {
return None;
}
loop {
let mut found = false;
for child in &tree.children {
if let Child::Tree(next) = child {
let next_tree = &self[*next];
if pos >= next_tree.start_pos && pos < next_tree.end_pos {
found = true;
current = *next;
tree = next_tree;
break;
}
} }
} }
pub fn start_position(&self, t: TreeRef) -> Option<usize> { if !found {
self[t].start_position(&self) return Some(current);
}
} }
pub fn end_position(&self, t: TreeRef) -> Option<usize> {
self[t].end_position(&self)
} }
} }
@ -95,6 +137,8 @@ pub enum TreeKind {
pub struct Tree<'a> { pub struct Tree<'a> {
pub kind: TreeKind, pub kind: TreeKind,
pub parent: Option<TreeRef>, pub parent: Option<TreeRef>,
pub start_pos: usize,
pub end_pos: usize,
pub children: Vec<Child<'a>>, pub children: Vec<Child<'a>>,
} }
@ -118,41 +162,9 @@ impl<'a> Tree<'a> {
}) })
.flatten() .flatten()
} }
pub fn start_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
for child in &self.children {
let start = match child {
Child::Tree(tr) => tree.start_position(*tr),
Child::Token(tok) => Some(tok.start),
};
if let Some(start) = start {
return Some(start);
}
} }
// Fundamentally no tokens in this tree. This seems *broken*. #[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
None
}
pub fn end_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
for child in self.children.iter().rev() {
let end = match child {
Child::Tree(tr) => tree.end_position(*tr),
Child::Token(tok) => Some(tok.start + tok.as_str().len()),
};
if let Some(start) = end {
return Some(start);
}
}
// Fundamentally no tokens in this tree. This seems *broken*.
None
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
pub struct TreeRef(NonZeroU32); pub struct TreeRef(NonZeroU32);
impl TreeRef { impl TreeRef {
@ -168,13 +180,15 @@ impl TreeRef {
} }
impl<'a> Tree<'a> { impl<'a> Tree<'a> {
pub fn dump(&self, tree: &SyntaxTree<'a>) -> String { pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) {
let mut output = String::new(); let _ = write!(output, "{:?}", self.kind);
output.push_str(&format!("{:?}\n", self.kind)); if with_positions {
for child in self.children.iter() { let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos);
child.dump_rec(2, tree, &mut output); }
let _ = write!(output, "\n");
for child in self.children.iter() {
child.dump_rec(2, tree, with_positions, output);
} }
output
} }
} }
@ -184,21 +198,52 @@ pub enum Child<'a> {
} }
impl<'a> Child<'a> { impl<'a> Child<'a> {
fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) { fn dump_rec(
&self,
indent: usize,
tree: &SyntaxTree<'a>,
with_positions: bool,
output: &mut String,
) {
for _ in 0..indent { for _ in 0..indent {
output.push(' '); let _ = write!(output, " ");
} }
match self { match self {
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())), Child::Token(t) => {
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str());
if with_positions {
let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len());
}
let _ = write!(output, "\n");
}
Child::Tree(t) => { Child::Tree(t) => {
let t = &tree[*t]; let t = &tree[*t];
output.push_str(&format!("{:?}\n", t.kind)); let _ = write!(output, "{:?}", t.kind);
if with_positions {
let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos);
}
let _ = write!(output, "\n");
for child in t.children.iter() { for child in t.children.iter() {
child.dump_rec(indent + 2, tree, output); child.dump_rec(indent + 2, tree, with_positions, output);
} }
} }
} }
} }
/// Returns the start offset of this child.
///
/// A subtree reports the `start_pos` recorded on its node in `syntax_tree`;
/// a token reports its own `start`.
pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(node) => syntax_tree[*node].start_pos,
        Child::Token(token) => token.start,
    }
}
/// Returns the end offset (exclusive) of this child.
///
/// A subtree reports the `end_pos` recorded on its node in `syntax_tree`;
/// a token's end is its `start` plus the length of its text.
pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(node) => syntax_tree[*node].end_pos,
        Child::Token(token) => {
            let text_len = token.as_str().len();
            token.start + text_len
        }
    }
}
} }
enum ParseEvent<'a> { enum ParseEvent<'a> {
@ -373,6 +418,8 @@ impl<'a> CParser<'a> {
ParseEvent::Start { kind } => stack.push(Tree { ParseEvent::Start { kind } => stack.push(Tree {
kind, kind,
parent: None, parent: None,
start_pos: 0,
end_pos: 0,
children: Vec::new(), children: Vec::new(),
}), }),
@ -726,6 +773,10 @@ mod tests {
fn tree_ref_size() { fn tree_ref_size() {
// What's the point of doing all that work if the tree ref isn't nice // What's the point of doing all that work if the tree ref isn't nice
// and "small"? // and "small"?
//
// TODO: This is a dumb optimization because tokens are
// huge so Child is huge no matter what we do. If we retain
// tokens out of line then we can re-visit this optimization.
assert_eq!(4, std::mem::size_of::<Option<TreeRef>>()); assert_eq!(4, std::mem::size_of::<Option<TreeRef>>());
} }
} }

View file

@ -51,7 +51,7 @@ impl fmt::Debug for Error {
impl fmt::Display for Error { impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message) write!(f, "{}:{}: {}", self.start.0, self.start.1, self.message)
} }
} }
@ -123,14 +123,14 @@ impl fmt::Display for Type {
pub struct Semantics<'a> { pub struct Semantics<'a> {
// TODO: Do I really want my own copy here? Should we standardize on Arc // TODO: Do I really want my own copy here? Should we standardize on Arc
// or Rc or some other nice sharing mechanism? // or Rc or some other nice sharing mechanism?
syntax_tree: SyntaxTree<'a>, syntax_tree: &'a SyntaxTree<'a>,
lines: Lines, lines: &'a Lines,
errors: RefCell<Vec<Error>>, errors: RefCell<Vec<Error>>,
types: RefCell<HashMap<TreeRef, Type>>, types: RefCell<HashMap<TreeRef, Type>>,
} }
impl<'a> Semantics<'a> { impl<'a> Semantics<'a> {
pub fn new(tree: SyntaxTree<'a>, lines: Lines) -> Self { pub fn new(tree: &'a SyntaxTree<'a>, lines: &'a Lines) -> Self {
let mut semantics = Semantics { let mut semantics = Semantics {
syntax_tree: tree, syntax_tree: tree,
lines, lines,
@ -148,11 +148,7 @@ impl<'a> Semantics<'a> {
semantics semantics
} }
pub fn syntax(&self) -> &SyntaxTree<'a> { pub fn snapshot_errors(&self) -> Vec<Error> {
&self.syntax_tree
}
pub fn errors(&self) -> Vec<Error> {
(*self.errors.borrow()).clone() (*self.errors.borrow()).clone()
} }
@ -181,18 +177,15 @@ impl<'a> Semantics<'a> {
where where
T: ToString, T: ToString,
{ {
let start = tree.start_position(&self.syntax_tree).unwrap(); self.report_error_span(tree.start_pos, tree.end_pos, error)
let end = tree.start_position(&self.syntax_tree).unwrap();
self.report_error_span(start, end, error)
} }
fn report_error_tree_ref<T>(&self, tree: TreeRef, error: T) fn report_error_tree_ref<T>(&self, tree: TreeRef, error: T)
where where
T: ToString, T: ToString,
{ {
let start = self.syntax_tree.start_position(tree).unwrap(); let tree = &self.syntax_tree[tree];
let end = self.syntax_tree.end_position(tree).unwrap(); self.report_error_span(tree.start_pos, tree.end_pos, error)
self.report_error_span(start, end, error)
} }
fn gather_errors(&mut self, tree: TreeRef) { fn gather_errors(&mut self, tree: TreeRef) {
@ -259,7 +252,7 @@ impl<'a> Semantics<'a> {
match (op.kind, argument_type) { match (op.kind, argument_type) {
(TokenKind::Plus, Type::F64) => Some(Type::F64), (TokenKind::Plus, Type::F64) => Some(Type::F64),
(TokenKind::Minus, Type::F64) => Some(Type::F64), (TokenKind::Minus, Type::F64) => Some(Type::F64),
(TokenKind::Bang, Type::Bool) => Some(Type::F64), (TokenKind::Bang, Type::Bool) => Some(Type::Bool),
// This is dumb and should be punished, probably. // This is dumb and should be punished, probably.
(_, Type::Unreachable) => { (_, Type::Unreachable) => {

View file

@ -429,7 +429,11 @@ impl<'a> Tokens<'a> {
}; };
match c { match c {
' ' | '\t' | '\r' | '\n' => self.whitespace(pos), ' ' | '\t' | '\r' => self.whitespace(pos),
'\n' => {
self.lines.add_line(pos);
self.whitespace(pos)
}
'{' => self.token(pos, TokenKind::LeftBrace), '{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace), '}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket), '[' => self.token(pos, TokenKind::LeftBracket),

View file

@ -1,4 +1,5 @@
use fine::parser::SyntaxTree; use fine::parser::SyntaxTree;
use fine::semantics::{Semantics, Type};
use fine::tokens::Lines; use fine::tokens::Lines;
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
@ -70,7 +71,7 @@ fn rebase_concrete(source_path: &str, dump: &str) {
} }
fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) { fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) {
let dump = tree.dump(); let dump = tree.dump(false);
let rebase = std::env::var("FINE_TEST_REBASE") let rebase = std::env::var("FINE_TEST_REBASE")
.unwrap_or(String::new()) .unwrap_or(String::new())
.to_lowercase(); .to_lowercase();
@ -85,12 +86,52 @@ fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) {
} }
fn assert_type_at( fn assert_type_at(
_tree: &SyntaxTree, tree: &SyntaxTree,
_lines: &Lines, lines: &Lines,
_pos: usize, pos: usize,
_expected: &str, expected: &str,
_source_path: &str, _source_path: &str,
) { ) {
let tree_ref = match tree.find_tree_at(pos) {
Some(t) => t,
None => {
println!("Unable to find the subtee at position {pos}! Parsed the tree as:");
println!("\n{}", tree.dump(true));
panic!("Cannot find tree at position {pos}");
}
};
let semantics = Semantics::new(tree, lines);
let tree_type = semantics.type_of(tree_ref, true);
let actual = format!("{}", tree_type.unwrap_or(Type::Error));
if actual != expected {
println!(
"The type of the {:?} tree at position {pos} had the wrong type! Parsed the tree as:",
tree[tree_ref].kind
);
println!("\n{}", tree.dump(true));
let errors = semantics.snapshot_errors();
if errors.len() == 0 {
println!("There were no errors reported during type checking.\n");
} else {
println!(
"{} error{} reported during type checking:",
errors.len(),
if errors.len() == 1 { "" } else { "s" }
);
for error in &errors {
println!(" Error: {error}");
}
println!();
}
assert_eq!(
expected, actual,
"The type of the tree at position {pos} was incorrect"
);
}
} }
include!(concat!(env!("OUT_DIR"), "/generated_tests.rs")); include!(concat!(env!("OUT_DIR"), "/generated_tests.rs"));

View file

@ -20,3 +20,5 @@
// | Semicolon:'";"' // | Semicolon:'";"'
// //
1 * 2 + -3 * 4; 1 * 2 + -3 * 4;
// type: 532 f64

View file

@ -20,3 +20,5 @@
// | Semicolon:'";"' // | Semicolon:'";"'
// //
true and false or false and !true; true and false or false and !true;
// type: 549 bool

View file

@ -24,3 +24,21 @@
// | RightBrace:'"}"' // | RightBrace:'"}"'
// //
if true { "discarded"; 23 } else { 45 } if true { "discarded"; 23 } else { 45 }
// Here come some type probes!
// (type of the condition)
// type: 667 bool
//
// (the discarded expression)
// type: 674 string
//
// (the "then" clause)
// type: 686 f64
// type: 689 f64
//
// (the "else" clause)
// type: 696 f64
// type: 699 f64
//
// (the overall expression)
// type: 664 f64

View file

@ -4,6 +4,7 @@
// | LiteralExpression // | LiteralExpression
// | Number:'"42"' // | Number:'"42"'
// | Semicolon:'";"' // | Semicolon:'";"'
//
// type: 138 Number
42; 42;
// type: 129 f64

View file

@ -10,3 +10,5 @@
// | Semicolon:'";"' // | Semicolon:'";"'
// //
"Hello " + 'world!'; "Hello " + 'world!';
// type: 261 string