[fine] Type testing with probes and reporting

I'm proud of the test harness here actually. Also fix a bug in
checking!
This commit is contained in:
John Doty 2024-01-05 17:10:15 -08:00
parent c0f40aa512
commit 618e0028d3
10 changed files with 192 additions and 78 deletions

View file

@ -15,7 +15,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
while let Some(line) = lines.next() {
let line = match line.strip_prefix("//") {
Some(line) => line,
None => break,
None => continue,
};
let line = line.trim();

View file

@ -1,6 +1,7 @@
// NOTE: much of this parser structure derived from
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt::Write as _;
use std::{cell::Cell, num::NonZeroU32};
pub mod old; // Until I decide to delete it.
@ -22,10 +23,22 @@ impl<'a> SyntaxTree<'a> {
self.root
}
pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef {
pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef {
assert!(t.parent.is_none());
let tr = TreeRef::from_index(self.trees.len());
t.start_pos = t
.children
.first()
.map(|c| c.start_position(&self))
.unwrap_or(0);
t.end_pos = t
.children
.last()
.map(|c| c.end_position(&self))
.unwrap_or(t.start_pos);
// NOTE: Because of the difficulty of holding multiple mutable
// references, this is our best chance to patch up parent
// pointers.
@ -34,23 +47,52 @@ impl<'a> SyntaxTree<'a> {
self[*ct].parent = Some(tr);
}
}
self.trees.push(t);
tr
}
pub fn dump(&self) -> String {
match self.root {
Some(r) => self[r].dump(self),
None => String::new(),
pub fn dump(&self, with_positions: bool) -> String {
let mut output = String::new();
if let Some(r) = self.root {
self[r].dump(self, with_positions, &mut output);
}
output
}
pub fn start_position(&self, t: TreeRef) -> Option<usize> {
self[t].start_position(&self)
pub fn start_position(&self, t: TreeRef) -> usize {
self[t].start_pos
}
pub fn end_position(&self, t: TreeRef) -> Option<usize> {
self[t].end_position(&self)
pub fn end_position(&self, t: TreeRef) -> usize {
self[t].end_pos
}
/// Locates the innermost tree whose half-open span `[start_pos, end_pos)`
/// contains `pos`.
///
/// Returns `None` if there is no root tree or if `pos` lies outside the
/// root's span. Otherwise the search starts at the root and repeatedly
/// steps into the first child tree (in child order) that covers `pos`,
/// stopping at the first tree with no such child. Token children are
/// never returned, only the tree that holds them.
pub fn find_tree_at(&self, pos: usize) -> Option<TreeRef> {
    // Half-open containment test against a tree's recorded span.
    let covers = |candidate: TreeRef| {
        let node = &self[candidate];
        node.start_pos <= pos && pos < node.end_pos
    };

    let mut innermost = self.root?;
    if !covers(innermost) {
        return None;
    }

    loop {
        // First child *tree* that covers the position, if any.
        let deeper = self[innermost]
            .children
            .iter()
            .find_map(|child| match child {
                Child::Tree(tr) if covers(*tr) => Some(*tr),
                Child::Tree(_) | Child::Token(_) => None,
            });

        match deeper {
            Some(next) => innermost = next,
            None => return Some(innermost),
        }
    }
}
}
@ -95,6 +137,8 @@ pub enum TreeKind {
/// A single node of the syntax tree, stored inside a `SyntaxTree`.
pub struct Tree<'a> {
/// What kind of syntactic construct this node represents.
pub kind: TreeKind,
/// The enclosing tree, if any. Must be `None` when the tree is handed to
/// `SyntaxTree::add_tree`; it is patched to `Some(..)` when the parent
/// tree is added later.
pub parent: Option<TreeRef>,
/// Start of this tree's span. Overwritten by `SyntaxTree::add_tree` with
/// the start position of the first child (or 0 when there are no
/// children), so the value supplied at construction is only a placeholder.
pub start_pos: usize,
/// End of this tree's span (exclusive). Overwritten by
/// `SyntaxTree::add_tree` with the end position of the last child (or
/// `start_pos` when there are no children).
pub end_pos: usize,
/// The child tokens and subtrees, in order.
pub children: Vec<Child<'a>>,
}
@ -118,41 +162,9 @@ impl<'a> Tree<'a> {
})
.flatten()
}
pub fn start_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
for child in &self.children {
let start = match child {
Child::Tree(tr) => tree.start_position(*tr),
Child::Token(tok) => Some(tok.start),
};
if let Some(start) = start {
return Some(start);
}
}
// Fundamentally no tokens in this tree. This seems *broken*.
None
}
pub fn end_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
for child in self.children.iter().rev() {
let end = match child {
Child::Tree(tr) => tree.end_position(*tr),
Child::Token(tok) => Some(tok.start + tok.as_str().len()),
};
if let Some(start) = end {
return Some(start);
}
}
// Fundamentally no tokens in this tree. This seems *broken*.
None
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct TreeRef(NonZeroU32);
impl TreeRef {
@ -168,13 +180,15 @@ impl TreeRef {
}
impl<'a> Tree<'a> {
pub fn dump(&self, tree: &SyntaxTree<'a>) -> String {
let mut output = String::new();
output.push_str(&format!("{:?}\n", self.kind));
for child in self.children.iter() {
child.dump_rec(2, tree, &mut output);
pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) {
let _ = write!(output, "{:?}", self.kind);
if with_positions {
let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos);
}
let _ = write!(output, "\n");
for child in self.children.iter() {
child.dump_rec(2, tree, with_positions, output);
}
output
}
}
@ -184,21 +198,52 @@ pub enum Child<'a> {
}
impl<'a> Child<'a> {
fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) {
fn dump_rec(
&self,
indent: usize,
tree: &SyntaxTree<'a>,
with_positions: bool,
output: &mut String,
) {
for _ in 0..indent {
output.push(' ');
let _ = write!(output, " ");
}
match self {
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
Child::Token(t) => {
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str());
if with_positions {
let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len());
}
let _ = write!(output, "\n");
}
Child::Tree(t) => {
let t = &tree[*t];
output.push_str(&format!("{:?}\n", t.kind));
let _ = write!(output, "{:?}", t.kind);
if with_positions {
let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos);
}
let _ = write!(output, "\n");
for child in t.children.iter() {
child.dump_rec(indent + 2, tree, output);
child.dump_rec(indent + 2, tree, with_positions, output);
}
}
}
}
/// Returns the position at which this child begins.
///
/// For a token this is the token's own `start`; for a subtree it is the
/// `start_pos` recorded on that tree inside `syntax_tree`.
pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(tree_ref) => syntax_tree[*tree_ref].start_pos,
        Child::Token(token) => token.start,
    }
}
/// Returns the (exclusive) position at which this child ends.
///
/// For a token this is its `start` plus the length of its text; for a
/// subtree it is the `end_pos` recorded on that tree inside `syntax_tree`.
pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(tree_ref) => syntax_tree[*tree_ref].end_pos,
        Child::Token(token) => {
            let text_len = token.as_str().len();
            token.start + text_len
        }
    }
}
}
enum ParseEvent<'a> {
@ -373,6 +418,8 @@ impl<'a> CParser<'a> {
ParseEvent::Start { kind } => stack.push(Tree {
kind,
parent: None,
start_pos: 0,
end_pos: 0,
children: Vec::new(),
}),
@ -726,6 +773,10 @@ mod tests {
// Guards the size of `Option<TreeRef>`: `TreeRef` wraps a `NonZeroU32`, so
// the `Option` is expected to cost no extra space beyond the 4-byte index.
fn tree_ref_size() {
// What's the point of doing all that work if the tree ref isn't nice
// and "small"?
//
// TODO: This is a dumb optimization because tokens are
// huge so Child is huge no matter what we do. If we retain
// tokens out of line then we can re-visit this optimization.
assert_eq!(4, std::mem::size_of::<Option<TreeRef>>());
}
}

View file

@ -51,7 +51,7 @@ impl fmt::Debug for Error {
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message)
write!(f, "{}:{}: {}", self.start.0, self.start.1, self.message)
}
}
@ -123,14 +123,14 @@ impl fmt::Display for Type {
pub struct Semantics<'a> {
// TODO: Do I really want my own copy here? Should we standardize on Arc
// or Rc or some other nice sharing mechanism?
syntax_tree: SyntaxTree<'a>,
lines: Lines,
syntax_tree: &'a SyntaxTree<'a>,
lines: &'a Lines,
errors: RefCell<Vec<Error>>,
types: RefCell<HashMap<TreeRef, Type>>,
}
impl<'a> Semantics<'a> {
pub fn new(tree: SyntaxTree<'a>, lines: Lines) -> Self {
pub fn new(tree: &'a SyntaxTree<'a>, lines: &'a Lines) -> Self {
let mut semantics = Semantics {
syntax_tree: tree,
lines,
@ -148,11 +148,7 @@ impl<'a> Semantics<'a> {
semantics
}
pub fn syntax(&self) -> &SyntaxTree<'a> {
&self.syntax_tree
}
pub fn errors(&self) -> Vec<Error> {
pub fn snapshot_errors(&self) -> Vec<Error> {
(*self.errors.borrow()).clone()
}
@ -181,18 +177,15 @@ impl<'a> Semantics<'a> {
where
T: ToString,
{
let start = tree.start_position(&self.syntax_tree).unwrap();
let end = tree.start_position(&self.syntax_tree).unwrap();
self.report_error_span(start, end, error)
self.report_error_span(tree.start_pos, tree.end_pos, error)
}
fn report_error_tree_ref<T>(&self, tree: TreeRef, error: T)
where
T: ToString,
{
let start = self.syntax_tree.start_position(tree).unwrap();
let end = self.syntax_tree.end_position(tree).unwrap();
self.report_error_span(start, end, error)
let tree = &self.syntax_tree[tree];
self.report_error_span(tree.start_pos, tree.end_pos, error)
}
fn gather_errors(&mut self, tree: TreeRef) {
@ -259,7 +252,7 @@ impl<'a> Semantics<'a> {
match (op.kind, argument_type) {
(TokenKind::Plus, Type::F64) => Some(Type::F64),
(TokenKind::Minus, Type::F64) => Some(Type::F64),
(TokenKind::Bang, Type::Bool) => Some(Type::F64),
(TokenKind::Bang, Type::Bool) => Some(Type::Bool),
// This is dumb and should be punished, probably.
(_, Type::Unreachable) => {

View file

@ -429,7 +429,11 @@ impl<'a> Tokens<'a> {
};
match c {
' ' | '\t' | '\r' | '\n' => self.whitespace(pos),
' ' | '\t' | '\r' => self.whitespace(pos),
'\n' => {
self.lines.add_line(pos);
self.whitespace(pos)
}
'{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket),

View file

@ -1,4 +1,5 @@
use fine::parser::SyntaxTree;
use fine::semantics::{Semantics, Type};
use fine::tokens::Lines;
use pretty_assertions::assert_eq;
@ -70,7 +71,7 @@ fn rebase_concrete(source_path: &str, dump: &str) {
}
fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) {
let dump = tree.dump();
let dump = tree.dump(false);
let rebase = std::env::var("FINE_TEST_REBASE")
.unwrap_or(String::new())
.to_lowercase();
@ -85,12 +86,52 @@ fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) {
}
fn assert_type_at(
_tree: &SyntaxTree,
_lines: &Lines,
_pos: usize,
_expected: &str,
tree: &SyntaxTree,
lines: &Lines,
pos: usize,
expected: &str,
_source_path: &str,
) {
let tree_ref = match tree.find_tree_at(pos) {
Some(t) => t,
None => {
println!("Unable to find the subtee at position {pos}! Parsed the tree as:");
println!("\n{}", tree.dump(true));
panic!("Cannot find tree at position {pos}");
}
};
let semantics = Semantics::new(tree, lines);
let tree_type = semantics.type_of(tree_ref, true);
let actual = format!("{}", tree_type.unwrap_or(Type::Error));
if actual != expected {
println!(
"The type of the {:?} tree at position {pos} had the wrong type! Parsed the tree as:",
tree[tree_ref].kind
);
println!("\n{}", tree.dump(true));
let errors = semantics.snapshot_errors();
if errors.len() == 0 {
println!("There were no errors reported during type checking.\n");
} else {
println!(
"{} error{} reported during type checking:",
errors.len(),
if errors.len() == 1 { "" } else { "s" }
);
for error in &errors {
println!(" Error: {error}");
}
println!();
}
assert_eq!(
expected, actual,
"The type of the tree at position {pos} was incorrect"
);
}
}
include!(concat!(env!("OUT_DIR"), "/generated_tests.rs"));

View file

@ -20,3 +20,5 @@
// | Semicolon:'";"'
//
1 * 2 + -3 * 4;
// type: 532 f64

View file

@ -20,3 +20,5 @@
// | Semicolon:'";"'
//
true and false or false and !true;
// type: 549 bool

View file

@ -24,3 +24,21 @@
// | RightBrace:'"}"'
//
if true { "discarded"; 23 } else { 45 }
// Here come some type probes!
// (type of the condition)
// type: 667 bool
//
// (the discarded expression)
// type: 674 string
//
// (the "then" clause)
// type: 686 f64
// type: 689 f64
//
// (the "else" clause)
// type: 696 f64
// type: 699 f64
//
// (the overall expression)
// type: 664 f64

View file

@ -4,6 +4,7 @@
// | LiteralExpression
// | Number:'"42"'
// | Semicolon:'";"'
//
// type: 138 Number
42;
// type: 129 f64

View file

@ -10,3 +10,5 @@
// | Semicolon:'";"'
//
"Hello " + 'world!';
// type: 261 string