[fine] Type testing with probes and reporting
I'm proud of the test harness here actually. Also fix a bug in checking!
This commit is contained in:
parent
c0f40aa512
commit
618e0028d3
10 changed files with 192 additions and 78 deletions
|
|
@ -15,7 +15,7 @@ fn generate_test_for_file(path: PathBuf) -> String {
|
|||
while let Some(line) = lines.next() {
|
||||
let line = match line.strip_prefix("//") {
|
||||
Some(line) => line,
|
||||
None => break,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let line = line.trim();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
// NOTE: much of this parser structure derived from
|
||||
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
|
||||
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
||||
use std::fmt::Write as _;
|
||||
use std::{cell::Cell, num::NonZeroU32};
|
||||
|
||||
pub mod old; // Until I decide to delete it.
|
||||
|
|
@ -22,10 +23,22 @@ impl<'a> SyntaxTree<'a> {
|
|||
self.root
|
||||
}
|
||||
|
||||
pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef {
|
||||
pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef {
|
||||
assert!(t.parent.is_none());
|
||||
let tr = TreeRef::from_index(self.trees.len());
|
||||
|
||||
t.start_pos = t
|
||||
.children
|
||||
.first()
|
||||
.map(|c| c.start_position(&self))
|
||||
.unwrap_or(0);
|
||||
|
||||
t.end_pos = t
|
||||
.children
|
||||
.last()
|
||||
.map(|c| c.end_position(&self))
|
||||
.unwrap_or(t.start_pos);
|
||||
|
||||
// NOTE: Because of the difficulty of holding multiple mutable
|
||||
// references, this is our best chance to patch up parent
|
||||
// pointers.
|
||||
|
|
@ -34,23 +47,52 @@ impl<'a> SyntaxTree<'a> {
|
|||
self[*ct].parent = Some(tr);
|
||||
}
|
||||
}
|
||||
|
||||
self.trees.push(t);
|
||||
tr
|
||||
}
|
||||
|
||||
pub fn dump(&self) -> String {
|
||||
match self.root {
|
||||
Some(r) => self[r].dump(self),
|
||||
None => String::new(),
|
||||
pub fn dump(&self, with_positions: bool) -> String {
|
||||
let mut output = String::new();
|
||||
if let Some(r) = self.root {
|
||||
self[r].dump(self, with_positions, &mut output);
|
||||
}
|
||||
output
|
||||
}
|
||||
|
||||
pub fn start_position(&self, t: TreeRef) -> Option<usize> {
|
||||
self[t].start_position(&self)
|
||||
pub fn start_position(&self, t: TreeRef) -> usize {
|
||||
self[t].start_pos
|
||||
}
|
||||
|
||||
pub fn end_position(&self, t: TreeRef) -> Option<usize> {
|
||||
self[t].end_position(&self)
|
||||
pub fn end_position(&self, t: TreeRef) -> usize {
|
||||
self[t].end_pos
|
||||
}
|
||||
|
||||
pub fn find_tree_at(&self, pos: usize) -> Option<TreeRef> {
|
||||
let mut current = self.root?;
|
||||
let mut tree = &self[current];
|
||||
if pos < tree.start_pos || pos >= tree.end_pos {
|
||||
return None;
|
||||
}
|
||||
|
||||
loop {
|
||||
let mut found = false;
|
||||
for child in &tree.children {
|
||||
if let Child::Tree(next) = child {
|
||||
let next_tree = &self[*next];
|
||||
if pos >= next_tree.start_pos && pos < next_tree.end_pos {
|
||||
found = true;
|
||||
current = *next;
|
||||
tree = next_tree;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
return Some(current);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -95,6 +137,8 @@ pub enum TreeKind {
|
|||
pub struct Tree<'a> {
|
||||
pub kind: TreeKind,
|
||||
pub parent: Option<TreeRef>,
|
||||
pub start_pos: usize,
|
||||
pub end_pos: usize,
|
||||
pub children: Vec<Child<'a>>,
|
||||
}
|
||||
|
||||
|
|
@ -118,41 +162,9 @@ impl<'a> Tree<'a> {
|
|||
})
|
||||
.flatten()
|
||||
}
|
||||
|
||||
pub fn start_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
|
||||
for child in &self.children {
|
||||
let start = match child {
|
||||
Child::Tree(tr) => tree.start_position(*tr),
|
||||
Child::Token(tok) => Some(tok.start),
|
||||
};
|
||||
|
||||
if let Some(start) = start {
|
||||
return Some(start);
|
||||
}
|
||||
}
|
||||
|
||||
// Fundamentally no tokens in this tree. This seems *broken*.
|
||||
None
|
||||
}
|
||||
|
||||
pub fn end_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
|
||||
for child in self.children.iter().rev() {
|
||||
let end = match child {
|
||||
Child::Tree(tr) => tree.end_position(*tr),
|
||||
Child::Token(tok) => Some(tok.start + tok.as_str().len()),
|
||||
};
|
||||
|
||||
if let Some(start) = end {
|
||||
return Some(start);
|
||||
}
|
||||
}
|
||||
|
||||
// Fundamentally no tokens in this tree. This seems *broken*.
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
|
||||
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
|
||||
pub struct TreeRef(NonZeroU32);
|
||||
|
||||
impl TreeRef {
|
||||
|
|
@ -168,13 +180,15 @@ impl TreeRef {
|
|||
}
|
||||
|
||||
impl<'a> Tree<'a> {
|
||||
pub fn dump(&self, tree: &SyntaxTree<'a>) -> String {
|
||||
let mut output = String::new();
|
||||
output.push_str(&format!("{:?}\n", self.kind));
|
||||
for child in self.children.iter() {
|
||||
child.dump_rec(2, tree, &mut output);
|
||||
pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) {
|
||||
let _ = write!(output, "{:?}", self.kind);
|
||||
if with_positions {
|
||||
let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos);
|
||||
}
|
||||
let _ = write!(output, "\n");
|
||||
for child in self.children.iter() {
|
||||
child.dump_rec(2, tree, with_positions, output);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -184,21 +198,52 @@ pub enum Child<'a> {
|
|||
}
|
||||
|
||||
impl<'a> Child<'a> {
|
||||
fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) {
|
||||
fn dump_rec(
|
||||
&self,
|
||||
indent: usize,
|
||||
tree: &SyntaxTree<'a>,
|
||||
with_positions: bool,
|
||||
output: &mut String,
|
||||
) {
|
||||
for _ in 0..indent {
|
||||
output.push(' ');
|
||||
let _ = write!(output, " ");
|
||||
}
|
||||
match self {
|
||||
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
|
||||
Child::Token(t) => {
|
||||
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str());
|
||||
if with_positions {
|
||||
let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len());
|
||||
}
|
||||
let _ = write!(output, "\n");
|
||||
}
|
||||
Child::Tree(t) => {
|
||||
let t = &tree[*t];
|
||||
output.push_str(&format!("{:?}\n", t.kind));
|
||||
let _ = write!(output, "{:?}", t.kind);
|
||||
if with_positions {
|
||||
let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos);
|
||||
}
|
||||
let _ = write!(output, "\n");
|
||||
|
||||
for child in t.children.iter() {
|
||||
child.dump_rec(indent + 2, tree, output);
|
||||
child.dump_rec(indent + 2, tree, with_positions, output);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize {
|
||||
match &self {
|
||||
Child::Token(t) => t.start,
|
||||
Child::Tree(t) => syntax_tree[*t].start_pos,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize {
|
||||
match &self {
|
||||
Child::Token(t) => t.start + t.as_str().len(),
|
||||
Child::Tree(t) => syntax_tree[*t].end_pos,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum ParseEvent<'a> {
|
||||
|
|
@ -373,6 +418,8 @@ impl<'a> CParser<'a> {
|
|||
ParseEvent::Start { kind } => stack.push(Tree {
|
||||
kind,
|
||||
parent: None,
|
||||
start_pos: 0,
|
||||
end_pos: 0,
|
||||
children: Vec::new(),
|
||||
}),
|
||||
|
||||
|
|
@ -726,6 +773,10 @@ mod tests {
|
|||
fn tree_ref_size() {
|
||||
// What's the point of doing all that work if the tree ref isn't nice
|
||||
// and "small"?
|
||||
//
|
||||
// TODO: This is a dumb optimization because tokens are
|
||||
// huge so Child is huge no matter what we do. If we retain
|
||||
// tokens out of line then we can re-visit this optimization.
|
||||
assert_eq!(4, std::mem::size_of::<Option<TreeRef>>());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ impl fmt::Debug for Error {
|
|||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message)
|
||||
write!(f, "{}:{}: {}", self.start.0, self.start.1, self.message)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -123,14 +123,14 @@ impl fmt::Display for Type {
|
|||
pub struct Semantics<'a> {
|
||||
// TODO: Do I really want my own copy here? Should we standardize on Arc
|
||||
// or Rc or some other nice sharing mechanism?
|
||||
syntax_tree: SyntaxTree<'a>,
|
||||
lines: Lines,
|
||||
syntax_tree: &'a SyntaxTree<'a>,
|
||||
lines: &'a Lines,
|
||||
errors: RefCell<Vec<Error>>,
|
||||
types: RefCell<HashMap<TreeRef, Type>>,
|
||||
}
|
||||
|
||||
impl<'a> Semantics<'a> {
|
||||
pub fn new(tree: SyntaxTree<'a>, lines: Lines) -> Self {
|
||||
pub fn new(tree: &'a SyntaxTree<'a>, lines: &'a Lines) -> Self {
|
||||
let mut semantics = Semantics {
|
||||
syntax_tree: tree,
|
||||
lines,
|
||||
|
|
@ -148,11 +148,7 @@ impl<'a> Semantics<'a> {
|
|||
semantics
|
||||
}
|
||||
|
||||
pub fn syntax(&self) -> &SyntaxTree<'a> {
|
||||
&self.syntax_tree
|
||||
}
|
||||
|
||||
pub fn errors(&self) -> Vec<Error> {
|
||||
pub fn snapshot_errors(&self) -> Vec<Error> {
|
||||
(*self.errors.borrow()).clone()
|
||||
}
|
||||
|
||||
|
|
@ -181,18 +177,15 @@ impl<'a> Semantics<'a> {
|
|||
where
|
||||
T: ToString,
|
||||
{
|
||||
let start = tree.start_position(&self.syntax_tree).unwrap();
|
||||
let end = tree.start_position(&self.syntax_tree).unwrap();
|
||||
self.report_error_span(start, end, error)
|
||||
self.report_error_span(tree.start_pos, tree.end_pos, error)
|
||||
}
|
||||
|
||||
fn report_error_tree_ref<T>(&self, tree: TreeRef, error: T)
|
||||
where
|
||||
T: ToString,
|
||||
{
|
||||
let start = self.syntax_tree.start_position(tree).unwrap();
|
||||
let end = self.syntax_tree.end_position(tree).unwrap();
|
||||
self.report_error_span(start, end, error)
|
||||
let tree = &self.syntax_tree[tree];
|
||||
self.report_error_span(tree.start_pos, tree.end_pos, error)
|
||||
}
|
||||
|
||||
fn gather_errors(&mut self, tree: TreeRef) {
|
||||
|
|
@ -259,7 +252,7 @@ impl<'a> Semantics<'a> {
|
|||
match (op.kind, argument_type) {
|
||||
(TokenKind::Plus, Type::F64) => Some(Type::F64),
|
||||
(TokenKind::Minus, Type::F64) => Some(Type::F64),
|
||||
(TokenKind::Bang, Type::Bool) => Some(Type::F64),
|
||||
(TokenKind::Bang, Type::Bool) => Some(Type::Bool),
|
||||
|
||||
// This is dumb and should be punished, probably.
|
||||
(_, Type::Unreachable) => {
|
||||
|
|
|
|||
|
|
@ -429,7 +429,11 @@ impl<'a> Tokens<'a> {
|
|||
};
|
||||
|
||||
match c {
|
||||
' ' | '\t' | '\r' | '\n' => self.whitespace(pos),
|
||||
' ' | '\t' | '\r' => self.whitespace(pos),
|
||||
'\n' => {
|
||||
self.lines.add_line(pos);
|
||||
self.whitespace(pos)
|
||||
}
|
||||
'{' => self.token(pos, TokenKind::LeftBrace),
|
||||
'}' => self.token(pos, TokenKind::RightBrace),
|
||||
'[' => self.token(pos, TokenKind::LeftBracket),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use fine::parser::SyntaxTree;
|
||||
use fine::semantics::{Semantics, Type};
|
||||
use fine::tokens::Lines;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
|
|
@ -70,7 +71,7 @@ fn rebase_concrete(source_path: &str, dump: &str) {
|
|||
}
|
||||
|
||||
fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) {
|
||||
let dump = tree.dump();
|
||||
let dump = tree.dump(false);
|
||||
let rebase = std::env::var("FINE_TEST_REBASE")
|
||||
.unwrap_or(String::new())
|
||||
.to_lowercase();
|
||||
|
|
@ -85,12 +86,52 @@ fn assert_concrete(tree: &SyntaxTree, expected: &str, source_path: &str) {
|
|||
}
|
||||
|
||||
fn assert_type_at(
|
||||
_tree: &SyntaxTree,
|
||||
_lines: &Lines,
|
||||
_pos: usize,
|
||||
_expected: &str,
|
||||
tree: &SyntaxTree,
|
||||
lines: &Lines,
|
||||
pos: usize,
|
||||
expected: &str,
|
||||
_source_path: &str,
|
||||
) {
|
||||
let tree_ref = match tree.find_tree_at(pos) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
println!("Unable to find the subtee at position {pos}! Parsed the tree as:");
|
||||
println!("\n{}", tree.dump(true));
|
||||
panic!("Cannot find tree at position {pos}");
|
||||
}
|
||||
};
|
||||
|
||||
let semantics = Semantics::new(tree, lines);
|
||||
let tree_type = semantics.type_of(tree_ref, true);
|
||||
|
||||
let actual = format!("{}", tree_type.unwrap_or(Type::Error));
|
||||
if actual != expected {
|
||||
println!(
|
||||
"The type of the {:?} tree at position {pos} had the wrong type! Parsed the tree as:",
|
||||
tree[tree_ref].kind
|
||||
);
|
||||
println!("\n{}", tree.dump(true));
|
||||
|
||||
let errors = semantics.snapshot_errors();
|
||||
if errors.len() == 0 {
|
||||
println!("There were no errors reported during type checking.\n");
|
||||
} else {
|
||||
println!(
|
||||
"{} error{} reported during type checking:",
|
||||
errors.len(),
|
||||
if errors.len() == 1 { "" } else { "s" }
|
||||
);
|
||||
for error in &errors {
|
||||
println!(" Error: {error}");
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
expected, actual,
|
||||
"The type of the tree at position {pos} was incorrect"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/generated_tests.rs"));
|
||||
|
|
|
|||
|
|
@ -20,3 +20,5 @@
|
|||
// | Semicolon:'";"'
|
||||
//
|
||||
1 * 2 + -3 * 4;
|
||||
|
||||
// type: 532 f64
|
||||
|
|
@ -20,3 +20,5 @@
|
|||
// | Semicolon:'";"'
|
||||
//
|
||||
true and false or false and !true;
|
||||
|
||||
// type: 549 bool
|
||||
|
|
|
|||
|
|
@ -24,3 +24,21 @@
|
|||
// | RightBrace:'"}"'
|
||||
//
|
||||
if true { "discarded"; 23 } else { 45 }
|
||||
|
||||
// Here come some type probes!
|
||||
// (type of the condition)
|
||||
// type: 667 bool
|
||||
//
|
||||
// (the discarded expression)
|
||||
// type: 674 string
|
||||
//
|
||||
// (the "then" clause)
|
||||
// type: 686 f64
|
||||
// type: 689 f64
|
||||
//
|
||||
// (the "else" clause)
|
||||
// type: 696 f64
|
||||
// type: 699 f64
|
||||
//
|
||||
// (the overall expression)
|
||||
// type: 664 f64
|
||||
|
|
@ -4,6 +4,7 @@
|
|||
// | LiteralExpression
|
||||
// | Number:'"42"'
|
||||
// | Semicolon:'";"'
|
||||
//
|
||||
// type: 138 Number
|
||||
|
||||
42;
|
||||
|
||||
// type: 129 f64
|
||||
|
|
|
|||
|
|
@ -10,3 +10,5 @@
|
|||
// | Semicolon:'";"'
|
||||
//
|
||||
"Hello " + 'world!';
|
||||
|
||||
// type: 261 string
|
||||
Loading…
Add table
Add a link
Reference in a new issue