[fine] Type testing with probes and reporting

I'm proud of the test harness here actually. Also fix a bug in
checking!
This commit is contained in:
John Doty 2024-01-05 17:10:15 -08:00
parent c0f40aa512
commit 618e0028d3
10 changed files with 192 additions and 78 deletions

View file

@ -1,6 +1,7 @@
// NOTE: much of this parser structure derived from
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt::Write as _;
use std::{cell::Cell, num::NonZeroU32};
pub mod old; // Until I decide to delete it.
@ -22,10 +23,22 @@ impl<'a> SyntaxTree<'a> {
self.root
}
pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef {
pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef {
assert!(t.parent.is_none());
let tr = TreeRef::from_index(self.trees.len());
t.start_pos = t
.children
.first()
.map(|c| c.start_position(&self))
.unwrap_or(0);
t.end_pos = t
.children
.last()
.map(|c| c.end_position(&self))
.unwrap_or(t.start_pos);
// NOTE: Because of the difficulty of holding multiple mutable
// references, this is our best chance to patch up parent
// pointers.
@ -34,23 +47,52 @@ impl<'a> SyntaxTree<'a> {
self[*ct].parent = Some(tr);
}
}
self.trees.push(t);
tr
}
pub fn dump(&self) -> String {
match self.root {
Some(r) => self[r].dump(self),
None => String::new(),
pub fn dump(&self, with_positions: bool) -> String {
let mut output = String::new();
if let Some(r) = self.root {
self[r].dump(self, with_positions, &mut output);
}
output
}
pub fn start_position(&self, t: TreeRef) -> Option<usize> {
self[t].start_position(&self)
pub fn start_position(&self, t: TreeRef) -> usize {
self[t].start_pos
}
pub fn end_position(&self, t: TreeRef) -> Option<usize> {
self[t].end_position(&self)
pub fn end_position(&self, t: TreeRef) -> usize {
self[t].end_pos
}
/// Finds the innermost tree whose half-open span `[start_pos, end_pos)`
/// contains `pos`.
///
/// Returns `None` when there is no root, or when `pos` lies outside the
/// root's span. Only `Child::Tree` children are descended into; once no
/// child tree covers `pos`, the tree reached so far is returned.
pub fn find_tree_at(&self, pos: usize) -> Option<TreeRef> {
    // True when `pos` lies inside the span recorded on the given tree.
    let covers = |tr: TreeRef| {
        let node = &self[tr];
        node.start_pos <= pos && pos < node.end_pos
    };

    let mut current = self.root?;
    if !covers(current) {
        return None;
    }

    loop {
        // First child tree (in child order) whose span contains `pos`.
        let deeper = self[current].children.iter().find_map(|child| match child {
            Child::Tree(tr) if covers(*tr) => Some(*tr),
            _ => None,
        });
        match deeper {
            Some(tr) => current = tr,
            None => return Some(current),
        }
    }
}
}
@ -95,6 +137,8 @@ pub enum TreeKind {
/// A node of the syntax tree: a kind, an optional parent link, a recorded
/// position span, and an ordered list of children.
pub struct Tree<'a> {
/// The kind of this node.
pub kind: TreeKind,
/// The tree that contains this one. `SyntaxTree::add_tree` asserts this is
/// `None` on the tree being added, and sets it on each child tree to point
/// at the newly added parent.
pub parent: Option<TreeRef>,
/// Start of this tree's span. The parser initialises it to 0;
/// `SyntaxTree::add_tree` overwrites it with the first child's start
/// position, or 0 when there are no children.
pub start_pos: usize,
/// End of this tree's span. The parser initialises it to 0;
/// `SyntaxTree::add_tree` overwrites it with the last child's end position,
/// or `start_pos` when there are no children.
pub end_pos: usize,
/// The child tokens and child trees of this node, in order.
pub children: Vec<Child<'a>>,
}
@ -118,41 +162,9 @@ impl<'a> Tree<'a> {
})
.flatten()
}
pub fn start_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
for child in &self.children {
let start = match child {
Child::Tree(tr) => tree.start_position(*tr),
Child::Token(tok) => Some(tok.start),
};
if let Some(start) = start {
return Some(start);
}
}
// Fundamentally no tokens in this tree. This seems *broken*.
None
}
pub fn end_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
for child in self.children.iter().rev() {
let end = match child {
Child::Tree(tr) => tree.end_position(*tr),
Child::Token(tok) => Some(tok.start + tok.as_str().len()),
};
if let Some(start) = end {
return Some(start);
}
}
// Fundamentally no tokens in this tree. This seems *broken*.
None
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct TreeRef(NonZeroU32);
impl TreeRef {
@ -168,13 +180,15 @@ impl TreeRef {
}
impl<'a> Tree<'a> {
pub fn dump(&self, tree: &SyntaxTree<'a>) -> String {
let mut output = String::new();
output.push_str(&format!("{:?}\n", self.kind));
for child in self.children.iter() {
child.dump_rec(2, tree, &mut output);
pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) {
let _ = write!(output, "{:?}", self.kind);
if with_positions {
let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos);
}
let _ = write!(output, "\n");
for child in self.children.iter() {
child.dump_rec(2, tree, with_positions, output);
}
output
}
}
@ -184,21 +198,52 @@ pub enum Child<'a> {
}
impl<'a> Child<'a> {
fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) {
fn dump_rec(
&self,
indent: usize,
tree: &SyntaxTree<'a>,
with_positions: bool,
output: &mut String,
) {
for _ in 0..indent {
output.push(' ');
let _ = write!(output, " ");
}
match self {
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
Child::Token(t) => {
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str());
if with_positions {
let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len());
}
let _ = write!(output, "\n");
}
Child::Tree(t) => {
let t = &tree[*t];
output.push_str(&format!("{:?}\n", t.kind));
let _ = write!(output, "{:?}", t.kind);
if with_positions {
let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos);
}
let _ = write!(output, "\n");
for child in t.children.iter() {
child.dump_rec(indent + 2, tree, output);
child.dump_rec(indent + 2, tree, with_positions, output);
}
}
}
}
/// Returns where this child begins: the `start_pos` recorded on the
/// referenced tree, or the token's own `start`.
pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(tree_ref) => {
            let subtree = &syntax_tree[*tree_ref];
            subtree.start_pos
        }
        Child::Token(token) => token.start,
    }
}
/// Returns where this child ends: the `end_pos` recorded on the referenced
/// tree, or the token's `start` plus the length of its text.
pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(tree_ref) => {
            let subtree = &syntax_tree[*tree_ref];
            subtree.end_pos
        }
        Child::Token(token) => {
            let text_len = token.as_str().len();
            token.start + text_len
        }
    }
}
}
enum ParseEvent<'a> {
@ -373,6 +418,8 @@ impl<'a> CParser<'a> {
ParseEvent::Start { kind } => stack.push(Tree {
kind,
parent: None,
start_pos: 0,
end_pos: 0,
children: Vec::new(),
}),
@ -726,6 +773,10 @@ mod tests {
fn tree_ref_size() {
    // The whole point of the compact tree ref is that an optional one
    // stays "small", so pin its size here.
    //
    // TODO: This is a dumb optimization because tokens are huge, so Child
    //       is huge no matter what we do. If we retain tokens out of line
    //       then we can re-visit this optimization.
    let optional_ref_size = std::mem::size_of::<Option<TreeRef>>();
    assert_eq!(4, optional_ref_size);
}
}

View file

@ -51,7 +51,7 @@ impl fmt::Debug for Error {
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}:{}: {}", self.start.0, self.end.0, self.message)
write!(f, "{}:{}: {}", self.start.0, self.start.1, self.message)
}
}
@ -123,14 +123,14 @@ impl fmt::Display for Type {
pub struct Semantics<'a> {
// TODO: Do I really want my own copy here? Should we standardize on Arc
// or Rc or some other nice sharing mechanism?
syntax_tree: SyntaxTree<'a>,
lines: Lines,
syntax_tree: &'a SyntaxTree<'a>,
lines: &'a Lines,
errors: RefCell<Vec<Error>>,
types: RefCell<HashMap<TreeRef, Type>>,
}
impl<'a> Semantics<'a> {
pub fn new(tree: SyntaxTree<'a>, lines: Lines) -> Self {
pub fn new(tree: &'a SyntaxTree<'a>, lines: &'a Lines) -> Self {
let mut semantics = Semantics {
syntax_tree: tree,
lines,
@ -148,11 +148,7 @@ impl<'a> Semantics<'a> {
semantics
}
pub fn syntax(&self) -> &SyntaxTree<'a> {
&self.syntax_tree
}
pub fn errors(&self) -> Vec<Error> {
pub fn snapshot_errors(&self) -> Vec<Error> {
(*self.errors.borrow()).clone()
}
@ -181,18 +177,15 @@ impl<'a> Semantics<'a> {
where
T: ToString,
{
let start = tree.start_position(&self.syntax_tree).unwrap();
let end = tree.start_position(&self.syntax_tree).unwrap();
self.report_error_span(start, end, error)
self.report_error_span(tree.start_pos, tree.end_pos, error)
}
fn report_error_tree_ref<T>(&self, tree: TreeRef, error: T)
where
T: ToString,
{
let start = self.syntax_tree.start_position(tree).unwrap();
let end = self.syntax_tree.end_position(tree).unwrap();
self.report_error_span(start, end, error)
let tree = &self.syntax_tree[tree];
self.report_error_span(tree.start_pos, tree.end_pos, error)
}
fn gather_errors(&mut self, tree: TreeRef) {
@ -259,7 +252,7 @@ impl<'a> Semantics<'a> {
match (op.kind, argument_type) {
(TokenKind::Plus, Type::F64) => Some(Type::F64),
(TokenKind::Minus, Type::F64) => Some(Type::F64),
(TokenKind::Bang, Type::Bool) => Some(Type::F64),
(TokenKind::Bang, Type::Bool) => Some(Type::Bool),
// This is dumb and should be punished, probably.
(_, Type::Unreachable) => {

View file

@ -429,7 +429,11 @@ impl<'a> Tokens<'a> {
};
match c {
' ' | '\t' | '\r' | '\n' => self.whitespace(pos),
' ' | '\t' | '\r' => self.whitespace(pos),
'\n' => {
self.lines.add_line(pos);
self.whitespace(pos)
}
'{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket),