[fine] Type testing with probes and reporting

I'm proud of the test harness here actually. Also fix a bug in
checking!
This commit is contained in:
John Doty 2024-01-05 17:10:15 -08:00
parent c0f40aa512
commit 618e0028d3
10 changed files with 192 additions and 78 deletions

View file

@ -1,6 +1,7 @@
// NOTE: much of this parser structure derived from
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
use crate::tokens::{Lines, Token, TokenKind, Tokens};
use std::fmt::Write as _;
use std::{cell::Cell, num::NonZeroU32};
pub mod old; // Until I decide to delete it.
@ -22,10 +23,22 @@ impl<'a> SyntaxTree<'a> {
self.root
}
pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef {
pub fn add_tree(&mut self, mut t: Tree<'a>) -> TreeRef {
assert!(t.parent.is_none());
let tr = TreeRef::from_index(self.trees.len());
t.start_pos = t
.children
.first()
.map(|c| c.start_position(&self))
.unwrap_or(0);
t.end_pos = t
.children
.last()
.map(|c| c.end_position(&self))
.unwrap_or(t.start_pos);
// NOTE: Because of the difficulty of holding multiple mutable
// references, this is our best chance to patch up parent
// pointers.
@ -34,23 +47,52 @@ impl<'a> SyntaxTree<'a> {
self[*ct].parent = Some(tr);
}
}
self.trees.push(t);
tr
}
pub fn dump(&self) -> String {
match self.root {
Some(r) => self[r].dump(self),
None => String::new(),
/// Renders the whole syntax tree as text, starting from the root.
///
/// Returns an empty string when the tree has no root. `with_positions`
/// is passed straight through to the root tree's `dump`.
pub fn dump(&self, with_positions: bool) -> String {
    let root = match self.root {
        Some(root) => root,
        // Nothing has been parsed into a root yet: nothing to print.
        None => return String::new(),
    };

    let mut rendered = String::new();
    self[root].dump(self, with_positions, &mut rendered);
    rendered
}
pub fn start_position(&self, t: TreeRef) -> Option<usize> {
self[t].start_position(&self)
/// Returns the `start_pos` stored on the tree referenced by `t`.
pub fn start_position(&self, t: TreeRef) -> usize {
    let node = &self[t];
    node.start_pos
}
pub fn end_position(&self, t: TreeRef) -> Option<usize> {
self[t].end_position(&self)
/// Returns the `end_pos` stored on the tree referenced by `t`.
pub fn end_position(&self, t: TreeRef) -> usize {
    let node = &self[t];
    node.end_pos
}
/// Finds the deepest tree whose `[start_pos, end_pos)` range contains `pos`.
///
/// Returns `None` when there is no root, or when `pos` falls outside the
/// root's range. Otherwise the search walks down from the root, at each
/// level stepping into the first child *tree* whose range contains `pos`,
/// and returns the last tree reached. Token children are never returned
/// themselves; the tree that holds them is.
pub fn find_tree_at(&self, pos: usize) -> Option<TreeRef> {
    // Half-open containment test: start is inclusive, end is exclusive.
    let covers = |node: TreeRef| {
        let candidate = &self[node];
        candidate.start_pos <= pos && pos < candidate.end_pos
    };

    let mut current = self.root?;
    if !covers(current) {
        return None;
    }

    loop {
        // First child tree (in child order) that contains `pos`, if any.
        let deeper = self[current].children.iter().find_map(|child| match child {
            Child::Tree(next) if covers(*next) => Some(*next),
            _ => None,
        });

        match deeper {
            Some(next) => current = next,
            // No child tree covers `pos`, so `current` is the innermost one.
            None => return Some(current),
        }
    }
}
}
@ -95,6 +137,8 @@ pub enum TreeKind {
/// A single node of the syntax tree: a kind, a link to its parent, a cached
/// source range, and its children in order.
pub struct Tree<'a> {
    /// What kind of syntax node this is.
    pub kind: TreeKind,
    /// The tree that contains this one. Must be `None` when the tree is handed
    /// to `SyntaxTree::add_tree`; it is set when a later tree that lists this
    /// one as a child is added.
    pub parent: Option<TreeRef>,
    /// Start position of the first child, computed by `SyntaxTree::add_tree`
    /// (0 when the tree has no children).
    pub start_pos: usize,
    /// End position of the last child, computed by `SyntaxTree::add_tree`
    /// (equal to `start_pos` when the tree has no children). Treated as
    /// exclusive by `SyntaxTree::find_tree_at`.
    pub end_pos: usize,
    /// Child tokens and subtrees, in order.
    pub children: Vec<Child<'a>>,
}
@ -118,41 +162,9 @@ impl<'a> Tree<'a> {
})
.flatten()
}
/// Returns the start position of the first child that can report one.
///
/// Children are visited front to back: a token reports its own `start`,
/// a subtree reports whatever `tree.start_position` yields for it. The
/// result is `None` when no child produces a position.
pub fn start_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
    // `None` here means there are fundamentally no tokens in this tree.
    // This seems *broken*.
    self.children.iter().find_map(|child| match child {
        Child::Token(tok) => Some(tok.start),
        Child::Tree(tr) => tree.start_position(*tr),
    })
}
/// Returns the end position of the last child that can report one.
///
/// Children are visited back to front: a token reports `start` plus the
/// length of its text, a subtree reports whatever `tree.end_position`
/// yields for it. The result is `None` when no child produces a position.
pub fn end_position(&self, tree: &SyntaxTree<'a>) -> Option<usize> {
    // `None` here means there are fundamentally no tokens in this tree.
    // This seems *broken*.
    self.children.iter().rev().find_map(|child| match child {
        Child::Token(tok) => Some(tok.start + tok.as_str().len()),
        Child::Tree(tr) => tree.end_position(*tr),
    })
}
}
#[derive(Copy, Clone, Eq, PartialEq, Hash)]
/// A compact handle to a tree stored in a `SyntaxTree`, wrapping a
/// `NonZeroU32`.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
pub struct TreeRef(NonZeroU32);
impl TreeRef {
@ -168,13 +180,15 @@ impl TreeRef {
}
impl<'a> Tree<'a> {
pub fn dump(&self, tree: &SyntaxTree<'a>) -> String {
let mut output = String::new();
output.push_str(&format!("{:?}\n", self.kind));
for child in self.children.iter() {
child.dump_rec(2, tree, &mut output);
pub fn dump(&self, tree: &SyntaxTree<'a>, with_positions: bool, output: &mut String) {
let _ = write!(output, "{:?}", self.kind);
if with_positions {
let _ = write!(output, " [{}, {})", self.start_pos, self.end_pos);
}
let _ = write!(output, "\n");
for child in self.children.iter() {
child.dump_rec(2, tree, with_positions, output);
}
output
}
}
@ -184,21 +198,52 @@ pub enum Child<'a> {
}
impl<'a> Child<'a> {
fn dump_rec(&self, indent: usize, tree: &SyntaxTree<'a>, output: &mut String) {
fn dump_rec(
&self,
indent: usize,
tree: &SyntaxTree<'a>,
with_positions: bool,
output: &mut String,
) {
for _ in 0..indent {
output.push(' ');
let _ = write!(output, " ");
}
match self {
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
Child::Token(t) => {
let _ = write!(output, "{:?}:'{:?}'", t.kind, t.as_str());
if with_positions {
let _ = write!(output, " [{}, {})", t.start, t.start + t.as_str().len());
}
let _ = write!(output, "\n");
}
Child::Tree(t) => {
let t = &tree[*t];
output.push_str(&format!("{:?}\n", t.kind));
let _ = write!(output, "{:?}", t.kind);
if with_positions {
let _ = write!(output, " [{}, {})", t.start_pos, t.end_pos);
}
let _ = write!(output, "\n");
for child in t.children.iter() {
child.dump_rec(indent + 2, tree, output);
child.dump_rec(indent + 2, tree, with_positions, output);
}
}
}
}
/// Returns where this child begins: a token's own `start`, or the
/// `start_pos` stored on the referenced tree in `syntax_tree`.
pub fn start_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(tree_ref) => {
            let subtree = &syntax_tree[*tree_ref];
            subtree.start_pos
        }
        Child::Token(token) => token.start,
    }
}
/// Returns where this child ends: a token's `start` plus the length of
/// its text, or the `end_pos` stored on the referenced tree in
/// `syntax_tree`.
pub fn end_position(&self, syntax_tree: &SyntaxTree) -> usize {
    match self {
        Child::Tree(tree_ref) => {
            let subtree = &syntax_tree[*tree_ref];
            subtree.end_pos
        }
        Child::Token(token) => {
            let text_len = token.as_str().len();
            token.start + text_len
        }
    }
}
}
enum ParseEvent<'a> {
@ -373,6 +418,8 @@ impl<'a> CParser<'a> {
ParseEvent::Start { kind } => stack.push(Tree {
kind,
parent: None,
start_pos: 0,
end_pos: 0,
children: Vec::new(),
}),
@ -726,6 +773,10 @@ mod tests {
fn tree_ref_size() {
    // The tree ref is only worth the effort if it stays nice and "small":
    // an optional ref must occupy exactly four bytes.
    //
    // TODO: This is a dumb optimization because tokens are
    // huge so Child is huge no matter what we do. If we retain
    // tokens out of line then we can re-visit this optimization.
    let optional_ref_size = std::mem::size_of::<Option<TreeRef>>();
    assert_eq!(4, optional_ref_size);
}
}