[fine] Parent pointers in trees
This commit is contained in:
parent
4f3536ea50
commit
7abb8eafc2
3 changed files with 132 additions and 55 deletions
|
|
@ -1,34 +1,56 @@
|
|||
// NOTE: much of this parser structure derived from
|
||||
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
|
||||
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
||||
use std::cell::Cell;
|
||||
use std::{cell::Cell, num::NonZeroU32};
|
||||
|
||||
// BINDING POWERS. When parsing expressions we only accept expressions that
|
||||
// meet a minimum binding power. (This is like "precedence" but I just super
|
||||
// don't like that terminology.)
|
||||
const ASSIGNMENT_POWER: u8 = 0; // =
|
||||
const OR_POWER: u8 = 1; // or
|
||||
const AND_POWER: u8 = 2; // and
|
||||
const EQUALITY_POWER: u8 = 3; // == !=
|
||||
const COMPARISON_POWER: u8 = 4; // < > <= >=
|
||||
const TERM_POWER: u8 = 5; // + -
|
||||
const FACTOR_POWER: u8 = 6; // * /
|
||||
const UNARY_POWER: u8 = 7; // ! -
|
||||
/// Flat storage for every [`Tree`] produced by a parse. Trees refer to each
/// other through [`TreeRef`] handles instead of owning one another.
pub struct ConcreteTree<'a> {
|
||||
/// Backing storage; a `TreeRef` converts to a position in this vector.
trees: Vec<Tree<'a>>,
|
||||
/// Handle of the root tree; `None` when no root has been recorded.
root: Option<TreeRef>,
|
||||
}
|
||||
|
||||
// const PRIMARY_POWER: u8 = 9;
|
||||
|
||||
fn token_power<'a>(token: TokenKind) -> Option<u8> {
|
||||
match token {
|
||||
TokenKind::Equal => Some(ASSIGNMENT_POWER),
|
||||
TokenKind::Or => Some(OR_POWER),
|
||||
TokenKind::And => Some(AND_POWER),
|
||||
TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
|
||||
TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
|
||||
Some(COMPARISON_POWER)
|
||||
impl<'a> ConcreteTree<'a> {
|
||||
pub fn new() -> Self {
|
||||
ConcreteTree {
|
||||
trees: vec![],
|
||||
root: None,
|
||||
}
|
||||
TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
|
||||
TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
|
||||
_ => None,
|
||||
}
|
||||
|
||||
pub fn add_tree(&mut self, t: Tree<'a>) -> TreeRef {
|
||||
assert!(t.parent.is_none());
|
||||
let tr = TreeRef::from_index(self.trees.len());
|
||||
|
||||
// NOTE: Because of the difficulty of holding multiple mutable
|
||||
// references it's this is our best chance to patch up parent
|
||||
// pointers.
|
||||
for child in t.children.iter() {
|
||||
if let Child::Tree(ct) = child {
|
||||
self[*ct].parent = Some(tr);
|
||||
}
|
||||
}
|
||||
self.trees.push(t);
|
||||
tr
|
||||
}
|
||||
|
||||
pub fn dump(&self) -> String {
|
||||
match self.root {
|
||||
Some(r) => self[r].dump(self),
|
||||
None => String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::ops::Index<TreeRef> for ConcreteTree<'a> {
|
||||
type Output = Tree<'a>;
|
||||
|
||||
fn index(&self, index: TreeRef) -> &Self::Output {
|
||||
&self.trees[index.index()]
|
||||
}
|
||||
}
|
||||
|
||||
/// Mutable access to a stored tree through its [`TreeRef`] handle.
impl<'a> std::ops::IndexMut<TreeRef> for ConcreteTree<'a> {
    /// Mutably borrows the tree at the position `index` converts to.
    /// Panics, like any `Vec` index, when that position is out of range.
    fn index_mut(&mut self, index: TreeRef) -> &mut Self::Output {
        let slot = index.index();
        &mut self.trees[slot]
    }
}
|
||||
|
||||
|
|
@ -58,52 +80,53 @@ pub enum TreeKind {
|
|||
|
||||
pub struct Tree<'a> {
|
||||
pub kind: TreeKind,
|
||||
// TODO: Indirect reference? Flatness? Using a reference structure will
|
||||
// make caching and annotation easier if desired.
|
||||
pub parent: Option<TreeRef>,
|
||||
pub children: Vec<Child<'a>>,
|
||||
}
|
||||
|
||||
/// A compact handle to a tree held in a `ConcreteTree`.
///
/// The handle stores the tree's position plus one in a `NonZeroU32`, so that
/// `Option<TreeRef>` takes no more space than a plain `u32`.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct TreeRef(NonZeroU32);

impl TreeRef {
    /// Builds the handle for the tree stored at position `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index + 1` cannot be represented as a `u32`.
    pub fn from_index(index: usize) -> TreeRef {
        // Offset by one so that position 0 is still a non-zero value. The
        // addition is checked so an overflow cannot silently wrap around
        // before the range check happens.
        let raw = index
            .checked_add(1)
            .and_then(|n| u32::try_from(n).ok())
            .and_then(NonZeroU32::new)
            .expect("tree index + 1 must fit in a u32");
        TreeRef(raw)
    }

    /// Returns the zero-based position this handle was built from.
    pub fn index(&self) -> usize {
        let raw = usize::try_from(self.0.get()).expect("a u32 tree index must fit in usize");
        // Undo the offset applied in `from_index`; `raw` is never zero.
        raw - 1
    }
}
|
||||
|
||||
impl<'a> Tree<'a> {
|
||||
pub fn dump(&self) -> String {
|
||||
pub fn dump(&self, tree: &ConcreteTree<'a>) -> String {
|
||||
let mut output = String::new();
|
||||
output.push_str(&format!("{:?}\n", self.kind));
|
||||
for child in self.children.iter() {
|
||||
child.dump_rec(2, &mut output);
|
||||
child.dump_rec(2, tree, &mut output);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Debug for Tree<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[{:?}", self.kind)?;
|
||||
for child in self.children.iter() {
|
||||
match child {
|
||||
Child::Token(t) => write!(f, " {:?}:'{}'", t.kind, t.as_str())?,
|
||||
Child::Tree(t) => write!(f, " {t:?}")?,
|
||||
}
|
||||
}
|
||||
write!(f, "]")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Child<'a> {
|
||||
Token(Token<'a>),
|
||||
Tree(Tree<'a>),
|
||||
Tree(TreeRef),
|
||||
}
|
||||
|
||||
impl<'a> Child<'a> {
|
||||
fn dump_rec(&self, indent: usize, output: &mut String) {
|
||||
fn dump_rec(&self, indent: usize, tree: &ConcreteTree<'a>, output: &mut String) {
|
||||
for _ in 0..indent {
|
||||
output.push(' ');
|
||||
}
|
||||
match self {
|
||||
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
|
||||
Child::Tree(t) => {
|
||||
let t = &tree[*t];
|
||||
output.push_str(&format!("{:?}\n", t.kind));
|
||||
for child in t.children.iter() {
|
||||
child.dump_rec(indent + 2, output);
|
||||
child.dump_rec(indent + 2, tree, output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -261,10 +284,12 @@ impl<'a> CParser<'a> {
|
|||
});
|
||||
}
|
||||
|
||||
fn build_tree(self) -> (Tree<'a>, Lines) {
|
||||
fn build_tree(self) -> (ConcreteTree<'a>, Lines) {
|
||||
let mut events = self.events;
|
||||
let mut stack = Vec::new();
|
||||
|
||||
let mut result = ConcreteTree::new();
|
||||
|
||||
// The first element in our events vector must be a start; the whole
|
||||
// thing must be bracketed in a tree.
|
||||
assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));
|
||||
|
|
@ -279,12 +304,13 @@ impl<'a> CParser<'a> {
|
|||
match event {
|
||||
ParseEvent::Start { kind } => stack.push(Tree {
|
||||
kind,
|
||||
parent: None,
|
||||
children: Vec::new(),
|
||||
}),
|
||||
|
||||
ParseEvent::End => {
|
||||
let tree = stack.pop().unwrap();
|
||||
stack.last_mut().unwrap().children.push(Child::Tree(tree));
|
||||
let t = result.add_tree(stack.pop().unwrap());
|
||||
stack.last_mut().unwrap().children.push(Child::Tree(t));
|
||||
}
|
||||
|
||||
ParseEvent::Advance { token } => {
|
||||
|
|
@ -294,11 +320,14 @@ impl<'a> CParser<'a> {
|
|||
}
|
||||
|
||||
assert!(stack.len() == 1, "Not all trees were ended!");
|
||||
(stack.pop().unwrap(), self.tokens.lines())
|
||||
let root = result.add_tree(stack.pop().unwrap());
|
||||
result.root = Some(root);
|
||||
|
||||
(result, self.tokens.lines())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_concrete(source: &str) -> (Tree, Lines) {
|
||||
pub fn parse_concrete(source: &str) -> (ConcreteTree, Lines) {
|
||||
let tokens = Tokens::new(source);
|
||||
let mut parser = CParser::new(tokens);
|
||||
|
||||
|
|
@ -463,6 +492,35 @@ fn expression(p: &mut CParser) {
|
|||
expression_with_power(p, 0)
|
||||
}
|
||||
|
||||
// BINDING POWERS. When parsing expressions we only accept expressions that
|
||||
// meet a minimum binding power. (This is like "precedence" but I just super
|
||||
// don't like that terminology.)
|
||||
const ASSIGNMENT_POWER: u8 = 0; // =
|
||||
const OR_POWER: u8 = 1; // or
|
||||
const AND_POWER: u8 = 2; // and
|
||||
const EQUALITY_POWER: u8 = 3; // == !=
|
||||
const COMPARISON_POWER: u8 = 4; // < > <= >=
|
||||
const TERM_POWER: u8 = 5; // + -
|
||||
const FACTOR_POWER: u8 = 6; // * /
|
||||
const UNARY_POWER: u8 = 7; // ! -
|
||||
|
||||
// const PRIMARY_POWER: u8 = 9;
|
||||
|
||||
fn token_power<'a>(token: TokenKind) -> Option<u8> {
|
||||
match token {
|
||||
TokenKind::Equal => Some(ASSIGNMENT_POWER),
|
||||
TokenKind::Or => Some(OR_POWER),
|
||||
TokenKind::And => Some(AND_POWER),
|
||||
TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
|
||||
TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
|
||||
Some(COMPARISON_POWER)
|
||||
}
|
||||
TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
|
||||
TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn expression_with_power(p: &mut CParser, minimum_power: u8) {
|
||||
let mut expr = prefix_expression(p);
|
||||
while p.at(TokenKind::LeftParen) {
|
||||
|
|
@ -591,3 +649,15 @@ fn identifier(p: &mut CParser) -> MarkClosed {
|
|||
|
||||
p.end(m, TreeKind::Identifier)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn tree_ref_size() {
|
||||
// What's the point of doing all that work if the tree ref isn't nice
|
||||
// and "small"?
|
||||
assert_eq!(4, std::mem::size_of::<Option<TreeRef>>());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,11 +58,18 @@ pub enum TokenKind {
|
|||
Yield,
|
||||
}
|
||||
|
||||
// NOTE: Tokens are kinda big (like 40 bytes?) and AFAICT the only way to go
|
||||
// smaller would be to stop using string pointers and use smaller
|
||||
// sizes/offsets instead, e.g., 32b for offset and 32b for size, and
|
||||
// stop tracking the position independently from the start, and then
|
||||
// require the source text when converting to line/col. I'm unwilling to
|
||||
// give up the ergonomics of &str and String right now, so we're just
|
||||
// not doing it.
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct Token<'a> {
|
||||
pub kind: TokenKind,
|
||||
pub start: usize,
|
||||
value: Result<&'a str, String>,
|
||||
value: Result<&'a str, Box<str>>,
|
||||
}
|
||||
|
||||
impl<'a> Token<'a> {
|
||||
|
|
@ -78,7 +85,7 @@ impl<'a> Token<'a> {
|
|||
Token {
|
||||
kind: TokenKind::Error,
|
||||
start,
|
||||
value: Err(message),
|
||||
value: Err(message.into()),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use fine::parser::concrete::Tree;
|
||||
use fine::parser::concrete::ConcreteTree;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn rebase_concrete(source_path: &str, dump: &str) {
|
||||
|
|
@ -68,7 +68,7 @@ fn rebase_concrete(source_path: &str, dump: &str) {
|
|||
std::fs::write(source_path, result).expect("unable to write the new file!");
|
||||
}
|
||||
|
||||
fn assert_concrete(tree: &Tree, expected: &str, source_path: &str) {
|
||||
fn assert_concrete(tree: &ConcreteTree, expected: &str, source_path: &str) {
|
||||
let dump = tree.dump();
|
||||
let rebase = std::env::var("FINE_TEST_REBASE")
|
||||
.unwrap_or(String::new())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue