[fine] Dump CSTs and an example zoo

This commit is contained in:
John Doty 2024-01-04 13:25:17 -08:00
parent 757db0ba3e
commit 1f6d7ec131
9 changed files with 357 additions and 32 deletions

64
Cargo.lock generated
View file

@ -150,7 +150,7 @@ dependencies = [
"proc-macro2",
"quote",
"swc_macros_common",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -295,7 +295,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -672,6 +672,12 @@ dependencies = [
[[package]]
name = "fine"
version = "0.1.0"
dependencies = [
"glob",
"prettyplease",
"quote",
"syn 2.0.47",
]
[[package]]
name = "flate2"
@ -720,7 +726,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -753,7 +759,7 @@ dependencies = [
"pmutil",
"proc-macro2",
"swc_macros_common",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -1038,7 +1044,7 @@ dependencies = [
"pmutil",
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -1717,7 +1723,7 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -1751,6 +1757,16 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "prettyplease"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
dependencies = [
"proc-macro2",
"syn 2.0.47",
]
[[package]]
name = "proc-macro-crate"
version = "1.3.1"
@ -1769,9 +1785,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
[[package]]
name = "proc-macro2"
version = "1.0.59"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708"
dependencies = [
"unicode-ident",
]
@ -1793,9 +1809,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.28"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
@ -2011,7 +2027,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2216,7 +2232,7 @@ dependencies = [
"proc-macro2",
"quote",
"swc_macros_common",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2282,7 +2298,7 @@ dependencies = [
"proc-macro2",
"quote",
"swc_macros_common",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2331,7 +2347,7 @@ dependencies = [
"proc-macro2",
"quote",
"swc_macros_common",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2414,7 +2430,7 @@ dependencies = [
"proc-macro2",
"quote",
"swc_macros_common",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2518,7 +2534,7 @@ dependencies = [
"pmutil",
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2530,7 +2546,7 @@ dependencies = [
"pmutil",
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2554,7 +2570,7 @@ dependencies = [
"proc-macro2",
"quote",
"swc_macros_common",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2570,9 +2586,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.18"
version = "2.0.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb"
dependencies = [
"proc-macro2",
"quote",
@ -2614,7 +2630,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2704,7 +2720,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
]
[[package]]
@ -2912,7 +2928,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
"wasm-bindgen-shared",
]
@ -2946,7 +2962,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
"syn 2.0.47",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]

55
fine/Cargo.lock generated
View file

@ -12,9 +12,19 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
name = "fine"
version = "0.1.0"
dependencies = [
"glob",
"pretty_assertions",
"prettyplease",
"quote",
"syn",
]
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "pretty_assertions"
version = "1.4.0"
@ -25,6 +35,51 @@ dependencies = [
"yansi",
]
[[package]]
name = "prettyplease"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "syn"
version = "2.0.47"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "yansi"
version = "0.5.1"

View file

@ -5,3 +5,9 @@ edition = "2021"
[dev-dependencies]
pretty_assertions = "1.4.0"
[build-dependencies]
glob = "0.3.1"
prettyplease = "0.2.16"
quote = "1.0.35"
syn = "2.0.47"

96
fine/build.rs Normal file
View file

@ -0,0 +1,96 @@
use quote::{format_ident, quote};
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
fn generate_test_for_file(path: PathBuf) -> String {
let contents = fs::read_to_string(&path).expect("Unable to read input");
let mut concrete_stuff: Option<String> = None;
// Start iterating over lines and processing directives....
let mut lines = contents.lines();
while let Some(line) = lines.next() {
let line = match line.strip_prefix("//") {
Some(line) => line,
None => break,
};
let line = line.trim();
if line == "concrete:" {
let mut concrete = String::new();
while let Some(line) = lines.next() {
let line = match line.strip_prefix("// | ") {
Some(line) => line,
None => break,
};
concrete.push_str(line);
concrete.push_str("\n");
}
concrete_stuff = Some(concrete);
}
}
let concrete_comparison = if let Some(concrete) = concrete_stuff {
quote! {
crate::assert_concrete(&_tree, #concrete)
}
} else {
quote! {}
};
let name = format_ident!("{}", path.file_stem().unwrap().to_string_lossy());
let test_method = quote! {
fn #name() {
let (_tree, _lines) = fine::parser::concrete::parse_concrete(#contents);
#concrete_comparison;
}
};
let syntax_tree = syn::parse2(test_method).unwrap();
prettyplease::unparse(&syntax_tree)
}
/// Recursively walks `path` and appends generated test source to `output`.
///
/// * Each sub-directory becomes a `mod <dir name> { ... }` wrapping the tests
///   generated for its contents.
/// * Each regular file with the `fine` extension becomes a `#[test]` function
///   produced by `generate_test_for_file`, preceded by a comment naming the
///   source file.
/// * Anything else that is neither a directory nor a regular file is reported
///   on stderr and skipped.
///
/// Entries are visited in file-name order so the generated source is
/// deterministic; `read_dir` itself makes no ordering guarantee.
///
/// # Panics
///
/// Panics if the directory cannot be read or an entry's file type cannot be
/// determined.
fn process_directory<T>(output: &mut String, path: T)
where
    T: AsRef<Path>,
{
    let path = path.as_ref();

    let mut entries = Vec::new();
    for entry in std::fs::read_dir(path).expect("Unable to read directory") {
        match entry {
            Ok(dirent) => entries.push(dirent),
            Err(e) => eprintln!("Unable to read directory entry: {:?}", e),
        }
    }
    entries.sort_by_key(|dirent| dirent.file_name());

    for dirent in entries {
        let entry_path = dirent.path();
        let file_type = dirent
            .file_type()
            .expect("Unable to determine file type of directory entry");

        if file_type.is_dir() {
            let file_name = dirent.file_name();
            let file_name = file_name.to_string_lossy();

            output.push_str(&format!("mod {file_name} {{\n"));
            process_directory(output, &entry_path);
            output.push_str("}\n\n");
        } else if file_type.is_file() {
            // Deliberately nested: regular files with other extensions are
            // ignored silently rather than reported by the branch below.
            if entry_path.extension() == Some(std::ffi::OsStr::new("fine")) {
                output.push_str(&format!("// {}\n", entry_path.display()));
                output.push_str("#[test]\n");
                output.push_str(&generate_test_for_file(entry_path));
                output.push_str("\n\n");
            }
        } else {
            // Report the entry that is being skipped, not its parent directory.
            eprintln!("Skipping symlink: {}", entry_path.display());
        }
    }
}
/// Build-script entry point.
///
/// Asks Cargo to re-run the script when `./tests` changes, generates test
/// source for everything under `./tests`, and writes it to
/// `$OUT_DIR/generated_tests.rs`.
fn main() {
    println!("cargo:rerun-if-changed=./tests");

    let generated = {
        let mut buffer = String::new();
        process_directory(&mut buffer, "./tests");
        buffer
    };

    let mut destination = PathBuf::from(env::var_os("OUT_DIR").unwrap());
    destination.push("generated_tests.rs");
    fs::write(destination, generated).unwrap();
}

View file

@ -646,8 +646,13 @@ impl<'a> Parser<'a> {
fn advance(&mut self) {
self.previous = self.current.clone();
self.current = self.tokens.next();
while self.current.kind == TokenKind::Error {
self.error_at_current(self.current.to_string());
while self.current.kind == TokenKind::Error
|| self.current.kind == TokenKind::Whitespace
|| self.current.kind == TokenKind::Comment
{
if self.current.kind == TokenKind::Error {
self.error_at_current(self.current.to_string());
}
self.current = self.tokens.next();
}
}

View file

@ -32,6 +32,7 @@ fn token_power<'a>(token: TokenKind) -> Option<u8> {
}
}
#[derive(Debug)]
pub enum TreeKind {
Error,
File,
@ -62,11 +63,53 @@ pub struct Tree<'a> {
pub children: Vec<Child<'a>>,
}
impl<'a> Tree<'a> {
    /// Renders this tree as indented text: the root's kind on the first
    /// line, then every child written by `Child::dump_rec` starting at an
    /// indent of two spaces.
    pub fn dump(&self) -> String {
        let mut rendered = format!("{:?}\n", self.kind);
        self.children
            .iter()
            .for_each(|child| child.dump_rec(2, &mut rendered));
        rendered
    }
}
impl<'a> std::fmt::Debug for Tree<'a> {
    /// Writes the tree on a single line as `[Kind child child ...]`, where a
    /// token child is shown as `Kind:'text'` and a subtree child is written
    /// recursively in the same bracketed form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "[{:?}", self.kind)?;
        self.children.iter().try_for_each(|child| match child {
            Child::Token(token) => write!(f, " {:?}:'{}'", token.kind, token.as_str()),
            Child::Tree(subtree) => write!(f, " {subtree:?}"),
        })?;
        write!(f, "]")
    }
}
/// One element of a `Tree`'s `children` list: either a single token or a
/// nested subtree.
pub enum Child<'a> {
/// A leaf holding one token.
Token(Token<'a>),
/// A nested subtree with its own kind and children.
Tree(Tree<'a>),
}
impl<'a> Child<'a> {
    /// Appends this child to `output`, one line per node, prefixed by
    /// `indent` spaces. A token is written as `Kind:'<debug-quoted text>'`;
    /// a subtree is written as its kind followed by its children at
    /// `indent + 2`.
    fn dump_rec(&self, indent: usize, output: &mut String) {
        output.extend(std::iter::repeat(' ').take(indent));

        match self {
            Child::Token(token) => {
                let line = format!("{:?}:'{:?}'\n", token.kind, token.as_str());
                output.push_str(&line);
            }
            Child::Tree(subtree) => {
                output.push_str(&format!("{:?}\n", subtree.kind));
                let deeper = indent + 2;
                for nested in &subtree.children {
                    nested.dump_rec(deeper, output);
                }
            }
        }
    }
}
enum ParseEvent<'a> {
Start { kind: TreeKind },
End,
@ -97,6 +140,7 @@ impl<'a> CParser<'a> {
events: Vec::new(),
};
parser.current = parser.tokens.next();
parser.skip_ephemera();
parser
}
@ -135,6 +179,14 @@ impl<'a> CParser<'a> {
token: self.current.clone(),
});
self.current = self.tokens.next();
self.skip_ephemera();
}
/// Advances `current` past any run of whitespace and comment tokens,
/// stopping at the first token of any other kind.
fn skip_ephemera(&mut self) {
    while matches!(
        self.current.kind,
        TokenKind::Whitespace | TokenKind::Comment
    ) {
        self.current = self.tokens.next();
    }
}
fn eof(&self) -> bool {
@ -213,8 +265,14 @@ impl<'a> CParser<'a> {
let mut events = self.events;
let mut stack = Vec::new();
// Special case: pop the last `Close` event to ensure that the stack
// is non-empty inside the loop.
// The first element in our events vector must be a start; the whole
// thing must be bracketed in a tree.
assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));
// The last element in our events vector must be an end, otherwise
// the parser has failed badly. We'll remove it here so that, after
// processing the entire array, the stack retains the tree that we
// started with the very first ::Start.
assert!(matches!(events.pop(), Some(ParseEvent::End)));
for event in events {
@ -240,7 +298,7 @@ impl<'a> CParser<'a> {
}
}
pub fn c_parse(source: &str) -> (Tree, Lines) {
pub fn parse_concrete(source: &str) -> (Tree, Lines) {
let tokens = Tokens::new(source);
let mut parser = CParser::new(tokens);
@ -527,3 +585,34 @@ fn identifier(p: &mut CParser) -> MarkClosed {
p.end(m, TreeKind::Identifier)
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `source` as a single expression and asserts that the `Debug`
    /// rendering of the resulting tree equals `expected`.
    fn test_successful_expression_parse(source: &str, expected: &str) {
        let mut parser = CParser::new(Tokens::new(source));
        expression(&mut parser);

        let (tree, _) = parser.build_tree();
        let actual = format!("{tree:?}");
        assert_eq!(
            expected, actual,
            "The parse structure of the expressions did not match"
        );
    }

    /// Declares a `#[test]` named `$test_name` that checks `$source` parses
    /// to the tree rendering `$rendering`.
    macro_rules! test_expr {
        ($test_name:ident, $source:expr, $rendering:expr) => {
            #[test]
            fn $test_name() {
                test_successful_expression_parse($source, $rendering);
            }
        };
    }

    test_expr!(number_expr, "12", "[LiteralExpression Number:'12']");
}

View file

@ -3,6 +3,9 @@ pub enum TokenKind {
EOF,
Error,
Whitespace,
Comment,
LeftBrace,
RightBrace,
LeftBracket,
@ -390,7 +393,7 @@ impl<'a> Tokens<'a> {
self.next_char.is_none()
}
fn skip_whitespace(&mut self) {
fn whitespace(&mut self, pos: usize) -> Token<'a> {
while let Some((pos, ch)) = self.next_char {
if ch == '\n' {
self.lines.add_line(pos);
@ -399,16 +402,27 @@ impl<'a> Tokens<'a> {
}
self.advance();
}
self.token(pos, TokenKind::Whitespace)
}
/// Consumes characters up to, but not including, the next `'\n'` (or the end
/// of input) and returns a `Comment` token starting at `pos`.
fn comment(&mut self, pos: usize) -> Token<'a> {
    while matches!(self.next_char, Some((_, ch)) if ch != '\n') {
        self.advance();
    }
    self.token(pos, TokenKind::Comment)
}
pub fn next(&mut self) -> Token<'a> {
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
let (pos, c) = match self.advance() {
Some((p, c)) => (p, c),
None => return self.token(self.source.len(), TokenKind::EOF),
};
match c {
' ' | '\t' | '\r' | '\n' => self.whitespace(pos),
'{' => self.token(pos, TokenKind::LeftBrace),
'}' => self.token(pos, TokenKind::RightBrace),
'[' => self.token(pos, TokenKind::LeftBracket),
@ -427,7 +441,13 @@ impl<'a> Tokens<'a> {
'+' => self.token(pos, TokenKind::Plus),
':' => self.token(pos, TokenKind::Colon),
';' => self.token(pos, TokenKind::Semicolon),
'/' => self.token(pos, TokenKind::Slash),
'/' => {
if self.matches('/') {
self.comment(pos)
} else {
self.token(pos, TokenKind::Slash)
}
}
'*' => self.token(pos, TokenKind::Star),
'!' => {
if self.matches('=') {
@ -484,6 +504,9 @@ mod tests {
while !is_eof {
let token = tokens.next();
is_eof = token.kind == TokenKind::EOF;
if token.kind == TokenKind::Whitespace {
continue;
}
result.push(token);
}

View file

@ -0,0 +1,8 @@
use fine::parser::concrete::Tree;
use pretty_assertions::assert_eq;
/// Asserts that the text dump of `tree` equals `expected`; called by the
/// generated tests.
fn assert_concrete(tree: &Tree, expected: &str) {
    let actual = tree.dump();
    assert_eq!(actual, expected, "concrete syntax trees did not match");
}
include!(concat!(env!("OUT_DIR"), "/generated_tests.rs"));

View file

@ -0,0 +1,27 @@
// concrete:
// | File
// | ExpressionStatement
// | LiteralExpression
// | Number:'"42"'
// | Semicolon:'";"'
// | ExpressionStatement
// | BinaryExpression
// | BinaryExpression
// | LiteralExpression
// | Number:'"1"'
// | Star:'"*"'
// | LiteralExpression
// | Number:'"2"'
// | Plus:'"+"'
// | BinaryExpression
// | UnaryExpression
// | Minus:'"-"'
// | LiteralExpression
// | Number:'"3"'
// | Star:'"*"'
// | LiteralExpression
// | Number:'"4"'
// | Semicolon:'";"'
//
42;
1 * 2 + -3 * 4;