Compare commits
No commits in common. "1f6d7ec13193cea344f938582fb7794087d2d467" and "4fe3137027cb7d9595876a44f3332d6c1d14519a" have entirely different histories.
1f6d7ec131
...
4fe3137027
9 changed files with 516 additions and 886 deletions
64
Cargo.lock
generated
64
Cargo.lock
generated
|
|
@ -150,7 +150,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"swc_macros_common",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -295,7 +295,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -672,12 +672,6 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "fine"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"prettyplease",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
|
|
@ -726,7 +720,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -759,7 +753,7 @@ dependencies = [
|
|||
"pmutil",
|
||||
"proc-macro2",
|
||||
"swc_macros_common",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1044,7 +1038,7 @@ dependencies = [
|
|||
"pmutil",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1723,7 +1717,7 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1757,16 +1751,6 @@ version = "0.1.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn 2.0.47",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-crate"
|
||||
version = "1.3.1"
|
||||
|
|
@ -1785,9 +1769,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
|
|||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.75"
|
||||
version = "1.0.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708"
|
||||
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
|
@ -1809,9 +1793,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.35"
|
||||
version = "1.0.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
|
||||
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
|
@ -2027,7 +2011,7 @@ checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2232,7 +2216,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"swc_macros_common",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2298,7 +2282,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"swc_macros_common",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2347,7 +2331,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"swc_macros_common",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2430,7 +2414,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"swc_macros_common",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2534,7 +2518,7 @@ dependencies = [
|
|||
"pmutil",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2546,7 +2530,7 @@ dependencies = [
|
|||
"pmutil",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2570,7 +2554,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"swc_macros_common",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2586,9 +2570,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.47"
|
||||
version = "2.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb"
|
||||
checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -2630,7 +2614,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2720,7 +2704,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2928,7 +2912,7 @@ dependencies = [
|
|||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
|
|
@ -2962,7 +2946,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.47",
|
||||
"syn 2.0.18",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
|
|
|||
55
fine/Cargo.lock
generated
55
fine/Cargo.lock
generated
|
|
@ -12,19 +12,9 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
|
|||
name = "fine"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"pretty_assertions",
|
||||
"prettyplease",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "pretty_assertions"
|
||||
version = "1.4.0"
|
||||
|
|
@ -35,51 +25,6 @@ dependencies = [
|
|||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.75"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.47"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.1"
|
||||
|
|
|
|||
|
|
@ -5,9 +5,3 @@ edition = "2021"
|
|||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.4.0"
|
||||
|
||||
[build-dependencies]
|
||||
glob = "0.3.1"
|
||||
prettyplease = "0.2.16"
|
||||
quote = "1.0.35"
|
||||
syn = "2.0.47"
|
||||
|
|
|
|||
|
|
@ -1,96 +0,0 @@
|
|||
use quote::{format_ident, quote};
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
fn generate_test_for_file(path: PathBuf) -> String {
|
||||
let contents = fs::read_to_string(&path).expect("Unable to read input");
|
||||
|
||||
let mut concrete_stuff: Option<String> = None;
|
||||
|
||||
// Start iterating over lines and processing directives....
|
||||
let mut lines = contents.lines();
|
||||
while let Some(line) = lines.next() {
|
||||
let line = match line.strip_prefix("//") {
|
||||
Some(line) => line,
|
||||
None => break,
|
||||
};
|
||||
|
||||
let line = line.trim();
|
||||
if line == "concrete:" {
|
||||
let mut concrete = String::new();
|
||||
while let Some(line) = lines.next() {
|
||||
let line = match line.strip_prefix("// | ") {
|
||||
Some(line) => line,
|
||||
None => break,
|
||||
};
|
||||
|
||||
concrete.push_str(line);
|
||||
concrete.push_str("\n");
|
||||
}
|
||||
concrete_stuff = Some(concrete);
|
||||
}
|
||||
}
|
||||
|
||||
let concrete_comparison = if let Some(concrete) = concrete_stuff {
|
||||
quote! {
|
||||
crate::assert_concrete(&_tree, #concrete)
|
||||
}
|
||||
} else {
|
||||
quote! {}
|
||||
};
|
||||
|
||||
let name = format_ident!("{}", path.file_stem().unwrap().to_string_lossy());
|
||||
let test_method = quote! {
|
||||
fn #name() {
|
||||
let (_tree, _lines) = fine::parser::concrete::parse_concrete(#contents);
|
||||
#concrete_comparison;
|
||||
}
|
||||
};
|
||||
|
||||
let syntax_tree = syn::parse2(test_method).unwrap();
|
||||
prettyplease::unparse(&syntax_tree)
|
||||
}
|
||||
|
||||
fn process_directory<T>(output: &mut String, path: T)
|
||||
where
|
||||
T: AsRef<Path>,
|
||||
{
|
||||
let fine_ext: std::ffi::OsString = "fine".into();
|
||||
let path = path.as_ref();
|
||||
for entry in std::fs::read_dir(path).expect("Unable to read directory") {
|
||||
match entry {
|
||||
Ok(dirent) => {
|
||||
let file_type = dirent.file_type().unwrap();
|
||||
if file_type.is_dir() {
|
||||
let file_name = dirent.file_name();
|
||||
let file_name = file_name.to_string_lossy().to_owned();
|
||||
output.push_str(&format!("mod {file_name} {{\n"));
|
||||
process_directory(output, dirent.path());
|
||||
output.push_str("}\n\n");
|
||||
} else if file_type.is_file() {
|
||||
if dirent.path().extension() == Some(&fine_ext) {
|
||||
output.push_str(&format!("// {}\n", dirent.path().display()));
|
||||
output.push_str("#[test]\n");
|
||||
output.push_str(&generate_test_for_file(dirent.path()));
|
||||
output.push_str("\n\n");
|
||||
}
|
||||
} else {
|
||||
eprintln!("Skipping symlink: {}", path.display());
|
||||
}
|
||||
}
|
||||
Err(e) => eprintln!("Unable to read directory entry: {:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=./tests");
|
||||
|
||||
let mut test_source = String::new();
|
||||
process_directory(&mut test_source, "./tests");
|
||||
|
||||
let out_dir = env::var_os("OUT_DIR").unwrap();
|
||||
let dest_path = Path::new(&out_dir).join("generated_tests.rs");
|
||||
fs::write(dest_path, test_source).unwrap();
|
||||
}
|
||||
|
|
@ -1,7 +1,5 @@
|
|||
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
||||
use std::fmt;
|
||||
|
||||
pub mod concrete;
|
||||
use std::{cell::Cell, fmt};
|
||||
|
||||
// TODO: An error should have:
|
||||
//
|
||||
|
|
@ -159,6 +157,492 @@ impl std::fmt::Display for Type {
|
|||
}
|
||||
}
|
||||
|
||||
// NOTE: much of this parser structure derived from
|
||||
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
|
||||
/// The kind of a node in the concrete syntax tree.
pub enum TreeKind {
    /// Placeholder kind used for nodes that are still open, and the kind of
    /// nodes that wrap a parse error.
    Error,
    File,

    // Declarations
    FunDecl,
    ParamList,
    Parameter,
    TypeExpression,

    // Statements
    Block,
    LetStatement,
    ReturnStatement,
    ExpressionStatement,

    // Expressions
    LiteralExpression,
    GroupingExpression,
    UnaryExpression,
    ConditionalExpression,
    CallExpression,
    ArgumentList,
    Argument,
    BinaryExpression,

    IfStatement,
}
|
||||
|
||||
pub struct Tree<'a> {
|
||||
pub kind: TreeKind,
|
||||
// TODO: Indirect reference? Flatness? Using a reference structure will
|
||||
// make caching and annotation easier if desired.
|
||||
pub children: Vec<Child<'a>>,
|
||||
}
|
||||
|
||||
pub enum Child<'a> {
|
||||
Token(Token<'a>),
|
||||
Tree(Tree<'a>),
|
||||
}
|
||||
|
||||
enum ParseEvent<'a> {
|
||||
Start { kind: TreeKind },
|
||||
End,
|
||||
Advance { token: Token<'a> },
|
||||
}
|
||||
|
||||
/// Handle for a node that has been opened but not yet closed.
struct MarkStarted {
    /// Position of the node's `Start` event in the event list.
    index: usize,
}
|
||||
|
||||
/// Handle for a node that has already been closed.
struct MarkClosed {
    /// Position of the node's `Start` event in the event list.
    index: usize,
}
|
||||
|
||||
struct CParser<'a> {
|
||||
tokens: Tokens<'a>,
|
||||
current: Token<'a>,
|
||||
fuel: Cell<u32>,
|
||||
events: Vec<ParseEvent<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> CParser<'a> {
|
||||
fn new(tokens: Tokens<'a>) -> Self {
|
||||
let mut parser = CParser {
|
||||
tokens,
|
||||
current: Token::new(TokenKind::EOF, 0, ""),
|
||||
fuel: Cell::new(256),
|
||||
events: Vec::new(),
|
||||
};
|
||||
parser.current = parser.tokens.next();
|
||||
parser
|
||||
}
|
||||
|
||||
fn start(&mut self) -> MarkStarted {
|
||||
let mark = MarkStarted {
|
||||
index: self.events.len(),
|
||||
};
|
||||
self.events.push(ParseEvent::Start {
|
||||
kind: TreeKind::Error,
|
||||
});
|
||||
mark
|
||||
}
|
||||
|
||||
fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed {
|
||||
self.events[mark.index] = ParseEvent::Start { kind };
|
||||
self.events.push(ParseEvent::End);
|
||||
MarkClosed { index: mark.index }
|
||||
}
|
||||
|
||||
fn start_before(&mut self, mark: MarkClosed) -> MarkStarted {
|
||||
// TODO: Point backwards and pointer chase in tree build?
|
||||
let mark = MarkStarted { index: mark.index };
|
||||
self.events.insert(
|
||||
mark.index,
|
||||
ParseEvent::Start {
|
||||
kind: TreeKind::Error,
|
||||
},
|
||||
);
|
||||
mark
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
assert!(!self.eof()); // Don't try to advance past EOF
|
||||
self.fuel.set(256); // Consuming a token, rest stuck detector
|
||||
self.events.push(ParseEvent::Advance {
|
||||
token: self.current.clone(),
|
||||
});
|
||||
self.current = self.tokens.next();
|
||||
}
|
||||
|
||||
fn eof(&self) -> bool {
|
||||
self.current.kind == TokenKind::EOF
|
||||
}
|
||||
|
||||
fn peek(&self) -> TokenKind {
|
||||
assert!(self.fuel.get() > 0, "parser is stuck!");
|
||||
self.fuel.set(self.fuel.get() - 1);
|
||||
self.current.kind
|
||||
}
|
||||
|
||||
fn at(&self, kind: TokenKind) -> bool {
|
||||
self.peek() == kind
|
||||
}
|
||||
|
||||
fn eat(&mut self, kind: TokenKind) -> bool {
|
||||
if self.at(kind) {
|
||||
self.advance();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn expect<T>(&mut self, kind: TokenKind, error: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
if self.eat(kind) {
|
||||
return;
|
||||
}
|
||||
self.error(error);
|
||||
}
|
||||
|
||||
fn advance_with_error<T>(&mut self, error: T) -> MarkClosed
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
let m = self.start();
|
||||
self.error(error);
|
||||
self.advance();
|
||||
self.end(m, TreeKind::Error)
|
||||
}
|
||||
|
||||
fn error<T>(&mut self, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
self.error_at(self.current.clone(), message)
|
||||
}
|
||||
|
||||
fn error_at<T>(&mut self, token: Token<'a>, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
let message: String = message.into();
|
||||
let mut final_message = "Error ".to_string();
|
||||
|
||||
if token.kind == TokenKind::EOF {
|
||||
final_message.push_str("at end")
|
||||
} else if token.kind != TokenKind::Error {
|
||||
final_message.push_str("at '");
|
||||
final_message.push_str(token.as_str());
|
||||
final_message.push_str("'");
|
||||
}
|
||||
final_message.push_str(": ");
|
||||
final_message.push_str(&message);
|
||||
|
||||
self.events.push(ParseEvent::Advance {
|
||||
token: Token::error(token.start, final_message),
|
||||
});
|
||||
}
|
||||
|
||||
fn build_tree(self) -> (Tree<'a>, Lines) {
|
||||
let mut events = self.events;
|
||||
let mut stack = Vec::new();
|
||||
|
||||
// Special case: pop the last `Close` event to ensure that the stack
|
||||
// is non-empty inside the loop.
|
||||
assert!(matches!(events.pop(), Some(ParseEvent::End)));
|
||||
|
||||
for event in events {
|
||||
match event {
|
||||
ParseEvent::Start { kind } => stack.push(Tree {
|
||||
kind,
|
||||
children: Vec::new(),
|
||||
}),
|
||||
|
||||
ParseEvent::End => {
|
||||
let tree = stack.pop().unwrap();
|
||||
stack.last_mut().unwrap().children.push(Child::Tree(tree));
|
||||
}
|
||||
|
||||
ParseEvent::Advance { token } => {
|
||||
stack.last_mut().unwrap().children.push(Child::Token(token));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(stack.len() == 1, "Not all trees were ended!");
|
||||
(stack.pop().unwrap(), self.tokens.lines())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn c_parse(source: &str) -> (Tree, Lines) {
|
||||
let tokens = Tokens::new(source);
|
||||
let mut parser = CParser::new(tokens);
|
||||
|
||||
file(&mut parser);
|
||||
|
||||
parser.build_tree()
|
||||
}
|
||||
|
||||
fn file(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
while !p.eof() {
|
||||
match p.peek() {
|
||||
TokenKind::Fun => function(p),
|
||||
_ => statement(p),
|
||||
}
|
||||
}
|
||||
p.end(m, TreeKind::File);
|
||||
}
|
||||
|
||||
fn function(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Fun));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::Fun, "expected a function to start with 'fun'");
|
||||
p.expect(TokenKind::Identifier, "expected a function name");
|
||||
if p.at(TokenKind::LeftParen) {
|
||||
param_list(p);
|
||||
}
|
||||
if p.eat(TokenKind::Arrow) {
|
||||
type_expr(p);
|
||||
}
|
||||
if p.at(TokenKind::LeftBrace) {
|
||||
block(p);
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::FunDecl);
|
||||
}
|
||||
|
||||
fn param_list(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::LeftParen));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::LeftParen, "expect '(' to start a parameter list");
|
||||
while !p.at(TokenKind::RightParen) && !p.eof() {
|
||||
if p.at(TokenKind::Identifier) {
|
||||
parameter(p);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
p.expect(TokenKind::RightParen, "expect ')' to end a parameter list");
|
||||
|
||||
p.end(m, TreeKind::ParamList);
|
||||
}
|
||||
|
||||
fn parameter(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Identifier));
|
||||
let m = p.start();
|
||||
p.expect(
|
||||
TokenKind::Identifier,
|
||||
"expected an identifier for a parameter name",
|
||||
);
|
||||
if p.eat(TokenKind::Colon) {
|
||||
type_expr(p);
|
||||
}
|
||||
if !p.at(TokenKind::RightParen) {
|
||||
p.expect(TokenKind::Comma, "expected a comma between parameters");
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::Parameter);
|
||||
}
|
||||
|
||||
fn type_expr(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
// TODO: Other kinds of type expressions probably!
|
||||
p.expect(TokenKind::Identifier, "expected the identifier of a type");
|
||||
p.end(m, TreeKind::TypeExpression);
|
||||
}
|
||||
|
||||
fn block(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::LeftBrace));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::LeftBrace, "expect '{' to start a block");
|
||||
while !p.at(TokenKind::RightBrace) && !p.eof() {
|
||||
statement(p);
|
||||
}
|
||||
p.expect(TokenKind::RightBrace, "expect '}' to start a block");
|
||||
|
||||
p.end(m, TreeKind::Block);
|
||||
}
|
||||
|
||||
fn statement(p: &mut CParser) {
|
||||
match p.peek() {
|
||||
TokenKind::LeftBrace => block(p),
|
||||
TokenKind::Let => statement_let(p),
|
||||
TokenKind::Return => statement_return(p),
|
||||
|
||||
// NOTE: Technically 'if' is an expression, but `if` doesn't
|
||||
// require a semicolon at the end if it's all by itself.
|
||||
TokenKind::If => statement_if(p),
|
||||
|
||||
_ => statement_expression(p),
|
||||
}
|
||||
}
|
||||
|
||||
fn statement_if(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::If));
|
||||
let m = p.start();
|
||||
|
||||
conditional(p);
|
||||
|
||||
p.end(m, TreeKind::IfStatement);
|
||||
}
|
||||
|
||||
fn statement_let(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Let));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::Let, "expect 'let' to start a let statement");
|
||||
p.expect(TokenKind::Identifier, "expected a name for the variable");
|
||||
p.expect(TokenKind::Equal, "expected a '=' after the variable name");
|
||||
expression(p);
|
||||
p.expect(TokenKind::Semicolon, "expect ';' to end a let statement");
|
||||
|
||||
p.end(m, TreeKind::LetStatement);
|
||||
}
|
||||
|
||||
fn statement_return(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Return));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(
|
||||
TokenKind::Return,
|
||||
"expect 'return' to start a return statement",
|
||||
);
|
||||
expression(p);
|
||||
p.expect(TokenKind::Semicolon, "expect ';' to end a return statement");
|
||||
|
||||
p.end(m, TreeKind::ReturnStatement);
|
||||
}
|
||||
|
||||
fn statement_expression(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
|
||||
expression(p);
|
||||
p.expect(
|
||||
TokenKind::Semicolon,
|
||||
"expect ';' to end an expression statement",
|
||||
);
|
||||
|
||||
p.end(m, TreeKind::ExpressionStatement);
|
||||
}
|
||||
|
||||
fn expression(p: &mut CParser) {
|
||||
expression_with_power(p, 0)
|
||||
}
|
||||
|
||||
fn expression_with_power(p: &mut CParser, minimum_power: u8) {
|
||||
let mut expr = prefix_expression(p);
|
||||
while p.at(TokenKind::LeftParen) {
|
||||
let m = p.start_before(expr);
|
||||
argument_list(p);
|
||||
expr = p.end(m, TreeKind::CallExpression);
|
||||
}
|
||||
|
||||
loop {
|
||||
let Some(power) = token_power(p.peek()) else {
|
||||
break;
|
||||
};
|
||||
if power < minimum_power {
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: I don't think this works for other "infix" types, but we'll
|
||||
// see won't we.
|
||||
let m = p.start_before(expr);
|
||||
p.advance(); // Consume the operator
|
||||
expression_with_power(p, power);
|
||||
expr = p.end(m, TreeKind::BinaryExpression);
|
||||
}
|
||||
}
|
||||
|
||||
fn argument_list(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::LeftParen));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(
|
||||
TokenKind::LeftParen,
|
||||
"expect an argument list to start with '('",
|
||||
);
|
||||
while !p.at(TokenKind::RightParen) && !p.eof() {
|
||||
argument(p);
|
||||
}
|
||||
p.expect(
|
||||
TokenKind::RightParen,
|
||||
"expect an argument list to start with '('",
|
||||
);
|
||||
|
||||
p.end(m, TreeKind::ArgumentList);
|
||||
}
|
||||
|
||||
fn argument(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
|
||||
expression(p);
|
||||
if !p.at(TokenKind::RightParen) {
|
||||
p.expect(TokenKind::Comma, "expect a ',' between arguments");
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::Argument);
|
||||
}
|
||||
|
||||
fn prefix_expression(p: &mut CParser) -> MarkClosed {
|
||||
match p.peek() {
|
||||
TokenKind::Number => literal(p),
|
||||
TokenKind::String => literal(p),
|
||||
TokenKind::True => literal(p),
|
||||
TokenKind::False => literal(p),
|
||||
|
||||
TokenKind::LeftParen => grouping(p),
|
||||
|
||||
TokenKind::Bang => unary(p),
|
||||
TokenKind::Minus => unary(p),
|
||||
|
||||
TokenKind::If => conditional(p),
|
||||
|
||||
_ => p.advance_with_error("expected an expression"),
|
||||
}
|
||||
}
|
||||
|
||||
fn literal(p: &mut CParser) -> MarkClosed {
|
||||
let m = p.start();
|
||||
p.advance();
|
||||
p.end(m, TreeKind::LiteralExpression)
|
||||
}
|
||||
|
||||
fn grouping(p: &mut CParser) -> MarkClosed {
|
||||
assert!(p.at(TokenKind::LeftParen));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::LeftParen, "expected '(' to start grouping");
|
||||
expression(p);
|
||||
p.expect(TokenKind::RightParen, "unmatched parentheses in expression");
|
||||
|
||||
p.end(m, TreeKind::GroupingExpression)
|
||||
}
|
||||
|
||||
fn unary(p: &mut CParser) -> MarkClosed {
|
||||
let m = p.start();
|
||||
|
||||
p.advance(); // Past the operator
|
||||
expression_with_power(p, UNARY_POWER);
|
||||
|
||||
p.end(m, TreeKind::UnaryExpression)
|
||||
}
|
||||
|
||||
fn conditional(p: &mut CParser) -> MarkClosed {
|
||||
assert!(p.at(TokenKind::If));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::If, "expected conditional to start with 'if'");
|
||||
expression(p);
|
||||
block(p);
|
||||
if p.eat(TokenKind::Else) {
|
||||
if p.at(TokenKind::If) {
|
||||
// Don't require another block, just jump right into the conditional.
|
||||
conditional(p);
|
||||
} else {
|
||||
block(p);
|
||||
}
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::ConditionalExpression)
|
||||
}
|
||||
|
||||
pub struct SyntaxTree<'a> {
|
||||
pub errors: Vec<SyntaxError>,
|
||||
expressions: Vec<Expr<'a>>,
|
||||
|
|
@ -646,13 +1130,8 @@ impl<'a> Parser<'a> {
|
|||
fn advance(&mut self) {
|
||||
self.previous = self.current.clone();
|
||||
self.current = self.tokens.next();
|
||||
while self.current.kind == TokenKind::Error
|
||||
|| self.current.kind == TokenKind::Whitespace
|
||||
|| self.current.kind == TokenKind::Comment
|
||||
{
|
||||
if self.current.kind == TokenKind::Error {
|
||||
self.error_at_current(self.current.to_string());
|
||||
}
|
||||
while self.current.kind == TokenKind::Error {
|
||||
self.error_at_current(self.current.to_string());
|
||||
self.current = self.tokens.next();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,618 +0,0 @@
|
|||
// NOTE: much of this parser structure derived from
|
||||
// https://matklad.github.io/2023/05/21/resilient-ll-parsing-tutorial.html
|
||||
use crate::tokens::{Lines, Token, TokenKind, Tokens};
|
||||
use std::cell::Cell;
|
||||
|
||||
// BINDING POWERS. When parsing expressions we only accept expressions that
|
||||
// meet a minimum binding power. (This is like "precedence" but I just super
|
||||
// don't like that terminology.)
|
||||
/// Binding power of `=`.
const ASSIGNMENT_POWER: u8 = 0;
/// Binding power of `or`.
const OR_POWER: u8 = 1;
/// Binding power of `and`.
const AND_POWER: u8 = 2;
/// Binding power of `==` and `!=`.
const EQUALITY_POWER: u8 = 3;
/// Binding power of `<`, `>`, `<=` and `>=`.
const COMPARISON_POWER: u8 = 4;
/// Binding power of `+` and `-`.
const TERM_POWER: u8 = 5;
/// Binding power of `*` and `/`.
const FACTOR_POWER: u8 = 6;
/// Binding power of the prefix operators `!` and `-`.
const UNARY_POWER: u8 = 7;
|
||||
|
||||
// const PRIMARY_POWER: u8 = 9;
|
||||
|
||||
fn token_power<'a>(token: TokenKind) -> Option<u8> {
|
||||
match token {
|
||||
TokenKind::Equal => Some(ASSIGNMENT_POWER),
|
||||
TokenKind::Or => Some(OR_POWER),
|
||||
TokenKind::And => Some(AND_POWER),
|
||||
TokenKind::EqualEqual | TokenKind::BangEqual => Some(EQUALITY_POWER),
|
||||
TokenKind::Less | TokenKind::Greater | TokenKind::GreaterEqual | TokenKind::LessEqual => {
|
||||
Some(COMPARISON_POWER)
|
||||
}
|
||||
TokenKind::Plus | TokenKind::Minus => Some(TERM_POWER),
|
||||
TokenKind::Star | TokenKind::Slash => Some(FACTOR_POWER),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of a node in the concrete syntax tree.
#[derive(Debug)]
pub enum TreeKind {
    /// Placeholder kind used for nodes that are still open, and the kind of
    /// nodes that wrap a parse error.
    Error,
    File,

    // Declarations
    FunDecl,
    ParamList,
    Parameter,
    TypeExpression,

    // Statements
    Block,
    LetStatement,
    ReturnStatement,
    ExpressionStatement,

    // Expressions
    LiteralExpression,
    GroupingExpression,
    UnaryExpression,
    ConditionalExpression,
    CallExpression,
    ArgumentList,
    Argument,
    BinaryExpression,

    IfStatement,
    Identifier,
}
|
||||
|
||||
pub struct Tree<'a> {
|
||||
pub kind: TreeKind,
|
||||
// TODO: Indirect reference? Flatness? Using a reference structure will
|
||||
// make caching and annotation easier if desired.
|
||||
pub children: Vec<Child<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Tree<'a> {
|
||||
pub fn dump(&self) -> String {
|
||||
let mut output = String::new();
|
||||
output.push_str(&format!("{:?}\n", self.kind));
|
||||
for child in self.children.iter() {
|
||||
child.dump_rec(2, &mut output);
|
||||
}
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Debug for Tree<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[{:?}", self.kind)?;
|
||||
for child in self.children.iter() {
|
||||
match child {
|
||||
Child::Token(t) => write!(f, " {:?}:'{}'", t.kind, t.as_str())?,
|
||||
Child::Tree(t) => write!(f, " {t:?}")?,
|
||||
}
|
||||
}
|
||||
write!(f, "]")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Child<'a> {
|
||||
Token(Token<'a>),
|
||||
Tree(Tree<'a>),
|
||||
}
|
||||
|
||||
impl<'a> Child<'a> {
|
||||
fn dump_rec(&self, indent: usize, output: &mut String) {
|
||||
for _ in 0..indent {
|
||||
output.push(' ');
|
||||
}
|
||||
match self {
|
||||
Child::Token(t) => output.push_str(&format!("{:?}:'{:?}'\n", t.kind, t.as_str())),
|
||||
Child::Tree(t) => {
|
||||
output.push_str(&format!("{:?}\n", t.kind));
|
||||
for child in t.children.iter() {
|
||||
child.dump_rec(indent + 2, output);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum ParseEvent<'a> {
|
||||
Start { kind: TreeKind },
|
||||
End,
|
||||
Advance { token: Token<'a> },
|
||||
}
|
||||
|
||||
/// Handle for a node that has been opened but not yet closed.
struct MarkStarted {
    /// Position of the node's `Start` event in the event list.
    index: usize,
}
|
||||
|
||||
/// Handle for a node that has already been closed.
struct MarkClosed {
    /// Position of the node's `Start` event in the event list.
    index: usize,
}
|
||||
|
||||
struct CParser<'a> {
|
||||
tokens: Tokens<'a>,
|
||||
current: Token<'a>,
|
||||
fuel: Cell<u32>,
|
||||
events: Vec<ParseEvent<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> CParser<'a> {
|
||||
fn new(tokens: Tokens<'a>) -> Self {
|
||||
let mut parser = CParser {
|
||||
tokens,
|
||||
current: Token::new(TokenKind::EOF, 0, ""),
|
||||
fuel: Cell::new(256),
|
||||
events: Vec::new(),
|
||||
};
|
||||
parser.current = parser.tokens.next();
|
||||
parser.skip_ephemera();
|
||||
parser
|
||||
}
|
||||
|
||||
fn start(&mut self) -> MarkStarted {
|
||||
let mark = MarkStarted {
|
||||
index: self.events.len(),
|
||||
};
|
||||
self.events.push(ParseEvent::Start {
|
||||
kind: TreeKind::Error,
|
||||
});
|
||||
mark
|
||||
}
|
||||
|
||||
fn end(&mut self, mark: MarkStarted, kind: TreeKind) -> MarkClosed {
|
||||
self.events[mark.index] = ParseEvent::Start { kind };
|
||||
self.events.push(ParseEvent::End);
|
||||
MarkClosed { index: mark.index }
|
||||
}
|
||||
|
||||
fn start_before(&mut self, mark: MarkClosed) -> MarkStarted {
|
||||
// TODO: Point backwards and pointer chase in tree build?
|
||||
let mark = MarkStarted { index: mark.index };
|
||||
self.events.insert(
|
||||
mark.index,
|
||||
ParseEvent::Start {
|
||||
kind: TreeKind::Error,
|
||||
},
|
||||
);
|
||||
mark
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
assert!(!self.eof()); // Don't try to advance past EOF
|
||||
self.fuel.set(256); // Consuming a token, rest stuck detector
|
||||
self.events.push(ParseEvent::Advance {
|
||||
token: self.current.clone(),
|
||||
});
|
||||
self.current = self.tokens.next();
|
||||
self.skip_ephemera();
|
||||
}
|
||||
|
||||
fn skip_ephemera(&mut self) {
|
||||
while self.current.kind == TokenKind::Whitespace || self.current.kind == TokenKind::Comment
|
||||
{
|
||||
self.current = self.tokens.next();
|
||||
}
|
||||
}
|
||||
|
||||
fn eof(&self) -> bool {
|
||||
self.current.kind == TokenKind::EOF
|
||||
}
|
||||
|
||||
fn peek(&self) -> TokenKind {
|
||||
assert!(self.fuel.get() > 0, "parser is stuck!");
|
||||
self.fuel.set(self.fuel.get() - 1);
|
||||
self.current.kind
|
||||
}
|
||||
|
||||
fn at(&self, kind: TokenKind) -> bool {
|
||||
self.peek() == kind
|
||||
}
|
||||
|
||||
fn eat(&mut self, kind: TokenKind) -> bool {
|
||||
if self.at(kind) {
|
||||
self.advance();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn expect<T>(&mut self, kind: TokenKind, error: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
if self.eat(kind) {
|
||||
return;
|
||||
}
|
||||
self.error(error);
|
||||
}
|
||||
|
||||
fn advance_with_error<T>(&mut self, error: T) -> MarkClosed
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
let m = self.start();
|
||||
self.error(error);
|
||||
self.advance();
|
||||
self.end(m, TreeKind::Error)
|
||||
}
|
||||
|
||||
fn error<T>(&mut self, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
self.error_at(self.current.clone(), message)
|
||||
}
|
||||
|
||||
fn error_at<T>(&mut self, token: Token<'a>, message: T)
|
||||
where
|
||||
T: Into<String>,
|
||||
{
|
||||
let message: String = message.into();
|
||||
let mut final_message = "Error ".to_string();
|
||||
|
||||
if token.kind == TokenKind::EOF {
|
||||
final_message.push_str("at end")
|
||||
} else if token.kind != TokenKind::Error {
|
||||
final_message.push_str("at '");
|
||||
final_message.push_str(token.as_str());
|
||||
final_message.push_str("'");
|
||||
}
|
||||
final_message.push_str(": ");
|
||||
final_message.push_str(&message);
|
||||
|
||||
self.events.push(ParseEvent::Advance {
|
||||
token: Token::error(token.start, final_message),
|
||||
});
|
||||
}
|
||||
|
||||
fn build_tree(self) -> (Tree<'a>, Lines) {
|
||||
let mut events = self.events;
|
||||
let mut stack = Vec::new();
|
||||
|
||||
// The first element in our events vector must be a start; the whole
|
||||
// thing must be bracketed in a tree.
|
||||
assert!(matches!(events.get(0), Some(ParseEvent::Start { .. })));
|
||||
|
||||
// The last element in our events vector must be an end, otherwise
|
||||
// the parser has failed badly. We'll remove it here so that, after
|
||||
// processing the entire array, the stack retains the tree that we
|
||||
// start with the very first ::Start.
|
||||
assert!(matches!(events.pop(), Some(ParseEvent::End)));
|
||||
|
||||
for event in events {
|
||||
match event {
|
||||
ParseEvent::Start { kind } => stack.push(Tree {
|
||||
kind,
|
||||
children: Vec::new(),
|
||||
}),
|
||||
|
||||
ParseEvent::End => {
|
||||
let tree = stack.pop().unwrap();
|
||||
stack.last_mut().unwrap().children.push(Child::Tree(tree));
|
||||
}
|
||||
|
||||
ParseEvent::Advance { token } => {
|
||||
stack.last_mut().unwrap().children.push(Child::Token(token));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert!(stack.len() == 1, "Not all trees were ended!");
|
||||
(stack.pop().unwrap(), self.tokens.lines())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_concrete(source: &str) -> (Tree, Lines) {
|
||||
let tokens = Tokens::new(source);
|
||||
let mut parser = CParser::new(tokens);
|
||||
|
||||
file(&mut parser);
|
||||
|
||||
parser.build_tree()
|
||||
}
|
||||
|
||||
fn file(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
while !p.eof() {
|
||||
match p.peek() {
|
||||
TokenKind::Fun => function(p),
|
||||
_ => statement(p),
|
||||
}
|
||||
}
|
||||
p.end(m, TreeKind::File);
|
||||
}
|
||||
|
||||
fn function(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Fun));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::Fun, "expected a function to start with 'fun'");
|
||||
p.expect(TokenKind::Identifier, "expected a function name");
|
||||
if p.at(TokenKind::LeftParen) {
|
||||
param_list(p);
|
||||
}
|
||||
if p.eat(TokenKind::Arrow) {
|
||||
type_expr(p);
|
||||
}
|
||||
if p.at(TokenKind::LeftBrace) {
|
||||
block(p);
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::FunDecl);
|
||||
}
|
||||
|
||||
fn param_list(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::LeftParen));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::LeftParen, "expect '(' to start a parameter list");
|
||||
while !p.at(TokenKind::RightParen) && !p.eof() {
|
||||
if p.at(TokenKind::Identifier) {
|
||||
parameter(p);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
p.expect(TokenKind::RightParen, "expect ')' to end a parameter list");
|
||||
|
||||
p.end(m, TreeKind::ParamList);
|
||||
}
|
||||
|
||||
fn parameter(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Identifier));
|
||||
let m = p.start();
|
||||
p.expect(
|
||||
TokenKind::Identifier,
|
||||
"expected an identifier for a parameter name",
|
||||
);
|
||||
if p.eat(TokenKind::Colon) {
|
||||
type_expr(p);
|
||||
}
|
||||
if !p.at(TokenKind::RightParen) {
|
||||
p.expect(TokenKind::Comma, "expected a comma between parameters");
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::Parameter);
|
||||
}
|
||||
|
||||
fn type_expr(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
// TODO: Other kinds of type expressions probably!
|
||||
p.expect(TokenKind::Identifier, "expected the identifier of a type");
|
||||
p.end(m, TreeKind::TypeExpression);
|
||||
}
|
||||
|
||||
fn block(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::LeftBrace));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::LeftBrace, "expect '{' to start a block");
|
||||
while !p.at(TokenKind::RightBrace) && !p.eof() {
|
||||
statement(p);
|
||||
}
|
||||
p.expect(TokenKind::RightBrace, "expect '}' to start a block");
|
||||
|
||||
p.end(m, TreeKind::Block);
|
||||
}
|
||||
|
||||
fn statement(p: &mut CParser) {
|
||||
match p.peek() {
|
||||
TokenKind::LeftBrace => block(p),
|
||||
TokenKind::Let => statement_let(p),
|
||||
TokenKind::Return => statement_return(p),
|
||||
|
||||
// NOTE: Technically 'if' is an expression, but `if` doesn't
|
||||
// require a semicolon at the end if it's all by itself.
|
||||
TokenKind::If => statement_if(p),
|
||||
|
||||
_ => statement_expression(p),
|
||||
}
|
||||
}
|
||||
|
||||
fn statement_if(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::If));
|
||||
let m = p.start();
|
||||
|
||||
conditional(p);
|
||||
|
||||
p.end(m, TreeKind::IfStatement);
|
||||
}
|
||||
|
||||
fn statement_let(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Let));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::Let, "expect 'let' to start a let statement");
|
||||
p.expect(TokenKind::Identifier, "expected a name for the variable");
|
||||
p.expect(TokenKind::Equal, "expected a '=' after the variable name");
|
||||
expression(p);
|
||||
p.expect(TokenKind::Semicolon, "expect ';' to end a let statement");
|
||||
|
||||
p.end(m, TreeKind::LetStatement);
|
||||
}
|
||||
|
||||
fn statement_return(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::Return));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(
|
||||
TokenKind::Return,
|
||||
"expect 'return' to start a return statement",
|
||||
);
|
||||
expression(p);
|
||||
p.expect(TokenKind::Semicolon, "expect ';' to end a return statement");
|
||||
|
||||
p.end(m, TreeKind::ReturnStatement);
|
||||
}
|
||||
|
||||
fn statement_expression(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
|
||||
expression(p);
|
||||
p.expect(
|
||||
TokenKind::Semicolon,
|
||||
"expect ';' to end an expression statement",
|
||||
);
|
||||
|
||||
p.end(m, TreeKind::ExpressionStatement);
|
||||
}
|
||||
|
||||
fn expression(p: &mut CParser) {
|
||||
expression_with_power(p, 0)
|
||||
}
|
||||
|
||||
fn expression_with_power(p: &mut CParser, minimum_power: u8) {
|
||||
let mut expr = prefix_expression(p);
|
||||
while p.at(TokenKind::LeftParen) {
|
||||
let m = p.start_before(expr);
|
||||
argument_list(p);
|
||||
expr = p.end(m, TreeKind::CallExpression);
|
||||
}
|
||||
|
||||
loop {
|
||||
let Some(power) = token_power(p.peek()) else {
|
||||
break;
|
||||
};
|
||||
if power < minimum_power {
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: I don't think this works for other "infix" types, but we'll
|
||||
// see won't we.
|
||||
let m = p.start_before(expr);
|
||||
p.advance(); // Consume the operator
|
||||
expression_with_power(p, power);
|
||||
expr = p.end(m, TreeKind::BinaryExpression);
|
||||
}
|
||||
}
|
||||
|
||||
fn argument_list(p: &mut CParser) {
|
||||
assert!(p.at(TokenKind::LeftParen));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(
|
||||
TokenKind::LeftParen,
|
||||
"expect an argument list to start with '('",
|
||||
);
|
||||
while !p.at(TokenKind::RightParen) && !p.eof() {
|
||||
argument(p);
|
||||
}
|
||||
p.expect(
|
||||
TokenKind::RightParen,
|
||||
"expect an argument list to start with '('",
|
||||
);
|
||||
|
||||
p.end(m, TreeKind::ArgumentList);
|
||||
}
|
||||
|
||||
fn argument(p: &mut CParser) {
|
||||
let m = p.start();
|
||||
|
||||
expression(p);
|
||||
if !p.at(TokenKind::RightParen) {
|
||||
p.expect(TokenKind::Comma, "expect a ',' between arguments");
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::Argument);
|
||||
}
|
||||
|
||||
fn prefix_expression(p: &mut CParser) -> MarkClosed {
|
||||
match p.peek() {
|
||||
TokenKind::Number => literal(p),
|
||||
TokenKind::String => literal(p),
|
||||
TokenKind::True => literal(p),
|
||||
TokenKind::False => literal(p),
|
||||
|
||||
TokenKind::LeftParen => grouping(p),
|
||||
|
||||
TokenKind::Bang => unary(p),
|
||||
TokenKind::Minus => unary(p),
|
||||
|
||||
TokenKind::If => conditional(p),
|
||||
|
||||
TokenKind::Identifier => identifier(p),
|
||||
|
||||
_ => p.advance_with_error("expected an expression"),
|
||||
}
|
||||
}
|
||||
|
||||
fn literal(p: &mut CParser) -> MarkClosed {
|
||||
let m = p.start();
|
||||
p.advance();
|
||||
p.end(m, TreeKind::LiteralExpression)
|
||||
}
|
||||
|
||||
fn grouping(p: &mut CParser) -> MarkClosed {
|
||||
assert!(p.at(TokenKind::LeftParen));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::LeftParen, "expected '(' to start grouping");
|
||||
expression(p);
|
||||
p.expect(TokenKind::RightParen, "unmatched parentheses in expression");
|
||||
|
||||
p.end(m, TreeKind::GroupingExpression)
|
||||
}
|
||||
|
||||
fn unary(p: &mut CParser) -> MarkClosed {
|
||||
let m = p.start();
|
||||
|
||||
p.advance(); // Past the operator
|
||||
expression_with_power(p, UNARY_POWER);
|
||||
|
||||
p.end(m, TreeKind::UnaryExpression)
|
||||
}
|
||||
|
||||
fn conditional(p: &mut CParser) -> MarkClosed {
|
||||
assert!(p.at(TokenKind::If));
|
||||
let m = p.start();
|
||||
|
||||
p.expect(TokenKind::If, "expected conditional to start with 'if'");
|
||||
expression(p);
|
||||
block(p);
|
||||
if p.eat(TokenKind::Else) {
|
||||
if p.at(TokenKind::If) {
|
||||
// Don't require another block, just jump right into the conditional.
|
||||
conditional(p);
|
||||
} else {
|
||||
block(p);
|
||||
}
|
||||
}
|
||||
|
||||
p.end(m, TreeKind::ConditionalExpression)
|
||||
}
|
||||
|
||||
fn identifier(p: &mut CParser) -> MarkClosed {
|
||||
assert!(p.at(TokenKind::Identifier));
|
||||
let m = p.start();
|
||||
|
||||
p.advance();
|
||||
|
||||
p.end(m, TreeKind::Identifier)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Parses `source` as a single expression and checks that the debug
    /// rendering of the resulting tree equals `expected`.
    fn test_successful_expression_parse(source: &str, expected: &str) {
        let mut parser = CParser::new(Tokens::new(source));
        expression(&mut parser);

        let (parsed, _) = parser.build_tree();
        let actual = format!("{parsed:?}");
        assert_eq!(
            expected, actual,
            "The parse structure of the expressions did not match"
        );
    }

    /// Declares a `#[test]` that runs `test_successful_expression_parse`
    /// on the given input and expected rendering.
    macro_rules! test_expr {
        ($test_name:ident, $source:expr, $rendering:expr) => {
            #[test]
            fn $test_name() {
                test_successful_expression_parse($source, $rendering);
            }
        };
    }

    test_expr!(number_expr, "12", "[LiteralExpression Number:'12']");
}
|
||||
|
|
@ -3,9 +3,6 @@ pub enum TokenKind {
|
|||
EOF,
|
||||
Error,
|
||||
|
||||
Whitespace,
|
||||
Comment,
|
||||
|
||||
LeftBrace,
|
||||
RightBrace,
|
||||
LeftBracket,
|
||||
|
|
@ -393,7 +390,7 @@ impl<'a> Tokens<'a> {
|
|||
self.next_char.is_none()
|
||||
}
|
||||
|
||||
fn whitespace(&mut self, pos: usize) -> Token<'a> {
|
||||
fn skip_whitespace(&mut self) {
|
||||
while let Some((pos, ch)) = self.next_char {
|
||||
if ch == '\n' {
|
||||
self.lines.add_line(pos);
|
||||
|
|
@ -402,27 +399,16 @@ impl<'a> Tokens<'a> {
|
|||
}
|
||||
self.advance();
|
||||
}
|
||||
self.token(pos, TokenKind::Whitespace)
|
||||
}
|
||||
|
||||
fn comment(&mut self, pos: usize) -> Token<'a> {
|
||||
while let Some((_, ch)) = self.next_char {
|
||||
if ch == '\n' {
|
||||
break;
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
self.token(pos, TokenKind::Comment)
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Token<'a> {
|
||||
self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
|
||||
let (pos, c) = match self.advance() {
|
||||
Some((p, c)) => (p, c),
|
||||
None => return self.token(self.source.len(), TokenKind::EOF),
|
||||
};
|
||||
|
||||
match c {
|
||||
' ' | '\t' | '\r' | '\n' => self.whitespace(pos),
|
||||
'{' => self.token(pos, TokenKind::LeftBrace),
|
||||
'}' => self.token(pos, TokenKind::RightBrace),
|
||||
'[' => self.token(pos, TokenKind::LeftBracket),
|
||||
|
|
@ -441,13 +427,7 @@ impl<'a> Tokens<'a> {
|
|||
'+' => self.token(pos, TokenKind::Plus),
|
||||
':' => self.token(pos, TokenKind::Colon),
|
||||
';' => self.token(pos, TokenKind::Semicolon),
|
||||
'/' => {
|
||||
if self.matches('/') {
|
||||
self.comment(pos)
|
||||
} else {
|
||||
self.token(pos, TokenKind::Slash)
|
||||
}
|
||||
}
|
||||
'/' => self.token(pos, TokenKind::Slash),
|
||||
'*' => self.token(pos, TokenKind::Star),
|
||||
'!' => {
|
||||
if self.matches('=') {
|
||||
|
|
@ -504,9 +484,6 @@ mod tests {
|
|||
while !is_eof {
|
||||
let token = tokens.next();
|
||||
is_eof = token.kind == TokenKind::EOF;
|
||||
if token.kind == TokenKind::Whitespace {
|
||||
continue;
|
||||
}
|
||||
result.push(token);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +0,0 @@
|
|||
use fine::parser::concrete::Tree;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn assert_concrete(tree: &Tree, expected: &str) {
|
||||
assert_eq!(tree.dump(), expected, "concrete syntax trees did not match");
|
||||
}
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/generated_tests.rs"));
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
// concrete:
|
||||
// | File
|
||||
// | ExpressionStatement
|
||||
// | LiteralExpression
|
||||
// | Number:'"42"'
|
||||
// | Semicolon:'";"'
|
||||
// | ExpressionStatement
|
||||
// | BinaryExpression
|
||||
// | BinaryExpression
|
||||
// | LiteralExpression
|
||||
// | Number:'"1"'
|
||||
// | Star:'"*"'
|
||||
// | LiteralExpression
|
||||
// | Number:'"2"'
|
||||
// | Plus:'"+"'
|
||||
// | BinaryExpression
|
||||
// | UnaryExpression
|
||||
// | Minus:'"-"'
|
||||
// | LiteralExpression
|
||||
// | Number:'"3"'
|
||||
// | Star:'"*"'
|
||||
// | LiteralExpression
|
||||
// | Number:'"4"'
|
||||
// | Semicolon:'";"'
|
||||
//
|
||||
42;
|
||||
1 * 2 + -3 * 4;
|
||||
Loading…
Add table
Add a link
Reference in a new issue