diff --git a/.gitignore b/.gitignore index 04f1b64e..19b8760f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /oden-js/target /oden-js-sys/target +/oden-script/target \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index f6aa2494..2e15502a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1545,6 +1545,7 @@ dependencies = [ "lru", "notify", "oden-js", + "oden-script", "pollster", "sourcemap 7.0.0", "tracy-client", @@ -1571,6 +1572,10 @@ dependencies = [ "walkdir", ] +[[package]] +name = "oden-script" +version = "0.1.0" + [[package]] name = "once_cell" version = "1.18.0" diff --git a/Cargo.toml b/Cargo.toml index aba24a55..0b059006 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ log = "0.4" lru = "0.11.0" notify = "6" oden-js = { path = "oden-js" } +oden-script = { path = "oden-script" } pollster = "0.3" sourcemap = "7.0.0" tracy-client = { version = "0.15.2", default-features = false } diff --git a/oden-script/Cargo.lock b/oden-script/Cargo.lock new file mode 100644 index 00000000..459de42c --- /dev/null +++ b/oden-script/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
// NOTE: these lines of the patch also carried fragments that belong to other
// files. They are not Rust; they are preserved verbatim here, one patch line
// per comment line.
//
// +version = 3
// +
// +[[package]]
// +name = "oden-script"
// +version = "0.1.0"
// diff --git a/oden-script/Cargo.toml b/oden-script/Cargo.toml
// new file mode 100644
// index 00000000..87a1f807
// --- /dev/null
// +++ b/oden-script/Cargo.toml
// @@ -0,0 +1,4 @@
// +[package]
// +name = "oden-script"
// +version = "0.1.0"
// +edition = "2021"
// diff --git a/oden-script/src/lib.rs b/oden-script/src/lib.rs
// new file mode 100644
// index 00000000..e0d6d806
// --- /dev/null
// +++ b/oden-script/src/lib.rs
// @@ -0,0 +1,458 @@

/// Every kind of token the scanner can produce.
///
/// Literal-carrying variants borrow their text straight from the source
/// string; `Error` owns its diagnostic message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind<'a> {
    LeftBrace,
    RightBrace,
    LeftBracket,
    RightBracket,
    LeftParen,
    RightParen,
    Comma,
    Dot,
    Minus,
    Plus,
    Semicolon,
    Slash,
    Star,

    Bang,
    BangEqual,
    Equal,
    EqualEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,

    Identifier(&'a str), // TODO
    String(&'a str),
    Number(&'a str),

    And,
    Async,
    Await,
    Class,
    Else,
    False,
    For,
    From,
    Fun,
    If,
    Let,
    Or,
    Print,
    Return,
    Select,
    This,
    True,
    While,
    Yield,

    Error(String),
}

/// A token together with the byte offset in the source where it starts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'a> {
    kind: TokenKind<'a>,
    start: usize,
}

impl<'a> Token<'a> {
    /// Returns the textual form of the token.
    ///
    /// Punctuation and keywords yield their fixed spelling, identifiers,
    /// strings and numbers yield the slice of source they were scanned from,
    /// and error tokens yield their message.
    pub fn as_str<'b>(&'b self) -> &'a str
    where
        'b: 'a,
    {
        match &self.kind {
            TokenKind::LeftBrace => "{",
            TokenKind::RightBrace => "}",
            TokenKind::LeftBracket => "[",
            TokenKind::RightBracket => "]",

            TokenKind::LeftParen => "(",
            TokenKind::RightParen => ")",
            TokenKind::Comma => ",",
            TokenKind::Dot => ".",
            TokenKind::Minus => "-",

            TokenKind::Plus => "+",
            TokenKind::Semicolon => ";",
            TokenKind::Slash => "/",
            TokenKind::Star => "*",

            TokenKind::Bang => "!",
            TokenKind::BangEqual => "!=",
            TokenKind::Equal => "=",
            TokenKind::EqualEqual => "==",
            TokenKind::Greater => ">",
            TokenKind::GreaterEqual => ">=",
            TokenKind::Less => "<",
            TokenKind::LessEqual => "<=",

            TokenKind::Identifier(v) | TokenKind::String(v) | TokenKind::Number(v) => *v,

            TokenKind::And => "and",
            TokenKind::Async => "async",
            TokenKind::Await => "await",
            TokenKind::Class => "class",
            TokenKind::Else => "else",
            TokenKind::False => "false",
            TokenKind::For => "for",
            TokenKind::From => "from",
            TokenKind::Fun => "fun",
            TokenKind::If => "if",
            TokenKind::Let => "let",
            TokenKind::Or => "or",
            TokenKind::Print => "print",
            TokenKind::Return => "return",
            TokenKind::Select => "select",
            TokenKind::This => "this",
            TokenKind::True => "true",
            TokenKind::While => "while",
            TokenKind::Yield => "yield",

            TokenKind::Error(e) => e.as_str(),
        }
    }
}

/// A pull-style scanner over a source string.
///
/// Call [`Tokens::next_token`] repeatedly until it returns `None`.
pub struct Tokens<'a> {
    source: &'a str,
    chars: std::str::CharIndices<'a>,
    /// One character of lookahead: the next character `advance` will return.
    next_char: Option<(usize, char)>,
    /// Byte offsets of every `'\n'` consumed so far, in increasing order.
    newlines: Vec<usize>,
}

impl<'a> Tokens<'a> {
    /// Creates a scanner positioned at the start of `source`.
    pub fn new(source: &'a str) -> Self {
        let mut chars = source.char_indices();
        let next_char = chars.next();
        Tokens {
            source,
            chars,
            next_char,
            newlines: Vec::new(),
        }
    }

    /// Returns `(line, column)` for a token produced by this scanner.
    ///
    /// The line is 1-based; the column is the 0-based byte offset from the
    /// start of that line. Only newlines the scanner has already consumed are
    /// known, which is always enough for a token it has already returned.
    pub fn token_position(&self, token: &Token) -> (usize, usize) {
        // The number of recorded newlines before the token is the index of
        // the line it sits on.
        let line_end_index = self
            .newlines
            .binary_search(&token.start)
            .unwrap_or_else(|index| index);
        let line_start_pos = if line_end_index == 0 {
            0
        } else {
            self.newlines[line_end_index - 1] + 1
        };
        let line_number = line_end_index + 1;
        let column_offset = token.start - line_start_pos;
        (line_number, column_offset)
    }

    /// Scans and returns the next token, or `None` at end of input.
    ///
    /// Lexical problems are reported in-band as `TokenKind::Error` tokens.
    /// A `+` or `-` immediately followed by a digit is folded into the
    /// numeric literal.
    pub fn next_token(&mut self) -> Option<Token<'a>> {
        self.skip_whitespace(); // TODO: Whitespace preserving/comment preserving
        let (pos, c) = self.advance()?;

        let kind = match c {
            '{' => TokenKind::LeftBrace,
            '}' => TokenKind::RightBrace,
            '[' => TokenKind::LeftBracket,
            ']' => TokenKind::RightBracket,
            '(' => TokenKind::LeftParen,
            ')' => TokenKind::RightParen,
            ',' => TokenKind::Comma,
            '.' => TokenKind::Dot,
            '-' => {
                if self.matches_next(|c| c.is_ascii_digit()) {
                    self.number(pos)
                } else {
                    TokenKind::Minus
                }
            }
            '+' => {
                if self.matches_next(|c| c.is_ascii_digit()) {
                    self.number(pos)
                } else {
                    TokenKind::Plus
                }
            }
            ';' => TokenKind::Semicolon,
            '/' => TokenKind::Slash,
            '*' => TokenKind::Star,
            '!' => self.if_equal(TokenKind::BangEqual, TokenKind::Bang),
            '=' => self.if_equal(TokenKind::EqualEqual, TokenKind::Equal),
            '>' => self.if_equal(TokenKind::GreaterEqual, TokenKind::Greater),
            '<' => self.if_equal(TokenKind::LessEqual, TokenKind::Less),
            '\'' => self.string(pos, '\''),
            '"' => self.string(pos, '"'),
            // Classify on `c` itself: it has already been consumed, so
            // looking at the lookahead here would test the wrong character.
            _ if c.is_ascii_digit() => self.number(pos),
            _ if c.is_ascii_alphabetic() || c == '_' => self.identifier(pos),
            _ => TokenKind::Error(format!("Unexpected character '{c}'")),
        };
        Some(self.token(pos, kind))
    }

    /// Wraps a kind and its start offset into a `Token`.
    fn token(&self, start: usize, kind: TokenKind<'a>) -> Token<'a> {
        Token { kind, start }
    }

    /// Picks `with_equal` if the next character is `=` (consuming it),
    /// otherwise `without`.
    fn if_equal(&mut self, with_equal: TokenKind<'a>, without: TokenKind<'a>) -> TokenKind<'a> {
        if self.matches('=') {
            with_equal
        } else {
            without
        }
    }

    /// Scans the rest of a numeric literal whose first character (a digit,
    /// or a sign plus a digit) has already been consumed.
    fn number(&mut self, start: usize) -> TokenKind<'a> {
        // First, the main part.
        while self.matches_digit() {}

        // Now the fraction part. This is speculative: a '.' that is not
        // followed by a digit might be a member access on an integer, so we
        // must be able to rewind. Both the iterator and the lookahead have to
        // be saved, otherwise the rewound scanner is inconsistent.
        let saved_chars = self.chars.clone();
        let saved_next = self.next_char;
        if self.matches('.') {
            if self.digit_run() {
                // OK we're good to here! Check the scientific notation.
                if self.matches('e') || self.matches('E') {
                    // Optional exponent sign; consumed only for its effect.
                    let _signed = self.matches('+') || self.matches('-');
                    if !self.digit_run() {
                        // This is just a broken number.
                        let slice = &self.source[start..self.pos()];
                        return TokenKind::Error(format!(
                            "Invalid floating-point literal: {slice}"
                        ));
                    }
                }
            } else {
                // Might be accessing a member on an integer. Only '.' and
                // '_' were consumed since the checkpoint, so no newline
                // bookkeeping needs undoing.
                self.chars = saved_chars;
                self.next_char = saved_next;
            }
        }

        TokenKind::Number(&self.source[start..self.pos()])
    }

    /// Consumes a run of digits and `_` separators; returns whether at least
    /// one actual digit was seen.
    fn digit_run(&mut self) -> bool {
        let mut saw_digit = false;
        loop {
            if self.matches_next(|c| c.is_ascii_digit()) {
                saw_digit = true;
            } else if !self.matches('_') {
                break;
            }
        }
        saw_digit
    }

    /// Scans the rest of a string literal whose opening `delimiter` has
    /// already been consumed. The resulting slice includes both delimiters.
    fn string(&mut self, start: usize, delimiter: char) -> TokenKind<'a> {
        loop {
            match self.advance() {
                None => return TokenKind::Error("Unterminated string constant".to_string()),
                Some((_, c)) if c == delimiter => break,
                Some((_, '\\')) => {
                    // Skip the escaped character so an escaped delimiter
                    // does not end the literal.
                    self.advance();
                }
                Some(_) => {}
            }
        }

        TokenKind::String(&self.source[start..self.pos()])
    }

    /// Scans the rest of an identifier or keyword whose first character has
    /// already been consumed.
    fn identifier(&mut self, start: usize) -> TokenKind<'a> {
        // TODO: Use unicode identifier classes instead
        while self.matches_next(|c| c.is_ascii_alphanumeric() || c == '_') {}

        let ident = &self.source[start..self.pos()];
        match ident {
            "and" => TokenKind::And,
            "async" => TokenKind::Async,
            "await" => TokenKind::Await,
            "class" => TokenKind::Class,
            "else" => TokenKind::Else,
            "false" => TokenKind::False,
            "for" => TokenKind::For,
            "from" => TokenKind::From,
            "fun" => TokenKind::Fun,
            "if" => TokenKind::If,
            "let" => TokenKind::Let,
            "or" => TokenKind::Or,
            "print" => TokenKind::Print,
            "return" => TokenKind::Return,
            "select" => TokenKind::Select,
            "this" => TokenKind::This,
            "true" => TokenKind::True,
            "while" => TokenKind::While,
            "yield" => TokenKind::Yield,
            _ => TokenKind::Identifier(ident),
        }
    }

    /// Consumes the next character if it equals `ch`.
    fn matches(&mut self, ch: char) -> bool {
        self.matches_next(|next_ch| next_ch == ch)
    }

    /// Consumes the next character if `f` accepts it.
    fn matches_next<F>(&mut self, f: F) -> bool
    where
        F: FnOnce(char) -> bool,
    {
        match self.next_char {
            Some((_, next_ch)) if f(next_ch) => {
                self.advance();
                true
            }
            _ => false,
        }
    }

    /// Consumes one digit or one `_` separator, if present.
    fn matches_digit(&mut self) -> bool {
        self.matches('_') || self.matches_next(|c| c.is_ascii_digit())
    }

    /// Consumes and returns the lookahead character, refilling the lookahead.
    ///
    /// Every consumed `'\n'` is recorded here, so newlines inside string
    /// literals are tracked as well as those in whitespace.
    fn advance(&mut self) -> Option<(usize, char)> {
        let result = self.next_char;
        if let Some((pos, '\n')) = result {
            self.newlines.push(pos);
        }
        self.next_char = self.chars.next();
        result
    }

    /// Byte offset of the lookahead character, or the source length at EOF.
    fn pos(&self) -> usize {
        match self.next_char {
            Some((p, _)) => p,
            None => self.source.len(),
        }
    }

    /// Consumes whitespace up to the next significant character.
    fn skip_whitespace(&mut self) {
        while self.matches_next(char::is_whitespace) {}
    }
}

/// Scans `input` and prints each token as `<start offset>: <text>` on stdout.
pub fn tokenize(input: String) {
    let mut tokens = Tokens::new(&input);
    while let Some(token) = tokens.next_token() {
        println!("{}: {}", token.start, token.as_str());
    }
}

// Patch fragment for a sibling file, preserved verbatim (not part of lib.rs):
//
// diff --git a/oden-script/src/main.rs b/oden-script/src/main.rs
// new file mode 100644
// index 00000000..7f158ec7
// --- /dev/null
// +++ b/oden-script/src/main.rs
// @@ -0,0 +1,3 @@
// +use oden_script;
// +
// +pub fn main() {}