178 lines
4.8 KiB
Rust
178 lines
4.8 KiB
Rust
use std::{str::CharIndices, iter::Peekable, fmt};
|
|
|
|
|
|
/// A single lexical token.
///
/// The lifetime `'i` is the lifetime of the source text: [`Token::Name`]
/// borrows its identifier directly from the input, so tokens are `Copy` and
/// never allocate.
///
/// `PartialEq` is derived so tokens can be compared (for example in tests).
/// `Eq` is intentionally not derived because [`Token::Float`] holds an `f64`.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Token<'i> {
    /// Numeric literal containing a `.`, or a dot-less literal that does not
    /// fit in an `i32`.
    Float(f64),
    /// Dot-less numeric literal that fits in an `i32`.
    Int(i32),
    /// Identifier, as a slice of the source text.
    Name(&'i str),
    /// The `sum` keyword.
    Sum,
    /// The `prod` keyword.
    Prod,
    /// The `iter` keyword.
    Iter,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `^`
    Caret,
    /// `,`
    Comma,
    /// `->`
    Arrow,
    /// `=`
    Equal,
    /// `:`
    Colon,
    /// A line break (`\n`).
    Newline,
}
|
|
|
|
impl<'i> fmt::Display for Token<'i> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Token::Float(n) => write!(f, "{n}"),
|
|
Token::Int(n) => write!(f, "{n}"),
|
|
Token::Name(n) => write!(f, "{n}"),
|
|
Token::Sum => f.write_str("sum"),
|
|
Token::Prod => f.write_str("prod"),
|
|
Token::Iter => f.write_str("iter"),
|
|
Token::LParen => f.write_str("("),
|
|
Token::RParen => f.write_str(")"),
|
|
Token::LBrace => f.write_str("{"),
|
|
Token::RBrace => f.write_str("}"),
|
|
Token::Plus => f.write_str("+"),
|
|
Token::Minus => f.write_str("-"),
|
|
Token::Star => f.write_str("*"),
|
|
Token::Slash => f.write_str("/"),
|
|
Token::Caret => f.write_str("^"),
|
|
Token::Comma => f.write_str(","),
|
|
Token::Arrow => f.write_str("->"),
|
|
Token::Equal => f.write_str("="),
|
|
Token::Colon => f.write_str(":"),
|
|
Token::Newline => f.write_str("newline")
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Errors reported while splitting source text into tokens.
///
/// `PartialEq`/`Eq` are derived so errors can be compared directly.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LexerError {
    /// A character that cannot start any token: `(byte offset, character)`.
    Unexpected(usize, char),
    /// Text that was scanned as a number but could not be parsed:
    /// `(start, end)` byte offsets, with `end` exclusive.
    InvalidNumber(usize, usize),
}
|
|
|
|
impl fmt::Display for LexerError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
LexerError::Unexpected(i, c) => write!(f, "Unexpected character {c:?} at {i}"),
|
|
LexerError::InvalidNumber(i, j) => write!(f, "Invalid number at {i}:{j}"),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A value of type `T` enclosed by two locations of type `L`, or an error `E`.
///
/// The lexer in this file instantiates it as
/// `Spanned<Token, usize, LexerError>`, where the two locations are the start
/// and exclusive-end byte offsets of the token in the source text.
pub type Spanned<T, L, E> = Result<(L, T, L), E>;
|
|
|
|
/// Streaming lexer over a borrowed source string.
///
/// `Clone` and `Debug` are derived: all fields are cheap to copy (a string
/// slice, an iterator over that slice, and a counter), which makes it possible
/// to snapshot the lexer state or inspect it while debugging.
#[derive(Clone, Debug)]
pub struct Lexer<'i> {
    /// The complete input; token text is sliced out of it by byte offset.
    src: &'i str,
    /// Cursor over the input yielding `(byte offset, char)` pairs, with one
    /// item of lookahead.
    chars: Peekable<CharIndices<'i>>,
    /// Bracket nesting counter, adjusted on `(`/`{` and `)`/`}`. While it is
    /// non-zero, newlines are skipped as whitespace instead of becoming tokens.
    bracket_depth: usize,
}
|
|
|
|
/// Returns `true` if `c` may start an identifier, i.e. it is an alphabetic
/// character. Note that `_` and digits are not accepted here.
fn is_ident_begin(c: char) -> bool {
    char::is_alphabetic(c)
}
|
|
|
|
/// Returns `true` if `c` may appear after the first character of an
/// identifier: any alphanumeric character, an underscore, or a prime (`'`).
fn is_ident_middle(c: char) -> bool {
    matches!(c, '_' | '\'') || c.is_alphanumeric()
}
|
|
|
|
impl<'i> Lexer<'i> {
|
|
pub fn new(src: &'i str) -> Self {
|
|
Self {
|
|
src,
|
|
chars: src.char_indices().peekable(),
|
|
bracket_depth: 0,
|
|
}
|
|
}
|
|
|
|
fn next_number(&mut self, i: usize, mut has_dot: bool) -> Spanned<Token<'i>, usize, LexerError> {
|
|
let mut j = i;
|
|
|
|
while self.chars.peek().is_some_and(|(_, c)| c.is_ascii_digit()) {
|
|
j = self.chars.next().unwrap().0;
|
|
}
|
|
|
|
if !has_dot && matches!(self.chars.peek(), Some((_, '.'))) {
|
|
j = self.chars.next().unwrap().0;
|
|
has_dot = true;
|
|
while self.chars.peek().is_some_and(|(_, c)| c.is_ascii_digit()) {
|
|
j = self.chars.next().unwrap().0;
|
|
}
|
|
}
|
|
|
|
let s = &self.src[i..j+1];
|
|
if !has_dot {
|
|
if let Ok(n) = s.parse::<i32>() {
|
|
return Ok((i, Token::Int(n), j+1))
|
|
}
|
|
}
|
|
match s.parse::<f64>() {
|
|
Ok(n) => Ok((i, Token::Float(n), j+1)),
|
|
Err(_) => Err(LexerError::InvalidNumber(i, j+1)),
|
|
}
|
|
}
|
|
|
|
fn next_word(&mut self, i: usize, mut j: usize) -> Spanned<Token<'i>, usize, LexerError> {
|
|
while self.chars.peek().is_some_and(|(_, c)| is_ident_middle(*c)) {
|
|
j += self.chars.next().unwrap().1.len_utf8();
|
|
}
|
|
|
|
let s = &self.src[i..j];
|
|
match s {
|
|
"sum" => Ok((i, Token::Sum, j)),
|
|
"prod" => Ok((i, Token::Prod, j)),
|
|
"iter" => Ok((i, Token::Iter, j)),
|
|
_ => Ok((i, Token::Name(s), j)),
|
|
}
|
|
}
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
while matches!(self.chars.peek(), Some((_, ' ' | '\t' | '\n' | '\r'))) {
|
|
if self.bracket_depth == 0 && matches!(self.chars.peek(), Some((_, '\n'))) {
|
|
break
|
|
}
|
|
self.chars.next();
|
|
}
|
|
}
|
|
|
|
fn next_token(&mut self) -> Option<Spanned<Token<'i>, usize, LexerError>> {
|
|
self.skip_whitespace();
|
|
|
|
Some(match self.chars.next()? {
|
|
(_, '#') => {
|
|
while !matches!(self.chars.peek(), Some((_, '\n')) | None) {
|
|
self.chars.next();
|
|
}
|
|
self.next_token()?
|
|
}
|
|
(i, '(') => { self.bracket_depth += 1; Ok((i, Token::LParen, i + 1)) },
|
|
(i, ')') => { self.bracket_depth -= 1; Ok((i, Token::RParen, i + 1)) },
|
|
(i, '{') => { self.bracket_depth += 1; Ok((i, Token::LBrace, i + 1)) },
|
|
(i, '}') => { self.bracket_depth -= 1; Ok((i, Token::RBrace, i + 1)) },
|
|
(i, '+') => Ok((i, Token::Plus, i + 1)),
|
|
(i, '-') => match self.chars.peek() {
|
|
Some((_, '>')) => {
|
|
self.chars.next();
|
|
Ok((i, Token::Arrow, i + 2))
|
|
},
|
|
_ => Ok((i, Token::Minus, i + 1)),
|
|
}
|
|
(i, '*') => Ok((i, Token::Star, i + 1)),
|
|
(i, '/') => Ok((i, Token::Slash, i + 1)),
|
|
(i, '^') => Ok((i, Token::Caret, i + 1)),
|
|
(i, ',') => Ok((i, Token::Comma, i + 1)),
|
|
(i, '=') => Ok((i, Token::Equal, i + 1)),
|
|
(i, ':') => Ok((i, Token::Colon, i + 1)),
|
|
(i, '\n') => Ok((i, Token::Newline, i + 1)),
|
|
(i, '0'..='9') => self.next_number(i, false),
|
|
(i, '.') => self.next_number(i, true),
|
|
(i, c) if is_ident_begin(c) => self.next_word(i, i + c.len_utf8()),
|
|
(i, c) => Err(LexerError::Unexpected(i, c)),
|
|
})
|
|
}
|
|
}
|
|
|
|
impl<'i> Iterator for Lexer<'i> {
|
|
type Item = Spanned<Token<'i>, usize, LexerError>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.next_token()
|
|
}
|
|
}
|