use crate::{token::{Token, TokenType, OpType}, ParserError, expr::{Stmt, Expr}, value}; pub struct Parser { tokens: Vec, repl: bool, idx: usize } impl Parser { pub fn new(tokens: Vec, repl: bool) -> Self { Self { tokens, repl, idx: 0 } } pub fn parse(&mut self) -> Result, ParserError> { let mut stmts = vec![]; while !self.at_end() { stmts.push(self.statement(!self.repl)?); } Ok(stmts) } //////////////////////// // // // Helper functions // // // //////////////////////// fn at_end(&self) -> bool { self.idx >= self.tokens.len() } fn peek(&self) -> &Token { &self.tokens[self.idx] } fn next(&mut self) -> Token { let t = self.tokens[self.idx].clone(); self.idx += 1; t } fn expect(&mut self, tokty: TokenType) -> (bool, Token) { let next = self.next(); (tokty == next.ty, next) } fn mk_error(&self, msg: S) -> ParserError where S: Into { let token = if self.at_end() { self.tokens.last().unwrap() } else { self.peek() }; ParserError { pos: token.pos.clone(), message: msg.into() } } fn err_on_eof(&self) -> Result<(), ParserError> { if self.at_end() { Err(self.mk_error("Unexpected EOF")) } else { Ok(()) } } fn ident(&mut self) -> Result { let next = self.next(); match next.ty { TokenType::Ident(_) => Ok(next), _ => Err(ParserError { message: "Expected identifier".into(), pos: next.pos }) } } fn commalist(&mut self, terminator: TokenType, parse_item: fn(&mut Parser) -> Result) -> Result, ParserError> { let mut items = vec![]; while !self.at_end() && self.peek().ty != terminator { let expr = parse_item(self)?; items.push(expr); self.err_on_eof()?; if self.peek().ty == TokenType::Comma { self.next(); } else if self.peek().ty == terminator { break; } else { return Err(self.mk_error(format!("Expected Comma or {:?} after list", terminator))) } } self.err_on_eof()?; self.next(); Ok(items) } ////////////////// // // // Statements // // // ////////////////// fn statement(&mut self, req_semicolon: bool) -> Result { let next_ty = &self.peek().ty; match next_ty { TokenType::Let => { // let statement self.next(); self.letstmt(req_semicolon) }, TokenType::LBrace => { // block self.next(); self.block() }, TokenType::If => { // if statement self.next(); self.ifstmt() }, TokenType::For => { // for loop self.next(); self.forstmt() }, TokenType::While => { // while loop self.next(); self.whilestmt() }, TokenType::Break => { let tok = self.next(); self.terminate_stmt(Stmt::Break{ pos: tok.pos }, req_semicolon) }, TokenType::Continue => { let tok = self.next(); self.terminate_stmt(Stmt::Continue{ pos: tok.pos }, req_semicolon) }, TokenType::Return => { let tok = self.next(); let expr = self.assignment()?; self.terminate_stmt(Stmt::Return{ pos: tok.pos, expr }, req_semicolon) }, TokenType::Fn => { self.next(); self.fndef() }, TokenType::Struct => { self.next(); self.structstmt() }, _ => { // fallback to an expression terminated with a semicolon let expr = self.assignment()?; self.terminate_stmt(Stmt::Expr{ expr }, req_semicolon) } } } fn terminate_stmt(&mut self, stmt: Stmt, req_semicolon: bool) -> Result { if !req_semicolon { return Ok(stmt) } if self.at_end() { self.err_on_eof()?; } match self.expect(TokenType::Semicolon) { (true, _) => Ok(stmt), (false, _) => Err(self.mk_error("Missing semicolon after statement")) } } fn letstmt(&mut self, req_semicolon: bool) -> Result { let expr = self.assignment()?; // must be followed by an assignment expression if let Expr::Binary{lhs, rhs, op: Token{ty: TokenType::Equal,..}} = expr { if let Expr::Ident{value: tok} = *lhs { self.terminate_stmt(Stmt::Let{lhs: tok, rhs: Some(*rhs)}, req_semicolon) } else { Err(self.mk_error("Invalid expression after 'let'".to_owned())) } } else if let Expr::Ident{value: tok} = expr { self.terminate_stmt(Stmt::Let{lhs: tok, rhs: None}, req_semicolon) } else { Err(self.mk_error("Invalid expression after 'let'".to_owned())) } } fn ifstmt(&mut self) -> Result { let mut if_clauses = vec![]; let mut ec = false; loop { let condition = self.assignment()?; let body = self.statement(true)?; if_clauses.push((condition, body)); match self.peek().ty { TokenType::Elif => { self.next(); continue }, TokenType::Else => { self.next(); ec = true; break }, _ => break } } let else_clause = if ec { Some(Box::new(self.statement(true)?)) } else { None }; Ok(Stmt::If{ if_clauses, else_clause }) } fn forstmt(&mut self) -> Result { self.err_on_eof()?; let var = self.next(); if let TokenType::Ident(_) = &var.ty { self.err_on_eof()?; let colon = self.next(); if colon.ty != TokenType::Colon { return Err(self.mk_error("Expected colon")) } self.err_on_eof()?; let expr = self.assignment()?; self.err_on_eof()?; let stmt = self.statement(true)?; Ok(Stmt::For{ var, expr, stmt: Box::new(stmt), iter_pos: colon.pos }) } else { Err(self.mk_error("Expected identifier after for")) } } fn whilestmt(&mut self) -> Result { self.err_on_eof()?; let expr = self.assignment()?; self.err_on_eof()?; let stmt = self.statement(true)?; Ok(Stmt::While{ expr, stmt: Box::new(stmt) }) } fn fndef(&mut self) -> Result { self.err_on_eof()?; let name = self.next(); let name = if let TokenType::Ident(_) = name.ty { name } else { return Err(ParserError { message: "Expected identifer in function declaration".into(), pos: name.pos }) }; self.err_on_eof()?; if !self.expect(TokenType::LParen).0 { return Err(self.mk_error("Expected left parenthesis to start arguments list")) } let args = self.commalist(TokenType::RParen, Self::ident)?; self.err_on_eof()?; let body = self.statement(false)?; Ok(Stmt::Fn { name, args, body: Box::new(body) }) } fn block(&mut self) -> Result { let mut stmts = vec![]; while !self.at_end() && self.peek().ty != TokenType::RBrace { stmts.push(self.statement(true)?) } self.err_on_eof()?; self.next(); Ok(Stmt::Block{ stmts }) } fn structstmt(&mut self) -> Result { self.err_on_eof()?; let tok_name = self.ident()?; let name = tok_name.ty.clone().as_ident().unwrap(); self.err_on_eof()?; if !self.expect(TokenType::LBrace).0 { return Err(self.mk_error("Expected left brace in struct definition")) } self.err_on_eof()?; let items = self.commalist(TokenType::RBrace, Self::ident)?; let ty = value::generate_type(name); Ok(Stmt::Struct { name: tok_name, ty, items }) } /////////////////// // // // Expressions // // // /////////////////// // Generic method for left-associative operators fn expr(&mut self, op_type: OpType, next_level: fn(&mut Parser) -> Result) -> Result { let mut expr = next_level(self)?; while !self.at_end() && self.peek().ty.get_op_type() == Some(op_type) { let op = self.next(); let right = next_level(self)?; expr = Expr::Binary { lhs: Box::new(expr), rhs: Box::new(right), op }; } Ok(expr) } fn assignment(&mut self) -> Result { let mut stack= vec![]; let mut expr = self.pipeline()?; while !self.at_end() && self.peek().ty.get_op_type() == Some(OpType::Assignment) { let op = self.next(); stack.push((expr, op)); expr = self.pipeline()?; } while let Some(item) = stack.pop() { if !item.0.is_lvalue() { return Err(self.mk_error("Invalid LValue for assignment operation")) } expr = Expr::Binary{ lhs: Box::new(item.0), rhs: Box::new(expr), op: item.1 }; } Ok(expr) } fn pipeline(&mut self) -> Result { let mut expr = self.logical_or()?; while !self.at_end() && self.peek().ty.get_op_type() == Some(OpType::Pipeline) { let op = self.next(); let right = self.logical_or()?; if op.ty == TokenType::PipeSlash || op.ty == TokenType::PipeBackslash { self.err_on_eof()?; if !self.expect(TokenType::Comma).0 { return Err(self.mk_error("Expected comma after first argument")) } let right2 = self.logical_or()?; expr = Expr::Ternary { arg1: Box::new(expr), arg2: Box::new(right), arg3: Box::new(right2), op } } else { expr = Expr::Binary { lhs: Box::new(expr), rhs: Box::new(right), op }; } } Ok(expr) } fn logical_or(&mut self) -> Result { self.expr(OpType::LogicalOr, Self::logical_and) } fn logical_and(&mut self) -> Result { self.expr(OpType::LogicalAnd, Self::comparison) } fn comparison(&mut self) -> Result { self.expr(OpType::Comparison, Self::additive) } fn additive(&mut self) -> Result { self.expr(OpType::Additive, Self::multiplicative) } fn multiplicative(&mut self) -> Result { self.expr(OpType::Multiplicative, Self::exponential) } // Right associative, so cannot use self.expr(..) fn exponential(&mut self) -> Result { let mut stack= vec![]; let mut expr = self.range()?; while !self.at_end() && self.peek().ty == TokenType::Caret { let op = self.next(); stack.push((expr, op)); expr = self.range()?; } while let Some(item) = stack.pop() { expr = Expr::Binary{ lhs: Box::new(item.0), rhs: Box::new(expr), op: item.1 }; } Ok(expr) } fn range(&mut self) -> Result { let start = self.unary()?; if !self.at_end() && self.peek().ty == TokenType::DoubleDot { self.next(); // consume = if inclusive let incl = if !self.at_end() && self.peek().ty == TokenType::Equal { self.next(); true } else { false }; // consume end number or * for endless let end = if !incl && !self.at_end() && self.peek().ty == TokenType::Star { self.next(); None } else { Some(self.unary()?) }; // consume :step if it exists let step = if !self.at_end() && self.peek().ty == TokenType::Colon { self.next(); Some(self.unary()?) } else { None }; Ok(Expr::Range { start: Box::new(start), end: end.map(|x| Box::new(x)), step: step.map(|x| Box::new(x)), incl }) } else { Ok(start) } } fn unary(&mut self) -> Result { self.err_on_eof()?; if matches!(self.peek().ty, TokenType::Bang | TokenType::Minus) { let op = self.next(); Ok(Expr::Unary { arg: Box::new(self.fncall()?), op }) } else { self.fncall() } } fn fncall(&mut self) -> Result { let mut expr = self.expr_base()?; while !self.at_end() { match self.peek().ty { TokenType::LParen => expr = self.fncall_inner(expr)?, TokenType::LBrack => expr = self.arrindex_inner(expr)?, _ => return Ok(expr) } } Ok(expr) } fn fncall_inner(&mut self, expr: Expr) -> Result { let lparen = self.next(); let args = self.commalist(TokenType::RParen, Self::assignment)?; Ok(Expr::FuncCall { func: Box::new(expr), args, pos: lparen.pos }) } fn arrindex_inner(&mut self, expr: Expr) -> Result { let lbrack = self.next(); let index = self.assignment()?; self.err_on_eof()?; if !self.expect(TokenType::RBrack).0 { return Err(self.mk_error("Expected RBrack after collection index")) } Ok(Expr::Index { lhs: Box::new(expr), index: Box::new(index), pos: lbrack.pos }) } fn kv_pair(&mut self) -> Result<(Expr, Expr), ParserError> { let key = self.assignment()?; self.err_on_eof()?; if !self.expect(TokenType::Colon).0 { return Err(self.mk_error("Expected colon in key-value pair")) } self.err_on_eof()?; let value = self.assignment()?; Ok((key, value)) } fn expr_base(&mut self) -> Result { self.err_on_eof()?; let next = self.next(); if matches!(next.ty, TokenType::True | TokenType::False | TokenType::Nil | TokenType::Int(_) | TokenType::Float(_) | TokenType::ImFloat(_) | TokenType::String(_) | TokenType::Char(_) ) { Ok(Expr::Literal { value: next }) } else if let TokenType::Ident(..) = next.ty { Ok(Expr::Ident { value: next }) } else if next.ty == TokenType::LParen { let expr = self.assignment()?; if self.at_end() || TokenType::RParen != self.next().ty { Err(self.mk_error("Left parenthesis never closed")) } else { Ok(expr) } } else if next.ty == TokenType::LBrack { let items = self.commalist(TokenType::RBrack, Self::assignment)?; Ok(Expr::List { items }) } else if next.ty == TokenType::LBrace { let items = self.commalist(TokenType::RBrace, Self::kv_pair)?; Ok(Expr::Map { items }) } else if next.ty == TokenType::Fn { self.err_on_eof()?; if !self.expect(TokenType::LParen).0 { return Err(self.mk_error("Expected left parenthesis to start arguments list")) } let args = self.commalist(TokenType::RParen, Self::ident)?; self.err_on_eof()?; let body = self.statement(false)?; Ok(Expr::Fn { args, body: Box::new(body) }) } else { Err(self.mk_error(format!("Unexpected token: {:?}", next.ty))) } } }