talc/talc-lang/src/parser_util.rs

107 lines
2.7 KiB
Rust

use std::num::{ParseIntError, ParseFloatError};
use thiserror::Error;
use crate::lstring::{LStr, LString};
/// Umbrella error for the helpers in this module.
///
/// Each variant wraps one underlying error type. The `#[from]` attributes
/// generate `From` conversions, so `?` can lift any of the wrapped errors
/// into a `ParseError`. The `"{0}"` display format prints the wrapped
/// error's own message with nothing added.
#[derive(Clone, Debug, Error)]
pub enum ParseError {
/// A string escape sequence could not be decoded.
#[error("{0}")]
StrEscape(#[from] StrEscapeError),
/// An integer could not be parsed.
#[error("{0}")]
Integer(#[from] ParseIntError),
/// A float could not be parsed.
#[error("{0}")]
Float(#[from] ParseFloatError),
}
/// Reasons `parse_str_escapes` can reject an escape sequence.
#[derive(Clone, Copy, Debug, Error)]
pub enum StrEscapeError {
/// The input ended immediately after a backslash.
#[error("EOF in string escape")]
Eof,
/// The input ended before both hex digits of a `\x` escape were read.
#[error("EOF in string escape \\x")]
HexEof,
/// The input ended inside `\u{...}` before the closing `}` was found.
#[error("EOF in string escape \\u")]
UnicodeEof,
/// The character following the backslash is not a recognised escape.
#[error("Invalid string escape \\{0}")]
Invalid(char),
/// A character inside a `\x` or `\u{...}` escape is not a hexadecimal digit.
#[error("Invalid hex digit '{0}' in string escape")]
InvalidHex(char),
/// `\u` was not immediately followed by `{` (this includes end of input).
#[error("Missing brace after string escape \\u")]
MissingBrace,
/// The value in `\u{...}` was rejected by `char::from_u32`; carries that value.
#[error("Invalid codepoint in string escape: {0:x}")]
InvalidCodepoint(u32),
/// The value accumulated in `\u{...}` had already reached `0x1000_0000`
/// when another character (other than `}`) followed.
#[error("Codepoint in string escape too large")]
CodepointTooLarge,
}
/// Expands the backslash escape sequences in `src` into an [`LString`].
///
/// Characters other than `\` are copied through unchanged. Recognised escapes:
///
/// - `\"`, `\'`, `\\` — the escaped character itself
/// - `\0` (0x00), `\a` (0x07), `\b` (0x08), `\t`, `\n`, `\v` (0x0B),
///   `\f` (0x0C), `\r`, `\e` (0x1B)
/// - `\xHH` — exactly two hex digits, appended as a single byte via `push_byte`
/// - `\u{H...}` — hex digits up to the closing brace, appended as a `char`
///
/// Note that `\u{}` (no digits) decodes to U+0000, because the accumulator
/// starts at zero and the closing brace ends the loop immediately.
///
/// # Errors
///
/// - [`StrEscapeError::Eof`] if `src` ends right after a backslash.
/// - [`StrEscapeError::Invalid`] for an unrecognised escape character.
/// - [`StrEscapeError::HexEof`] / [`StrEscapeError::InvalidHex`] for a
///   truncated or non-hex `\x` escape.
/// - [`StrEscapeError::MissingBrace`] if `\u` is not followed by `{`.
/// - [`StrEscapeError::UnicodeEof`] if `src` ends before the closing `}`.
/// - [`StrEscapeError::CodepointTooLarge`] if the accumulated value has
///   reached `0x1000_0000` and another non-`}` character follows.
/// - [`StrEscapeError::InvalidHex`] for a non-hex character inside the braces.
/// - [`StrEscapeError::InvalidCodepoint`] if `char::from_u32` rejects the value.
pub fn parse_str_escapes(src: &str) -> Result<LString, StrEscapeError> {
    // Reads one hexadecimal digit of a `\x` escape.
    fn hex_nibble(input: &mut std::str::Chars<'_>) -> Result<u32, StrEscapeError> {
        let digit = input.next().ok_or(StrEscapeError::HexEof)?;
        digit.to_digit(16).ok_or(StrEscapeError::InvalidHex(digit))
    }

    let mut out = LString::with_capacity(src.len());
    let mut input = src.chars();

    loop {
        // Copy ordinary characters straight through; on a backslash, fetch
        // the character that selects the escape.
        let escape = match input.next() {
            None => break,
            Some('\\') => input.next().ok_or(StrEscapeError::Eof)?,
            Some(plain) => {
                out.push_char(plain);
                continue;
            }
        };

        let decoded = match escape {
            '"' | '\'' | '\\' => escape,
            '0' => '\0',
            'a' => '\x07',
            'b' => '\x08',
            't' => '\t',
            'n' => '\n',
            'v' => '\x0b',
            'f' => '\x0c',
            'r' => '\r',
            'e' => '\x1b',
            'x' => {
                let high = hex_nibble(&mut input)?;
                let low = hex_nibble(&mut input)?;
                // Two hex digits always fit in one byte, so the cast is lossless.
                out.push_byte(((high << 4) | low) as u8);
                continue;
            }
            'u' => {
                if input.next() != Some('{') {
                    return Err(StrEscapeError::MissingBrace);
                }
                let mut code = 0u32;
                loop {
                    let digit = match input.next() {
                        None => return Err(StrEscapeError::UnicodeEof),
                        Some('}') => break,
                        Some(other) => other,
                    };
                    // Checked before shifting in another digit so the
                    // multiplication below cannot overflow a `u32`.
                    if code >= 0x1000_0000u32 {
                        return Err(StrEscapeError::CodepointTooLarge);
                    }
                    let value = digit
                        .to_digit(16)
                        .ok_or(StrEscapeError::InvalidHex(digit))?;
                    code = code * 16 + value;
                }
                char::from_u32(code).ok_or(StrEscapeError::InvalidCodepoint(code))?
            }
            unknown => return Err(StrEscapeError::Invalid(unknown)),
        };
        out.push_char(decoded);
    }

    Ok(out)
}
pub fn parse_float<'a, S: Into<&'a LStr>>(f: S) -> Result<f64, ParseFloatError> {
let mut s = String::new();
for c in f.into().chars() {
if c != '_' {
s.push(c);
}
}
s.parse()
}
pub fn parse_int<'a, S: Into<&'a LStr>>(f: S, radix: u32) -> Result<i64, ParseIntError> {
let mut s = String::new();
for c in f.into().chars() {
if c != '_' {
s.push(c);
}
}
i64::from_str_radix(&s, radix)
}