talc/talc-lang/src/parser/util.rs

172 lines
3.9 KiB
Rust
Raw Normal View History

2024-11-03 12:50:36 -05:00
use core::fmt;
2024-11-04 13:25:31 -05:00
use std::num::{ParseFloatError, ParseIntError};
2024-02-21 11:04:18 -05:00
use thiserror::Error;
2024-03-30 12:21:09 -04:00
use crate::lstring::{LStr, LString};
2024-11-03 12:50:36 -05:00
use super::Span;
2024-02-21 11:04:18 -05:00
#[derive(Clone, Debug, Error)]
2024-11-03 12:50:36 -05:00
pub struct ParserError {
2024-11-04 13:25:31 -05:00
pub span: Span,
pub msg: String,
2024-11-03 12:50:36 -05:00
}
impl fmt::Display for ParserError {
2024-11-04 13:25:31 -05:00
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} | {}", self.span, self.msg)
}
2024-11-03 12:50:36 -05:00
}
pub trait SpanParserError {
2024-11-04 13:25:31 -05:00
type Output;
fn span_err(self, span: Span) -> Self::Output;
2024-11-03 12:50:36 -05:00
}
impl<T, E: std::error::Error> SpanParserError for Result<T, E> {
2024-11-04 13:25:31 -05:00
type Output = Result<T, ParserError>;
fn span_err(self, span: Span) -> Self::Output {
self.map_err(|e| ParserError {
span,
msg: e.to_string(),
})
}
2024-02-21 11:04:18 -05:00
}
#[derive(Clone, Copy, Debug, Error)]
pub enum StrEscapeError {
#[error("EOF in string escape")]
Eof,
#[error("EOF in string escape \\x")]
HexEof,
#[error("EOF in string escape \\u")]
UnicodeEof,
2024-11-03 12:50:36 -05:00
#[error("invalid string escape \\{0}")]
2024-02-21 11:04:18 -05:00
Invalid(char),
2024-11-03 12:50:36 -05:00
#[error("invalid hex digit '{0}' in string escape")]
2024-02-21 11:04:18 -05:00
InvalidHex(char),
2024-11-03 12:50:36 -05:00
#[error("missing brace after string escape \\u")]
2024-02-21 11:04:18 -05:00
MissingBrace,
2024-11-03 12:50:36 -05:00
#[error("invalid codepoint in string escape: {0:x}")]
2024-02-21 11:04:18 -05:00
InvalidCodepoint(u32),
2024-11-03 12:50:36 -05:00
#[error("codepoint in string escape too large")]
2024-02-21 11:04:18 -05:00
CodepointTooLarge,
}
2024-03-30 12:21:09 -04:00
pub fn parse_str_escapes(src: &str) -> Result<LString, StrEscapeError> {
let mut s = LString::with_capacity(src.len());
2024-02-21 11:04:18 -05:00
let mut chars = src.chars();
while let Some(c) = chars.next() {
2024-11-04 13:25:31 -05:00
if c != '\\' {
s.push_char(c);
continue
}
2024-02-21 11:04:18 -05:00
let c = chars.next().ok_or(StrEscapeError::Eof)?;
match c {
2024-03-30 12:21:09 -04:00
'"' | '\'' | '\\' => s.push_char(c),
2024-11-04 13:25:31 -05:00
'0' => s.push_char('\0'),
'a' => s.push_char('\x07'),
'b' => s.push_char('\x08'),
't' => s.push_char('\t'),
'n' => s.push_char('\n'),
'v' => s.push_char('\x0b'),
'f' => s.push_char('\x0c'),
'r' => s.push_char('\r'),
'e' => s.push_char('\x1b'),
2024-02-21 11:04:18 -05:00
'x' => {
let c = chars.next().ok_or(StrEscapeError::HexEof)?;
let n1 = c.to_digit(16).ok_or(StrEscapeError::InvalidHex(c))?;
let c = chars.next().ok_or(StrEscapeError::HexEof)?;
let n2 = c.to_digit(16).ok_or(StrEscapeError::InvalidHex(c))?;
2024-03-30 12:21:09 -04:00
s.push_byte((n1 * 16 + n2) as u8);
2024-11-04 13:25:31 -05:00
}
2024-02-21 11:04:18 -05:00
'u' => {
let Some('{') = chars.next() else {
return Err(StrEscapeError::MissingBrace)
};
2024-11-03 12:50:36 -05:00
let mut n = 0_u32;
2024-02-21 11:04:18 -05:00
loop {
let Some(c) = chars.next() else {
return Err(StrEscapeError::UnicodeEof)
};
2024-11-04 13:25:31 -05:00
if c == '}' {
break
}
2024-11-03 12:50:36 -05:00
if n > 0x10ffff {
2024-02-21 11:04:18 -05:00
return Err(StrEscapeError::CodepointTooLarge)
}
n = n * 16 + c.to_digit(16).ok_or(StrEscapeError::InvalidHex(c))?;
}
let ch = char::from_u32(n).ok_or(StrEscapeError::InvalidCodepoint(n))?;
2024-03-30 12:21:09 -04:00
s.push_char(ch);
2024-11-04 13:25:31 -05:00
}
2024-02-21 11:04:18 -05:00
c => return Err(StrEscapeError::Invalid(c)),
}
}
Ok(s)
}
2024-03-30 12:21:09 -04:00
pub fn parse_float<'a, S: Into<&'a LStr>>(f: S) -> Result<f64, ParseFloatError> {
2024-02-21 11:04:18 -05:00
let mut s = String::new();
2024-03-30 12:21:09 -04:00
for c in f.into().chars() {
2024-02-21 11:04:18 -05:00
if c != '_' {
s.push(c);
}
}
s.parse()
}
2024-03-30 12:21:09 -04:00
pub fn parse_int<'a, S: Into<&'a LStr>>(f: S, radix: u32) -> Result<i64, ParseIntError> {
2024-02-21 11:04:18 -05:00
let mut s = String::new();
2024-03-30 12:21:09 -04:00
for c in f.into().chars() {
2024-02-21 11:04:18 -05:00
if c != '_' {
s.push(c);
}
}
i64::from_str_radix(&s, radix)
}
2024-11-03 12:50:36 -05:00
pub fn parse_int_literal<'a, S: Into<&'a LStr>>(f: S) -> Result<i64, ParseIntError> {
2024-11-04 13:25:31 -05:00
let f = f.into();
match f.chars().nth(2) {
Some('x') => parse_int(&f[2..], 16),
Some('o') => parse_int(&f[2..], 8),
Some('s') => parse_int(&f[2..], 6),
Some('b') => parse_int(&f[2..], 2),
_ => parse_int(f, 10),
}
2024-11-03 12:50:36 -05:00
}
2024-11-04 12:59:05 -05:00
pub fn to_lstring_radix(n: i64, radix: u32, upper: bool) -> LString {
let mut result = vec![];
let mut begin = 0;
let mut x;
if n < 0 {
result.push('-' as u32 as u8);
begin = 1;
x = (-n) as u64;
} else {
x = n as u64;
}
loop {
let m = x % (radix as u64);
x /= radix as u64;
let mut c = char::from_digit(m as u32, radix).unwrap();
2024-11-04 13:25:31 -05:00
if upper {
c.make_ascii_uppercase();
}
2024-11-04 12:59:05 -05:00
result.push(c as u8);
if x == 0 {
2024-11-04 13:25:31 -05:00
break
2024-11-04 12:59:05 -05:00
}
}
result[begin..].reverse();
LString::from(result)
}