talc/talc-std/src/regex.rs
2024-11-14 14:16:33 -05:00

275 lines
7.4 KiB
Rust

use std::borrow::Cow;
use lazy_static::lazy_static;
use regex::{Captures, Match, Regex};
use talc_lang::{
exception::{exception, Result},
lstring::LString,
symbol::{Symbol, SYM_TYPE_ERROR, SYM_VALUE_ERROR},
throw,
value::{NativeValue, Value},
vm::Vm,
};
use talc_macros::native_func;
use crate::unpack_args;
// Symbols used by this module. `lazy_static!` defers each `Symbol::get` call
// until the static is first dereferenced.
lazy_static! {
// Type symbol returned by `ValueRegex::get_type` and checked in `regex_from`.
static ref SYM_STD_REGEX: Symbol = Symbol::get("std.regex");
// Keys of the table built by `match_to_value`.
static ref SYM_START: Symbol = Symbol::get("start");
static ref SYM_END: Symbol = Symbol::get("end");
static ref SYM_STR: Symbol = Symbol::get("str");
}
/// Newtype around a compiled [`Regex`].
///
/// The wrapper exists so the regex can implement `NativeValue` and be stored
/// in a `Value`; conversions to and from the inner [`Regex`] are provided via
/// `From`.
#[derive(Clone, Debug)]
pub struct ValueRegex(Regex);
impl From<Regex> for ValueRegex {
fn from(value: Regex) -> Self {
Self(value)
}
}
impl From<ValueRegex> for Regex {
fn from(value: ValueRegex) -> Self {
value.0
}
}
/// Lets a compiled regex be carried around as a native value whose type
/// symbol is `SYM_STD_REGEX` (`"std.regex"`).
impl NativeValue for ValueRegex {
    /// Returns the type symbol shared by all regex values.
    fn get_type(&self) -> Symbol {
        *SYM_STD_REGEX
    }

    /// Exposes `self` as `Any` so it can be downcast back to `ValueRegex`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Writes the regex's `Display` output into `w`.
    ///
    /// When `repr` is true the text is wrapped in slashes (`/…/`); otherwise
    /// it is written bare. `_recur` is unused.
    fn to_lstring(
        &self,
        w: &mut LString,
        repr: bool,
        _recur: &mut Vec<*const ()>,
    ) -> std::io::Result<()> {
        use std::io::Write;

        let Self(pattern) = self;
        if !repr {
            return write!(w, "{}", pattern);
        }
        write!(w, "/{}/", pattern)
    }

    /// Produces a new `Value` holding a clone of this regex; always
    /// `Ok(Some(_))`.
    fn copy_value(&self) -> Result<Option<Value>> {
        let duplicate: Value = self.clone().into();
        Ok(Some(duplicate))
    }
}
/// Installs every native function defined in this module as a global on `vm`.
///
/// Note that the Rust functions `_regex` and `_match` are registered under
/// the names `"regex"` and `"match"` respectively.
pub fn load(vm: &mut Vm) {
    // Small local shorthand so each registration reads as a name/function pair.
    let mut register = |name: &'static str, func| vm.set_global_name(name, func);

    register("regex", _regex().into());
    register("matches", matches().into());
    register("match", _match().into());
    register("match_once", match_once().into());
    register("captures", captures().into());
    register("captures_once", captures_once().into());
    register("replace", replace().into());
    register("replace_once", replace_once().into());
    register("split", split().into());
    register("split_once", split_once().into());
}
/// Converts a single regex match into a table `Value`.
///
/// The table has three entries, keyed by the symbols `start`, `end` and
/// `str`: the match's start offset and end offset (as reported by
/// `Match::start` / `Match::end`, cast to `i64`) and a copy of the matched
/// text.
fn match_to_value(m: Match) -> Value {
    let start = m.start() as i64;
    let end = m.end() as i64;
    Value::new_table(|table| {
        table.insert((*SYM_START).into(), start.into());
        table.insert((*SYM_END).into(), end.into());

        let text = String::from(m.as_str());
        table.insert((*SYM_STR).into(), LString::from(text).into());
    })
}
fn captures_to_value(cs: Captures) -> Value {
cs.iter()
.map(|c| c.map_or(Value::Nil, match_to_value))
.collect::<Vec<Value>>()
.into()
}
/// Obtains a [`Regex`] from a value that is either a pattern string or an
/// already-compiled regex.
///
/// * `v` — the value to interpret.
/// * `name` — the calling function's name, used only in the type-error text.
///
/// A string is compiled on the spot and returned as `Cow::Owned`; a native
/// `ValueRegex` is returned as `Cow::Borrowed`, without cloning.
///
/// # Errors
/// * value error — the string is not valid UTF-8, or fails to compile.
/// * type error — `v` is neither a string nor a `ValueRegex`.
fn regex_from<'a>(v: &'a Value, name: &str) -> Result<Cow<'a, Regex>> {
    // Case 1: a pattern string that still has to be compiled.
    if let Value::String(source) = v {
        let pattern = match source.to_str() {
            Ok(pattern) => pattern,
            Err(_) => throw!(*SYM_VALUE_ERROR, "regex must be valid UTF-8"),
        };
        return match Regex::new(pattern) {
            Ok(compiled) => Ok(Cow::Owned(compiled)),
            Err(e) => Err(exception!(*SYM_VALUE_ERROR, "invalid regex: {e}")),
        };
    }

    // Case 2: a native value tagged as a regex that really is a `ValueRegex`.
    if let Value::Native(native) = v {
        if native.get_type() == *SYM_STD_REGEX {
            if let Some(wrapped) = native.as_any().downcast_ref::<ValueRegex>() {
                return Ok(Cow::Borrowed(&wrapped.0));
            }
        }
    }

    // Anything else (including a failed downcast) is a type error.
    throw!(
        *SYM_TYPE_ERROR,
        "{name} expected string or regex, got {v:#}"
    )
}
/// Native `regex(re)`: produces a compiled regex value.
///
/// The first element of `args` is discarded. `re` may be a pattern string
/// (compiled here) or an existing regex value (cloned).
///
/// # Errors
/// Propagates the type/value errors raised by `regex_from`.
#[native_func(1)]
pub fn _regex(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re] = unpack_args!(args);
    let compiled = regex_from(&re, "regex")?;
    Ok(ValueRegex(compiled.into_owned()).into())
}
/// Native `matches(re, s)`: reports whether `re` matches anywhere in `s`.
///
/// The first element of `args` is discarded. `re` may be a pattern string or
/// a compiled regex value (see `regex_from`).
///
/// # Errors
/// * type error — `s` is not a string, or `re` is neither string nor regex.
/// * value error — `s` is not valid UTF-8, or `re` is an invalid pattern.
#[native_func(2)]
pub fn matches(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);

    let Value::String(haystack) = &s else {
        throw!(*SYM_TYPE_ERROR, "matches expected string, got {s:#}")
    };
    let text = match haystack.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8"),
    };

    let pattern = regex_from(&re, "matches")?;
    let found = pattern.is_match(text);
    Ok(found.into())
}
#[native_func(2)]
pub fn match_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
let [_, re, s] = unpack_args!(args);
let Value::String(s) = s else {
throw!(*SYM_TYPE_ERROR, "match_once expected string, got {s:#}")
};
let Ok(s) = s.to_str() else {
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
};
let re = regex_from(&re, "match_once")?;
Ok(re.find(s).map_or(Value::Nil, match_to_value))
}
#[native_func(2)]
pub fn _match(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
let [_, re, s] = unpack_args!(args);
let Value::String(s) = s else {
throw!(*SYM_TYPE_ERROR, "match expected string, got {s:#}")
};
let Ok(s) = s.to_str() else {
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
};
let re = regex_from(&re, "match")?;
Ok(re
.find_iter(s)
.map(match_to_value)
.collect::<Vec<Value>>()
.into())
}
#[native_func(2)]
pub fn captures_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
let [_, re, s] = unpack_args!(args);
let Value::String(s) = s else {
throw!(*SYM_TYPE_ERROR, "captures_once expected string, got {s:#}")
};
let Ok(s) = s.to_str() else {
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
};
let re = regex_from(&re, "captures_once")?;
Ok(re.captures(s).map_or(Value::Nil, captures_to_value))
}
#[native_func(2)]
pub fn captures(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
let [_, re, s] = unpack_args!(args);
let Value::String(s) = s else {
throw!(*SYM_TYPE_ERROR, "captures expected string, got {s:#}")
};
let Ok(s) = s.to_str() else {
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
};
let re = regex_from(&re, "captures")?;
Ok(re
.captures_iter(s)
.map(captures_to_value)
.collect::<Vec<Value>>()
.into())
}
/// Native `replace_once(re, rep, s)`: replaces the first match of `re` in `s`
/// with `rep`.
///
/// `rep` is handed to `Regex::replace` as a `&str` replacement. The first
/// element of `args` is discarded. Returns the resulting string.
///
/// # Errors
/// Checked in this order:
/// * type error — `s` is not a string;
/// * type error — `rep` is not a string;
/// * value error — `s` is not valid UTF-8;
/// * value error — `rep` is not valid UTF-8;
/// * whatever `regex_from` raises for `re`.
#[native_func(3)]
pub fn replace_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, rep, s] = unpack_args!(args);

    let Value::String(haystack) = &s else {
        throw!(*SYM_TYPE_ERROR, "replace_once expected string, got {s:#}")
    };
    // NOTE(review): the message mentions "function", but only `Value::String`
    // replacements are accepted here — confirm whether callback support is
    // planned or the wording should change.
    let Value::String(replacement) = &rep else {
        throw!(
            *SYM_TYPE_ERROR,
            "replace_once expected string or function, got {rep:#}"
        )
    };

    let text = match haystack.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8"),
    };
    let with = match replacement.to_str() {
        Ok(with) => with,
        Err(_) => throw!(*SYM_VALUE_ERROR, "replacement string must be valid UTF-8"),
    };

    let pattern = regex_from(&re, "replace_once")?;
    let replaced = pattern.replace(text, with);
    Ok(LString::from(replaced).into())
}
/// Native `replace(re, rep, s)`: replaces every match of `re` in `s` with
/// `rep`.
///
/// `rep` is handed to `Regex::replace_all` as a `&str` replacement. The first
/// element of `args` is discarded. Returns the resulting string.
///
/// # Errors
/// Checked in this order:
/// * type error — `s` is not a string;
/// * type error — `rep` is not a string;
/// * value error — `s` is not valid UTF-8;
/// * value error — `rep` is not valid UTF-8;
/// * whatever `regex_from` raises for `re`.
#[native_func(3)]
pub fn replace(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, rep, s] = unpack_args!(args);

    let Value::String(haystack) = &s else {
        throw!(*SYM_TYPE_ERROR, "replace expected string, got {s:#}")
    };
    // NOTE(review): the message mentions "function", but only `Value::String`
    // replacements are accepted here — confirm whether callback support is
    // planned or the wording should change.
    let Value::String(replacement) = &rep else {
        throw!(
            *SYM_TYPE_ERROR,
            "replace expected string or function, got {rep:#}"
        )
    };

    let text = match haystack.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8"),
    };
    let with = match replacement.to_str() {
        Ok(with) => with,
        Err(_) => throw!(*SYM_VALUE_ERROR, "replacement string must be valid UTF-8"),
    };

    let pattern = regex_from(&re, "replace")?;
    let replaced = pattern.replace_all(text, with);
    Ok(LString::from(replaced).into())
}
/// Native `split_once(re, s)`: splits `s` at the first match of `re`.
///
/// Always returns a two-element list, taken from the first two items of
/// `Regex::splitn(s, 2)`. Any item the iterator does not produce is replaced
/// by an empty string, so the second element is empty when no split occurs.
/// The first element of `args` is discarded.
///
/// # Errors
/// * type error — `s` is not a string, or `re` is neither string nor regex.
/// * value error — `s` is not valid UTF-8, or `re` is an invalid pattern.
#[native_func(2)]
pub fn split_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);

    let Value::String(haystack) = &s else {
        throw!(*SYM_TYPE_ERROR, "split_once expected string, got {s:#}")
    };
    let text = match haystack.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_VALUE_ERROR, "string to split must be valid UTF-8"),
    };

    let pattern = regex_from(&re, "split_once")?;
    let mut pieces = pattern.splitn(text, 2);
    let head = pieces.next().unwrap_or_default();
    let tail = pieces.next().unwrap_or_default();

    let pair: Vec<Value> = vec![LString::from(head).into(), LString::from(tail).into()];
    Ok(pair.into())
}
/// Native `split(re, s)`: splits `s` at every match of `re`.
///
/// Returns a list of strings, one per piece yielded by `Regex::split`, in
/// order. The first element of `args` is discarded.
///
/// # Errors
/// * type error — `s` is not a string, or `re` is neither string nor regex.
/// * value error — `s` is not valid UTF-8, or `re` is an invalid pattern.
#[native_func(2)]
pub fn split(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);

    let Value::String(haystack) = &s else {
        throw!(*SYM_TYPE_ERROR, "split expected string, got {s:#}")
    };
    let text = match haystack.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_VALUE_ERROR, "string to split must be valid UTF-8"),
    };

    let pattern = regex_from(&re, "split")?;
    let mut pieces: Vec<Value> = Vec::new();
    for piece in pattern.split(text) {
        pieces.push(LString::from(piece).into());
    }
    Ok(pieces.into())
}