use std::borrow::Cow; use lazy_static::lazy_static; use regex::{Captures, Match, Regex}; use talc_lang::{ exception::{exception, Result}, lstring::LString, symbol::{Symbol, SYM_TYPE_ERROR, SYM_VALUE_ERROR}, throw, value::{NativeValue, Value}, vm::Vm, }; use talc_macros::native_func; use crate::unpack_args; lazy_static! { static ref SYM_STD_REGEX: Symbol = Symbol::get("std.regex"); static ref SYM_START: Symbol = Symbol::get("start"); static ref SYM_END: Symbol = Symbol::get("end"); static ref SYM_STR: Symbol = Symbol::get("str"); } #[derive(Clone, Debug)] pub struct ValueRegex(Regex); impl From for ValueRegex { fn from(value: Regex) -> Self { Self(value) } } impl From for Regex { fn from(value: ValueRegex) -> Self { value.0 } } impl NativeValue for ValueRegex { fn get_type(&self) -> Symbol { *SYM_STD_REGEX } fn as_any(&self) -> &dyn std::any::Any { self } fn to_lstring( &self, w: &mut LString, repr: bool, _recur: &mut Vec<*const ()>, ) -> std::io::Result<()> { use std::io::Write; if repr { write!(w, "/{}/", self.0) } else { write!(w, "{}", self.0) } } fn copy_value(&self) -> Result> { Ok(Some(self.clone().into())) } } pub fn load(vm: &mut Vm) { vm.set_global_name("regex", _regex().into()); vm.set_global_name("matches", matches().into()); vm.set_global_name("match", _match().into()); vm.set_global_name("match_once", match_once().into()); vm.set_global_name("captures", captures().into()); vm.set_global_name("captures_once", captures_once().into()); vm.set_global_name("replace", replace().into()); vm.set_global_name("replace_once", replace_once().into()); vm.set_global_name("split", split().into()); vm.set_global_name("split_once", split_once().into()); } fn match_to_value(m: Match) -> Value { Value::new_table(|t| { t.insert((*SYM_START).into(), (m.start() as i64).into()); t.insert((*SYM_END).into(), (m.end() as i64).into()); t.insert( (*SYM_STR).into(), LString::from(m.as_str().to_string()).into(), ); }) } fn captures_to_value(cs: Captures) -> Value { cs.iter() .map(|c| c.map_or(Value::Nil, match_to_value)) .collect::>() .into() } fn regex_from<'a>(v: &'a Value, name: &str) -> Result> { match v { Value::String(s) => { let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "regex must be valid UTF-8") }; Regex::new(s) .map(Cow::Owned) .map_err(|e| exception!(*SYM_VALUE_ERROR, "invalid regex: {e}")) } Value::Native(n) if n.get_type() == *SYM_STD_REGEX => n .as_any() .downcast_ref::() .map(|vr| Cow::Borrowed(&vr.0)) .ok_or_else(|| { exception!( *SYM_TYPE_ERROR, "{name} expected string or regex, got {v:#}" ) }), _ => throw!( *SYM_TYPE_ERROR, "{name} expected string or regex, got {v:#}" ), } } #[native_func(1)] pub fn _regex(_: &mut Vm, args: Vec) -> Result { let [_, re] = unpack_args!(args); regex_from(&re, "regex").map(|re| ValueRegex(re.into_owned()).into()) } #[native_func(2)] pub fn matches(_: &mut Vm, args: Vec) -> Result { let [_, re, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "matches expected string, got {s:#}") }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8") }; let re = regex_from(&re, "matches")?; Ok(re.is_match(s).into()) } #[native_func(2)] pub fn match_once(_: &mut Vm, args: Vec) -> Result { let [_, re, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "match_once expected string, got {s:#}") }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8") }; let re = regex_from(&re, "match_once")?; Ok(re.find(s).map_or(Value::Nil, match_to_value)) } #[native_func(2)] pub fn _match(_: &mut Vm, args: Vec) -> Result { let [_, re, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "match expected string, got {s:#}") }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8") }; let re = regex_from(&re, "match")?; Ok(re .find_iter(s) .map(match_to_value) .collect::>() .into()) } #[native_func(2)] pub fn captures_once(_: &mut Vm, args: Vec) -> Result { let [_, re, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "captures_once expected string, got {s:#}") }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8") }; let re = regex_from(&re, "captures_once")?; Ok(re.captures(s).map_or(Value::Nil, captures_to_value)) } #[native_func(2)] pub fn captures(_: &mut Vm, args: Vec) -> Result { let [_, re, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "captures expected string, got {s:#}") }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8") }; let re = regex_from(&re, "captures")?; Ok(re .captures_iter(s) .map(captures_to_value) .collect::>() .into()) } #[native_func(3)] pub fn replace_once(_: &mut Vm, args: Vec) -> Result { let [_, re, rep, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "replace_once expected string, got {s:#}") }; let Value::String(rep) = rep else { throw!( *SYM_TYPE_ERROR, "replace_once expected string or function, got {rep:#}" ) }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8") }; let Ok(rep) = rep.to_str() else { throw!(*SYM_VALUE_ERROR, "replacement string must be valid UTF-8") }; let re = regex_from(&re, "replace_once")?; Ok(LString::from(re.replace(s, rep)).into()) } #[native_func(3)] pub fn replace(_: &mut Vm, args: Vec) -> Result { let [_, re, rep, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "replace expected string, got {s:#}") }; let Value::String(rep) = rep else { throw!( *SYM_TYPE_ERROR, "replace expected string or function, got {rep:#}" ) }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8") }; let Ok(rep) = rep.to_str() else { throw!(*SYM_VALUE_ERROR, "replacement string must be valid UTF-8") }; let re = regex_from(&re, "replace")?; Ok(LString::from(re.replace_all(s, rep)).into()) } #[native_func(2)] pub fn split_once(_: &mut Vm, args: Vec) -> Result { let [_, re, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "split_once expected string, got {s:#}") }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "string to split must be valid UTF-8") }; let re = regex_from(&re, "split_once")?; let mut parts = re.splitn(s, 2); let (part1, part2) = ( LString::from(parts.next().unwrap_or_default()).into(), LString::from(parts.next().unwrap_or_default()).into(), ); Ok(vec![part1, part2].into()) } #[native_func(2)] pub fn split(_: &mut Vm, args: Vec) -> Result { let [_, re, s] = unpack_args!(args); let Value::String(s) = s else { throw!(*SYM_TYPE_ERROR, "split expected string, got {s:#}") }; let Ok(s) = s.to_str() else { throw!(*SYM_VALUE_ERROR, "string to split must be valid UTF-8") }; let re = regex_from(&re, "split")?; let parts: Vec = re.split(s).map(|s| LString::from(s).into()).collect(); Ok(parts.into()) }