275 lines
7.4 KiB
Rust
275 lines
7.4 KiB
Rust
use std::borrow::Cow;
|
|
|
|
use lazy_static::lazy_static;
|
|
use regex::{Captures, Match, Regex};
|
|
use talc_lang::{
|
|
exception::{exception, Result},
|
|
lstring::LString,
|
|
symbol::{Symbol, SYM_TYPE_ERROR, SYM_VALUE_ERROR},
|
|
throw,
|
|
value::{NativeValue, Value},
|
|
vm::Vm,
|
|
};
|
|
use talc_macros::native_func;
|
|
|
|
use crate::unpack_args;
|
|
|
|
lazy_static! {
    // Type symbol reported by `ValueRegex::get_type` and checked by
    // `regex_from` to recognise native regex values.
    static ref SYM_STD_REGEX: Symbol = Symbol::get("std.regex");

    // Keys of the table built by `match_to_value` for a single match.
    static ref SYM_START: Symbol = Symbol::get("start");
    static ref SYM_END: Symbol = Symbol::get("end");
    static ref SYM_STR: Symbol = Symbol::get("str");
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct ValueRegex(Regex);
|
|
|
|
impl From<Regex> for ValueRegex {
|
|
fn from(value: Regex) -> Self {
|
|
Self(value)
|
|
}
|
|
}
|
|
|
|
impl From<ValueRegex> for Regex {
|
|
fn from(value: ValueRegex) -> Self {
|
|
value.0
|
|
}
|
|
}
|
|
|
|
impl NativeValue for ValueRegex {
|
|
fn get_type(&self) -> Symbol {
|
|
*SYM_STD_REGEX
|
|
}
|
|
fn as_any(&self) -> &dyn std::any::Any {
|
|
self
|
|
}
|
|
fn to_lstring(
|
|
&self,
|
|
w: &mut LString,
|
|
repr: bool,
|
|
_recur: &mut Vec<*const ()>,
|
|
) -> std::io::Result<()> {
|
|
use std::io::Write;
|
|
if repr {
|
|
write!(w, "/{}/", self.0)
|
|
} else {
|
|
write!(w, "{}", self.0)
|
|
}
|
|
}
|
|
fn copy_value(&self) -> Result<Option<Value>> {
|
|
Ok(Some(self.clone().into()))
|
|
}
|
|
}
|
|
|
|
/// Registers every native function defined in this module as a global of
/// `vm`, under the name given as the first argument of each call below.
pub fn load(vm: &mut Vm) {
    // Construction of regex values.
    vm.set_global_name("regex", _regex().into());

    // Searching.
    vm.set_global_name("matches", matches().into());
    vm.set_global_name("match", _match().into());
    vm.set_global_name("match_once", match_once().into());
    vm.set_global_name("captures", captures().into());
    vm.set_global_name("captures_once", captures_once().into());

    // Rewriting.
    vm.set_global_name("replace", replace().into());
    vm.set_global_name("replace_once", replace_once().into());

    // Splitting.
    vm.set_global_name("split", split().into());
    vm.set_global_name("split_once", split_once().into());
}
|
|
|
|
fn match_to_value(m: Match) -> Value {
|
|
Value::new_table(|t| {
|
|
t.insert((*SYM_START).into(), (m.start() as i64).into());
|
|
t.insert((*SYM_END).into(), (m.end() as i64).into());
|
|
t.insert(
|
|
(*SYM_STR).into(),
|
|
LString::from(m.as_str().to_string()).into(),
|
|
);
|
|
})
|
|
}
|
|
|
|
fn captures_to_value(cs: Captures) -> Value {
|
|
cs.iter()
|
|
.map(|c| c.map_or(Value::Nil, match_to_value))
|
|
.collect::<Vec<Value>>()
|
|
.into()
|
|
}
|
|
|
|
fn regex_from<'a>(v: &'a Value, name: &str) -> Result<Cow<'a, Regex>> {
|
|
match v {
|
|
Value::String(s) => {
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "regex must be valid UTF-8")
|
|
};
|
|
Regex::new(s)
|
|
.map(Cow::Owned)
|
|
.map_err(|e| exception!(*SYM_VALUE_ERROR, "invalid regex: {e}"))
|
|
}
|
|
Value::Native(n) if n.get_type() == *SYM_STD_REGEX => n
|
|
.as_any()
|
|
.downcast_ref::<ValueRegex>()
|
|
.map(|vr| Cow::Borrowed(&vr.0))
|
|
.ok_or_else(|| {
|
|
exception!(
|
|
*SYM_TYPE_ERROR,
|
|
"{name} expected string or regex, got {v:#}"
|
|
)
|
|
}),
|
|
_ => throw!(
|
|
*SYM_TYPE_ERROR,
|
|
"{name} expected string or regex, got {v:#}"
|
|
),
|
|
}
|
|
}
|
|
|
|
#[native_func(1)]
|
|
pub fn _regex(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re] = unpack_args!(args);
|
|
regex_from(&re, "regex").map(|re| ValueRegex(re.into_owned()).into())
|
|
}
|
|
|
|
#[native_func(2)]
|
|
pub fn matches(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "matches expected string, got {s:#}")
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "matches")?;
|
|
Ok(re.is_match(s).into())
|
|
}
|
|
|
|
#[native_func(2)]
|
|
pub fn match_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "match_once expected string, got {s:#}")
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "match_once")?;
|
|
Ok(re.find(s).map_or(Value::Nil, match_to_value))
|
|
}
|
|
|
|
#[native_func(2)]
|
|
pub fn _match(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "match expected string, got {s:#}")
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "match")?;
|
|
Ok(re
|
|
.find_iter(s)
|
|
.map(match_to_value)
|
|
.collect::<Vec<Value>>()
|
|
.into())
|
|
}
|
|
|
|
#[native_func(2)]
|
|
pub fn captures_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "captures_once expected string, got {s:#}")
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "captures_once")?;
|
|
Ok(re.captures(s).map_or(Value::Nil, captures_to_value))
|
|
}
|
|
|
|
#[native_func(2)]
|
|
pub fn captures(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "captures expected string, got {s:#}")
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "captures")?;
|
|
Ok(re
|
|
.captures_iter(s)
|
|
.map(captures_to_value)
|
|
.collect::<Vec<Value>>()
|
|
.into())
|
|
}
|
|
|
|
#[native_func(3)]
|
|
pub fn replace_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, rep, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "replace_once expected string, got {s:#}")
|
|
};
|
|
let Value::String(rep) = rep else {
|
|
throw!(
|
|
*SYM_TYPE_ERROR,
|
|
"replace_once expected string or function, got {rep:#}"
|
|
)
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
|
|
};
|
|
let Ok(rep) = rep.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "replacement string must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "replace_once")?;
|
|
Ok(LString::from(re.replace(s, rep)).into())
|
|
}
|
|
|
|
#[native_func(3)]
|
|
pub fn replace(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, rep, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "replace expected string, got {s:#}")
|
|
};
|
|
let Value::String(rep) = rep else {
|
|
throw!(
|
|
*SYM_TYPE_ERROR,
|
|
"replace expected string or function, got {rep:#}"
|
|
)
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "search string must be valid UTF-8")
|
|
};
|
|
let Ok(rep) = rep.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "replacement string must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "replace")?;
|
|
Ok(LString::from(re.replace_all(s, rep)).into())
|
|
}
|
|
|
|
#[native_func(2)]
|
|
pub fn split_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "split_once expected string, got {s:#}")
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "string to split must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "split_once")?;
|
|
let mut parts = re.splitn(s, 2);
|
|
let (part1, part2) = (
|
|
LString::from(parts.next().unwrap_or_default()).into(),
|
|
LString::from(parts.next().unwrap_or_default()).into(),
|
|
);
|
|
Ok(vec![part1, part2].into())
|
|
}
|
|
|
|
#[native_func(2)]
|
|
pub fn split(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
|
let [_, re, s] = unpack_args!(args);
|
|
let Value::String(s) = s else {
|
|
throw!(*SYM_TYPE_ERROR, "split expected string, got {s:#}")
|
|
};
|
|
let Ok(s) = s.to_str() else {
|
|
throw!(*SYM_VALUE_ERROR, "string to split must be valid UTF-8")
|
|
};
|
|
let re = regex_from(&re, "split")?;
|
|
let parts: Vec<Value> = re.split(s).map(|s| LString::from(s).into()).collect();
|
|
Ok(parts.into())
|
|
}
|