236 lines
7.2 KiB
Rust
236 lines
7.2 KiB
Rust
|
use std::borrow::Cow;
|
||
|
|
||
|
use talc_lang::{exception::{exception, Result}, lstring::LString, symbol::{Symbol, SYM_TYPE_ERROR}, throw, value::{NativeValue, Value}, Vm};
|
||
|
use talc_macros::native_func;
|
||
|
use regex::{Captures, Match, Regex};
|
||
|
use lazy_static::lazy_static;
|
||
|
|
||
|
use crate::unpack_args;
|
||
|
|
||
|
// Symbols used by this module, created on first use.
lazy_static! {
    /// Type symbol reported by `ValueRegex::get_type` (`std.regex`).
    static ref SYM_STD_REGEX: Symbol = Symbol::get("std.regex");
    /// Table key under which `match_to_value` stores a match's start offset.
    static ref SYM_START: Symbol = Symbol::get("start");
    /// Table key under which `match_to_value` stores a match's end offset.
    static ref SYM_END: Symbol = Symbol::get("end");
    /// Table key under which `match_to_value` stores the matched text.
    static ref SYM_STR: Symbol = Symbol::get("str");
}
|
||
|
|
||
|
#[derive(Clone, Debug)]
|
||
|
pub struct ValueRegex(Regex);
|
||
|
|
||
|
impl From<Regex> for ValueRegex {
|
||
|
fn from(value: Regex) -> Self { Self(value) }
|
||
|
}
|
||
|
|
||
|
impl From<ValueRegex> for Regex {
|
||
|
fn from(value: ValueRegex) -> Self { value.0 }
|
||
|
}
|
||
|
|
||
|
impl NativeValue for ValueRegex {
|
||
|
fn get_type(&self) -> Symbol { *SYM_STD_REGEX }
|
||
|
fn as_any(&self) -> &dyn std::any::Any { self }
|
||
|
fn to_lstring(&self, w: &mut LString, repr: bool) -> std::io::Result<()> {
|
||
|
use std::io::Write;
|
||
|
if repr {
|
||
|
write!(w, "/{}/", self.0)
|
||
|
} else {
|
||
|
write!(w, "{}", self.0)
|
||
|
}
|
||
|
}
|
||
|
fn copy_value(&self) -> Result<Option<Value>> {
|
||
|
Ok(Some(self.clone().into()))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/// Registers every function of this module as a global in `vm`.
///
/// The globals `regex` and `match` are backed by the Rust functions `_regex`
/// and `_match`; all other globals share the name of their Rust function.
pub fn load(vm: &mut Vm) {
    // Construction.
    vm.set_global_name("regex", _regex().into());

    // Searching.
    vm.set_global_name("matches", matches().into());
    vm.set_global_name("match", _match().into());
    vm.set_global_name("match_once", match_once().into());
    vm.set_global_name("captures", captures().into());
    vm.set_global_name("captures_once", captures_once().into());

    // Rewriting.
    vm.set_global_name("replace", replace().into());
    vm.set_global_name("replace_once", replace_once().into());

    // Splitting.
    vm.set_global_name("split", split().into());
    vm.set_global_name("split_once", split_once().into());
}
|
||
|
|
||
|
fn match_to_value(m: Match) -> Value {
|
||
|
Value::new_table(|t| {
|
||
|
t.insert((*SYM_START).into(), (m.start() as i64).into());
|
||
|
t.insert((*SYM_END).into(), (m.end() as i64).into());
|
||
|
t.insert((*SYM_STR).into(), LString::from(m.as_str().to_string()).into());
|
||
|
})
|
||
|
}
|
||
|
|
||
|
fn captures_to_value(cs: Captures) -> Value {
|
||
|
cs.iter()
|
||
|
.map(|c| c.map_or(Value::Nil, match_to_value))
|
||
|
.collect::<Vec<Value>>()
|
||
|
.into()
|
||
|
}
|
||
|
|
||
|
fn regex_from<'a>(v: &'a Value, name: &str) -> Result<Cow<'a, Regex>> {
|
||
|
match v {
|
||
|
Value::String(s) => {
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "regex must be valid UTF-8")
|
||
|
};
|
||
|
Regex::new(s)
|
||
|
.map(Cow::Owned)
|
||
|
.map_err(|e| exception!(*SYM_TYPE_ERROR, "invalid regex: {e}"))
|
||
|
},
|
||
|
Value::Native(n) if n.get_type() == *SYM_STD_REGEX => {
|
||
|
n.as_any().downcast_ref::<ValueRegex>()
|
||
|
.map(|vr| Cow::Borrowed(&vr.0))
|
||
|
.ok_or_else(|| exception!(
|
||
|
*SYM_TYPE_ERROR, "BEES {name} expected string or regex, got {v:#}"))
|
||
|
},
|
||
|
_ => throw!(*SYM_TYPE_ERROR, "{name} expected string or regex, got {v:#}")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#[native_func(1)]
|
||
|
pub fn _regex(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re] = unpack_args!(args);
|
||
|
regex_from(&re, "regex")
|
||
|
.map(|re| ValueRegex(re.into_owned()).into())
|
||
|
}
|
||
|
|
||
|
#[native_func(2)]
|
||
|
pub fn matches(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "matches expected string, got {s:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "matches")?;
|
||
|
Ok(re.is_match(s).into())
|
||
|
}
|
||
|
|
||
|
#[native_func(2)]
|
||
|
pub fn match_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "match_once expected string, got {s:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "match_once")?;
|
||
|
Ok(re.find(s).map_or(Value::Nil, match_to_value))
|
||
|
}
|
||
|
|
||
|
#[native_func(2)]
|
||
|
pub fn _match(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "match expected string, got {s:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "match")?;
|
||
|
Ok(re.find_iter(s).map(match_to_value).collect::<Vec<Value>>().into())
|
||
|
}
|
||
|
|
||
|
#[native_func(2)]
|
||
|
pub fn captures_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "captures_once expected string, got {s:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "captures_once")?;
|
||
|
Ok(re.captures(s).map_or(Value::Nil, captures_to_value))
|
||
|
}
|
||
|
|
||
|
#[native_func(2)]
|
||
|
pub fn captures(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "captures expected string, got {s:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "captures")?;
|
||
|
Ok(re.captures_iter(s).map(captures_to_value).collect::<Vec<Value>>().into())
|
||
|
}
|
||
|
|
||
|
#[native_func(3)]
|
||
|
pub fn replace_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, rep, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "replace_once expected string, got {s:#}")
|
||
|
};
|
||
|
let Value::String(rep) = rep else {
|
||
|
throw!(*SYM_TYPE_ERROR, "replace_once expected string or function, got {rep:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
|
||
|
};
|
||
|
let Ok(rep) = rep.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "replacement string must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "replace_once")?;
|
||
|
Ok(LString::from(re.replace(s, rep)).into())
|
||
|
}
|
||
|
|
||
|
#[native_func(3)]
|
||
|
pub fn replace(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, rep, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "replace expected string, got {s:#}")
|
||
|
};
|
||
|
let Value::String(rep) = rep else {
|
||
|
throw!(*SYM_TYPE_ERROR, "replace expected string or function, got {rep:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
|
||
|
};
|
||
|
let Ok(rep) = rep.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "replacement string must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "replace")?;
|
||
|
Ok(LString::from(re.replace_all(s, rep)).into())
|
||
|
}
|
||
|
|
||
|
#[native_func(2)]
|
||
|
pub fn split_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "split_once expected string, got {s:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "string to split must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "split_once")?;
|
||
|
let mut parts = re.splitn(s, 2);
|
||
|
let (part1, part2) = (
|
||
|
LString::from(parts.next().unwrap_or_default()).into(),
|
||
|
LString::from(parts.next().unwrap_or_default()).into()
|
||
|
);
|
||
|
Ok(vec![part1, part2].into())
|
||
|
}
|
||
|
|
||
|
#[native_func(2)]
|
||
|
pub fn split(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
|
||
|
let [_, re, s] = unpack_args!(args);
|
||
|
let Value::String(s) = s else {
|
||
|
throw!(*SYM_TYPE_ERROR, "split expected string, got {s:#}")
|
||
|
};
|
||
|
let Ok(s) = s.to_str() else {
|
||
|
throw!(*SYM_TYPE_ERROR, "string to split must be valid UTF-8")
|
||
|
};
|
||
|
let re = regex_from(&re, "split")?;
|
||
|
let parts: Vec<Value> = re.split(s)
|
||
|
.map(|s| LString::from(s).into())
|
||
|
.collect();
|
||
|
Ok(parts.into())
|
||
|
}
|
||
|
|