talc/talc-std/src/regex.rs

236 lines
7.2 KiB
Rust

use std::borrow::Cow;
use talc_lang::{exception::{exception, Result}, lstring::LString, symbol::{Symbol, SYM_TYPE_ERROR}, throw, value::{NativeValue, Value}, Vm};
use talc_macros::native_func;
use regex::{Captures, Match, Regex};
use lazy_static::lazy_static;
use crate::unpack_args;
// Symbols used by this module, each created lazily on first access.
lazy_static! {
// Type symbol that `ValueRegex::get_type` reports for native regex values.
static ref SYM_STD_REGEX: Symbol = Symbol::get("std.regex");
// Keys of the table that `match_to_value` builds for a single match:
// start offset, end offset and matched text respectively.
static ref SYM_START: Symbol = Symbol::get("start");
static ref SYM_END: Symbol = Symbol::get("end");
static ref SYM_STR: Symbol = Symbol::get("str");
}
/// Newtype around a compiled [`Regex`].
///
/// This is the type that implements `NativeValue` in this module, reporting
/// the `std.regex` type symbol.
#[derive(Clone, Debug)]
pub struct ValueRegex(Regex);
impl From<Regex> for ValueRegex {
fn from(value: Regex) -> Self { Self(value) }
}
impl From<ValueRegex> for Regex {
fn from(value: ValueRegex) -> Self { value.0 }
}
impl NativeValue for ValueRegex {
    /// Reports the `std.regex` type symbol.
    fn get_type(&self) -> Symbol {
        *SYM_STD_REGEX
    }

    /// Exposes the wrapper as `Any` so it can be downcast back to `ValueRegex`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Writes the regex to `w` using its `Display` form.
    ///
    /// With `repr` set, the output is wrapped in slashes (`/…/`); otherwise
    /// it is written bare.
    fn to_lstring(&self, w: &mut LString, repr: bool) -> std::io::Result<()> {
        use std::io::Write;
        let Self(pattern) = self;
        match repr {
            true => write!(w, "/{}/", pattern),
            false => write!(w, "{}", pattern),
        }
    }

    /// Produces a copy of this value by cloning the wrapper.
    fn copy_value(&self) -> Result<Option<Value>> {
        let duplicate: Value = self.clone().into();
        Ok(Some(duplicate))
    }
}
pub fn load(vm: &mut Vm) {
vm.set_global_name("regex", _regex().into());
vm.set_global_name("matches", matches().into());
vm.set_global_name("match", _match().into());
vm.set_global_name("match_once", match_once().into());
vm.set_global_name("captures", captures().into());
vm.set_global_name("captures_once", captures_once().into());
vm.set_global_name("replace", replace().into());
vm.set_global_name("replace_once", replace_once().into());
vm.set_global_name("split", split().into());
vm.set_global_name("split_once", split_once().into());
}
/// Converts a single regex match into a table value.
///
/// The table has three entries: `start` and `end` hold the offsets reported
/// by the match (as `i64`), and `str` holds a copy of the matched text.
fn match_to_value(m: Match) -> Value {
    Value::new_table(|table| {
        let begin = m.start() as i64;
        let finish = m.end() as i64;
        let text = LString::from(String::from(m.as_str()));

        table.insert((*SYM_START).into(), begin.into());
        table.insert((*SYM_END).into(), finish.into());
        table.insert((*SYM_STR).into(), text.into());
    })
}
fn captures_to_value(cs: Captures) -> Value {
cs.iter()
.map(|c| c.map_or(Value::Nil, match_to_value))
.collect::<Vec<Value>>()
.into()
}
/// Obtains a [`Regex`] from a value that is either a pattern string or an
/// already compiled regex.
///
/// * A string is compiled on the spot and returned as `Cow::Owned`.
/// * A native value whose type symbol is `std.regex` is downcast to
///   [`ValueRegex`] and its regex is returned as `Cow::Borrowed`, so no
///   recompilation or clone takes place.
///
/// `name` is the name of the calling builtin and is only used to prefix
/// error messages.
///
/// # Errors
///
/// Throws a type error when the string is not valid UTF-8, when the pattern
/// fails to compile, or when `v` is neither a string nor a regex.
fn regex_from<'a>(v: &'a Value, name: &str) -> Result<Cow<'a, Regex>> {
    match v {
        Value::String(s) => {
            let Ok(s) = s.to_str() else {
                throw!(*SYM_TYPE_ERROR, "regex must be valid UTF-8")
            };
            Regex::new(s)
                .map(Cow::Owned)
                .map_err(|e| exception!(*SYM_TYPE_ERROR, "invalid regex: {e}"))
        },
        Value::Native(n) if n.get_type() == *SYM_STD_REGEX => {
            // The type symbol matched, so the downcast is expected to succeed;
            // if it does not, report it the same way as any other wrong type.
            n.as_any().downcast_ref::<ValueRegex>()
                .map(|vr| Cow::Borrowed(&vr.0))
                .ok_or_else(|| exception!(
                    *SYM_TYPE_ERROR, "{name} expected string or regex, got {v:#}"))
        },
        _ => throw!(*SYM_TYPE_ERROR, "{name} expected string or regex, got {v:#}")
    }
}
/// `regex(re)`: turns a pattern string (or an existing regex) into a regex value.
///
/// The first slot of `args` is ignored. Errors from `regex_from` are passed
/// through unchanged.
#[native_func(1)]
pub fn _regex(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re] = unpack_args!(args);
    let compiled = regex_from(&re, "regex")?;
    Ok(ValueRegex(compiled.into_owned()).into())
}
/// `matches(re, s)`: reports whether `re` matches anywhere in `s`.
///
/// The first slot of `args` is ignored. Throws a type error if `s` is not a
/// valid UTF-8 string or if `re` cannot be turned into a regex.
#[native_func(2)]
pub fn matches(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);
    let s = match s {
        Value::String(text) => text,
        s => throw!(*SYM_TYPE_ERROR, "matches expected string, got {s:#}"),
    };
    let s = match s.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8"),
    };
    let pattern = regex_from(&re, "matches")?;
    let found = pattern.is_match(s);
    Ok(found.into())
}
#[native_func(2)]
pub fn match_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
let [_, re, s] = unpack_args!(args);
let Value::String(s) = s else {
throw!(*SYM_TYPE_ERROR, "match_once expected string, got {s:#}")
};
let Ok(s) = s.to_str() else {
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
};
let re = regex_from(&re, "match_once")?;
Ok(re.find(s).map_or(Value::Nil, match_to_value))
}
/// `match(re, s)`: collects every match of `re` in `s`.
///
/// Returns a list of match tables (see `match_to_value`); the list is empty
/// when nothing matches. The first slot of `args` is ignored. Throws a type
/// error if `s` is not a valid UTF-8 string or if `re` cannot be turned into
/// a regex.
#[native_func(2)]
pub fn _match(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);
    let s = match s {
        Value::String(text) => text,
        s => throw!(*SYM_TYPE_ERROR, "match expected string, got {s:#}"),
    };
    let s = match s.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8"),
    };
    let pattern = regex_from(&re, "match")?;
    let found: Vec<Value> = pattern.find_iter(s).map(match_to_value).collect();
    Ok(found.into())
}
#[native_func(2)]
pub fn captures_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
let [_, re, s] = unpack_args!(args);
let Value::String(s) = s else {
throw!(*SYM_TYPE_ERROR, "captures_once expected string, got {s:#}")
};
let Ok(s) = s.to_str() else {
throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
};
let re = regex_from(&re, "captures_once")?;
Ok(re.captures(s).map_or(Value::Nil, captures_to_value))
}
/// `captures(re, s)`: returns the capture groups of every match.
///
/// The result is a list with one entry per match, each built by
/// `captures_to_value`. The first slot of `args` is ignored. Throws a type
/// error if `s` is not a valid UTF-8 string or if `re` cannot be turned into
/// a regex.
#[native_func(2)]
pub fn captures(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);
    let s = match s {
        Value::String(text) => text,
        s => throw!(*SYM_TYPE_ERROR, "captures expected string, got {s:#}"),
    };
    let s = match s.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8"),
    };
    let pattern = regex_from(&re, "captures")?;
    let all_groups: Vec<Value> = pattern
        .captures_iter(s)
        .map(captures_to_value)
        .collect();
    Ok(all_groups.into())
}
/// `replace_once(re, rep, s)`: replaces the first match of `re` in `s` with `rep`.
///
/// `rep` must be a string; it is handed directly to `Regex::replace`, so that
/// crate's `$`-style capture-group references in the replacement apply.
/// The first slot of `args` is ignored.
///
/// # Errors
///
/// Throws a type error if `s` or `rep` is not a valid UTF-8 string, or if
/// `re` cannot be turned into a regex.
#[native_func(3)]
pub fn replace_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, rep, s] = unpack_args!(args);
    let Value::String(s) = s else {
        throw!(*SYM_TYPE_ERROR, "replace_once expected string, got {s:#}")
    };
    // Only string replacements are supported here, so the message must not
    // advertise functions as an accepted alternative.
    let Value::String(rep) = rep else {
        throw!(*SYM_TYPE_ERROR, "replace_once expected replacement string, got {rep:#}")
    };
    let Ok(s) = s.to_str() else {
        throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
    };
    let Ok(rep) = rep.to_str() else {
        throw!(*SYM_TYPE_ERROR, "replacement string must be valid UTF-8")
    };
    let re = regex_from(&re, "replace_once")?;
    Ok(LString::from(re.replace(s, rep)).into())
}
/// `replace(re, rep, s)`: replaces every match of `re` in `s` with `rep`.
///
/// `rep` must be a string; it is handed directly to `Regex::replace_all`, so
/// that crate's `$`-style capture-group references in the replacement apply.
/// The first slot of `args` is ignored.
///
/// # Errors
///
/// Throws a type error if `s` or `rep` is not a valid UTF-8 string, or if
/// `re` cannot be turned into a regex.
#[native_func(3)]
pub fn replace(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, rep, s] = unpack_args!(args);
    let Value::String(s) = s else {
        throw!(*SYM_TYPE_ERROR, "replace expected string, got {s:#}")
    };
    // Only string replacements are supported here, so the message must not
    // advertise functions as an accepted alternative.
    let Value::String(rep) = rep else {
        throw!(*SYM_TYPE_ERROR, "replace expected replacement string, got {rep:#}")
    };
    let Ok(s) = s.to_str() else {
        throw!(*SYM_TYPE_ERROR, "search string must be valid UTF-8")
    };
    let Ok(rep) = rep.to_str() else {
        throw!(*SYM_TYPE_ERROR, "replacement string must be valid UTF-8")
    };
    let re = regex_from(&re, "replace")?;
    Ok(LString::from(re.replace_all(s, rep)).into())
}
/// `split_once(re, s)`: splits `s` at the first match of `re`.
///
/// Always returns a two-element list. A missing piece is filled in with the
/// empty string, so when `re` does not match, the second element is empty.
/// The first slot of `args` is ignored. Throws a type error if `s` is not a
/// valid UTF-8 string or if `re` cannot be turned into a regex.
#[native_func(2)]
pub fn split_once(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);
    let s = match s {
        Value::String(text) => text,
        s => throw!(*SYM_TYPE_ERROR, "split_once expected string, got {s:#}"),
    };
    let s = match s.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_TYPE_ERROR, "string to split must be valid UTF-8"),
    };
    let pattern = regex_from(&re, "split_once")?;
    // Limit of 2 means at most one split is performed.
    let mut pieces = pattern.splitn(s, 2);
    let head: Value = LString::from(pieces.next().unwrap_or_default()).into();
    let tail: Value = LString::from(pieces.next().unwrap_or_default()).into();
    Ok(vec![head, tail].into())
}
/// `split(re, s)`: splits `s` at every match of `re`.
///
/// Returns the pieces between matches as a list of strings. The first slot
/// of `args` is ignored. Throws a type error if `s` is not a valid UTF-8
/// string or if `re` cannot be turned into a regex.
#[native_func(2)]
pub fn split(_: &mut Vm, args: Vec<Value>) -> Result<Value> {
    let [_, re, s] = unpack_args!(args);
    let s = match s {
        Value::String(text) => text,
        s => throw!(*SYM_TYPE_ERROR, "split expected string, got {s:#}"),
    };
    let s = match s.to_str() {
        Ok(text) => text,
        Err(_) => throw!(*SYM_TYPE_ERROR, "string to split must be valid UTF-8"),
    };
    let pattern = regex_from(&re, "split")?;
    let mut pieces: Vec<Value> = Vec::new();
    for piece in pattern.split(s) {
        pieces.push(LString::from(piece).into());
    }
    Ok(pieces.into())
}