diff --git a/.gitignore b/.gitignore index ea8c4bf..c32d3bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /target +config.json +guids +hook.sh diff --git a/Cargo.lock b/Cargo.lock index 95c0b82..21368ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,7 +59,7 @@ dependencies = [ "libc", "num-integer", "num-traits", - "time 0.1.44", + "time", "winapi", ] @@ -436,15 +436,6 @@ dependencies = [ "libc", ] -[[package]] -name = "num_threads" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" -dependencies = [ - "libc", -] - [[package]] name = "once_cell" version = "1.12.0" @@ -765,35 +756,16 @@ dependencies = [ "winapi", ] -[[package]] -name = "time" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217" -dependencies = [ - "itoa", - "libc", - "num_threads", - "time-macros", -] - -[[package]] -name = "time-macros" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" - [[package]] name = "tiny_http" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d6ef4e10d23c1efb862eecad25c5054429a71958b4eeef85eb5e7170b477ca" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" dependencies = [ "ascii", "chunked_transfer", + "httpdate", "log", - "time 0.3.11", - "url", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d86d707..525403d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,10 +6,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -chrono = "0.4.19" -reqwest = { version = "0.11.11", features = ["blocking"]} -rss = { version = "2.0.1", default_features = false } 
-serde = { version = "1.0.138", features = ["derive"] } -serde_json = "1.0.82" -strfmt = "0.1.6" -tiny_http = "0.11.0" \ No newline at end of file +chrono = "0.4" +reqwest = { version = "0.11", features = ["blocking"]} +rss = { version = "2.0", default_features = false } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +strfmt = "0.1" +tiny_http = "0.12" diff --git a/README.md b/README.md index bec471e..03e4a2a 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ The following fields are optional: | `worker_threads` | Number of threads to spawn for the web server. | `4` | | `port` | Port number for web server | `4400` | | `host` | Host for web server | `127.0.0.1` | +| `hook` | Path to hook to run upon receiving new blog posts | `./hook.sh` | Here is an example configuration: @@ -46,6 +47,16 @@ Here is an example configuration: } ``` +## Hook + +Every time a new post is loaded, the hook program (if specified) will be executed. This is based on GUIDs, so feeds that do not fill the `guid` field will not trigger hooks. The following environment variables will be set: +- `TITLE` - the original title of the post +- `TITLE_FMT` - the formatted title of the post +- `AUTHOR` - the post's author +- `LINK` - the link to the post +- `GUID` - the post's GUID +- `PUB_DATE` - the post's publishing date + ## Status page RSS Bundler also generates a status page, available at `/status`. This page shows the last date a feed was fetched and parsed successfully and, if the last try was erroneous, the error that occured. If an error occurs while fetching or parsing a feed, the last good version will be used instead. 
diff --git a/src/config.rs b/src/config.rs index 94fd4e4..9d6577a 100644 --- a/src/config.rs +++ b/src/config.rs @@ -30,6 +30,9 @@ pub struct Config { #[serde(default="default_host")] pub host: String, + #[serde(default)] + pub hook: Option, + pub users: Vec, } @@ -37,4 +40,4 @@ pub struct Config { pub struct User { pub name: String, pub rss: String, -} \ No newline at end of file +} diff --git a/src/hooks.rs b/src/hooks.rs new file mode 100644 index 0000000..a3edf23 --- /dev/null +++ b/src/hooks.rs @@ -0,0 +1,24 @@ +use std::process::Command; + +pub struct HookData { + pub title: String, + pub title_fmt: String, + pub author: String, + pub link: String, + pub guid: String, + pub pub_date: String, +} + +pub fn run_hook(hook: String, hookdata: Vec) -> Result<(), std::io::Error> { + for data in hookdata { + Command::new(hook.clone()) + .env("TITLE", data.title) + .env("TITLE_FMT", data.title_fmt) + .env("AUTHOR", data.author) + .env("LINK", data.link) + .env("GUID", data.guid) + .env("PUB_DATE", data.pub_date) + .spawn()?; + } + Ok(()) +} diff --git a/src/junction.rs b/src/junction.rs index f1af966..12b774c 100644 --- a/src/junction.rs +++ b/src/junction.rs @@ -4,17 +4,19 @@ use chrono::{DateTime, SubsecRound}; use rss::Channel; use strfmt::strfmt; -use crate::Feed; +use crate::{Feed, State}; use crate::config::{Config, User}; +use crate::hooks::HookData; -pub fn bundle_rss(feeds: &HashMap, config: &Config) -> Channel { +pub fn bundle_rss(state: &mut State, config: &Config) -> (Vec, Channel) { let mut bundle = Channel::default(); bundle.set_title(&config.title); bundle.set_link(&config.link); bundle.description = config.description.clone(); bundle.set_generator(Some("RSS Bundler".into())); + let mut hookdata = Vec::new(); let mut most_recent_date = None; - for (user, feed) in feeds { + for (user, feed) in &state.feeds { if let Some(channel) = &feed.channel { for item in channel.items() { if let Some(pub_date) = &item.pub_date { @@ -27,6 +29,9 @@ pub fn 
bundle_rss(feeds: &HashMap, config: &Config) -> Channel { } } let mut item = item.clone(); + if item.author.is_none() { + item.set_author(user.name.clone()); + } let item_title = { let title = item.title.as_ref().unwrap_or(&config.default_title); let mut args = HashMap::new(); @@ -40,10 +45,23 @@ pub fn bundle_rss(feeds: &HashMap, config: &Config) -> Channel { } } }; - item.set_title(item_title); - if item.author.is_none() { - item.set_author(user.name.clone()); + if let Some(guid) = &item.guid { + if !state.guids.contains(&guid.value) { + state.guids.insert(guid.value.clone()); + + let data = HookData { + title: item.title.as_ref().unwrap_or(&config.default_title).to_owned(), + title_fmt: item_title.clone(), + author: item.author.clone().unwrap(), + link: item.link.clone().unwrap_or_default(), + guid: item.guid.clone().map(|g| g.value).unwrap_or_default(), + pub_date: item.pub_date.clone().unwrap_or_default(), + }; + + hookdata.push(data); + } } + item.set_title(item_title); bundle.items.push(item.clone()); } } @@ -51,7 +69,7 @@ pub fn bundle_rss(feeds: &HashMap, config: &Config) -> Channel { if let Some(date) = most_recent_date { bundle.set_pub_date(date.to_rfc2822()); } - bundle + (hookdata, bundle) } pub fn gen_status(feeds: &HashMap) -> String { @@ -76,4 +94,4 @@ pub fn gen_status(feeds: &HashMap) -> String { w_user=max_user_length, w_time=max_timestamp_length)); } lines.join("\n") -} \ No newline at end of file +} diff --git a/src/main.rs b/src/main.rs index 1b8599b..de24607 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,17 +1,18 @@ #![warn(clippy::pedantic)] -use std::{collections::HashMap, thread, sync::{Mutex, Arc}, time::Duration, env::args, process::ExitCode, fs}; +use std::{collections::{HashMap, HashSet}, thread, sync::{Mutex, Arc}, time::Duration, process::ExitCode, fs, panic::catch_unwind, io::{BufWriter, Write}}; use chrono::{DateTime, Utc}; -use config::Config; +use config::{Config, User}; use query::update_feeds; use rss::Channel; -use 
crate::{junction::{bundle_rss, gen_status}}; +use crate::{junction::{bundle_rss, gen_status}, hooks::run_hook}; mod config; mod query; mod junction; mod server; +mod hooks; #[derive(Clone, Debug)] pub struct Feed { @@ -22,37 +23,26 @@ pub struct Feed { pub struct State { rss: String, + guids: HashSet, + feeds: HashMap, status: Option, } fn main() -> ExitCode { - let mut args = args(); - let exe = args.next(); - let config_file = args.next(); - let config_file = match &config_file { - Some(s) if s == "--help" => { - eprintln!( - "Usage: {} \nDocumentation available at https://github.com/trimill/rss-bundler", - exe.unwrap_or_else(|| "rssbundler".into())); - return 0.into() - } - Some(file) => file, - None => { - eprintln!("No config file provided."); - return 1.into() - } - }; - let config = match load_config(config_file) { + let config = match load_config() { Ok(config) => config, Err(e) => { eprintln!("Error loading config: {}", e); return 1.into() } }; - let mut feeds = HashMap::new(); + + let guids = load_guids().unwrap_or_default(); let state = State { rss: "".into(), + guids, + feeds: HashMap::new(), status: None, }; @@ -66,25 +56,50 @@ fn main() -> ExitCode { let sleep_duration = Duration::from_secs(60 * config.refresh_time); loop { - update_feeds(&mut feeds, &config); - let bundle = bundle_rss(&feeds, &config); - let status = if config.status_page { - Some(gen_status(&feeds)) - } else { None }; + let result = catch_unwind(|| { + let mut guard = state.lock().unwrap(); + + update_feeds(&mut guard.feeds, &config); + let (hookdata, bundle) = bundle_rss(&mut guard, &config); + let status = if config.status_page { + Some(gen_status(&guard.feeds)) + } else { None }; - let mut guard = state.lock().unwrap(); - guard.status = status; - guard.rss = bundle.to_string(); - drop(guard); + if let Some(hook) = &config.hook { + run_hook(hook.to_owned(), hookdata).unwrap(); + } - println!("Feeds updated"); + guard.status = status; + guard.rss = bundle.to_string(); + 
save_guids(&guard.guids).unwrap(); + drop(guard); + }); + if result.is_err() { + eprintln!("Error occured white updating"); + } else { + println!("Feeds updated"); + } thread::sleep(sleep_duration); } } -fn load_config(config_file: &str) -> Result> { - let content = fs::read_to_string(config_file)?; +fn load_config() -> Result> { + let content = fs::read_to_string("config.json")?; let config: Config = serde_json::from_str(&content)?; Ok(config) -} \ No newline at end of file +} + +fn load_guids() -> Result, Box> { + let content = fs::read_to_string("guids")?; + Ok(content.split("\n").filter(|x| x.len() > 0).map(str::to_owned).collect()) +} + +fn save_guids(guids: &HashSet) -> Result<(), Box> { + let file = fs::OpenOptions::new().create(true).write(true).open("guids")?; + let mut writer = BufWriter::new(file); + for guid in guids { + writeln!(writer, "{}", guid)?; + } + Ok(()) +} diff --git a/src/query.rs b/src/query.rs index 0ada971..c97ad86 100644 --- a/src/query.rs +++ b/src/query.rs @@ -10,17 +10,16 @@ use crate::config::{User, Config}; pub fn update_feeds(feeds: &mut HashMap, config: &Config) { let client = Client::new(); for user in &config.users { - let feed = match feeds.get_mut(user) { - Some(feed) => feed, - None => { - let feed = Feed { - channel: None, - error_message: None, - last_fetched: Utc.ymd(1970, 1, 1).and_hms(0, 0, 0) - }; - feeds.insert(user.clone(), feed); - feeds.get_mut(user).unwrap() - } + let feed = if let Some(feed) = feeds.get_mut(user) { + feed + } else { + let feed = Feed { + channel: None, + error_message: None, + last_fetched: Utc.ymd(1970, 1, 1).and_hms(0, 0, 0) + }; + feeds.insert(user.clone(), feed); + feeds.get_mut(user).unwrap() }; let res = client.get(&user.rss) .timeout(Duration::from_secs(5)) @@ -45,4 +44,4 @@ pub fn update_feeds(feeds: &mut HashMap, config: &Config) { Err(e) => feed.error_message = Some(e.to_string()), } } -} \ No newline at end of file +}