Initial code dump: matrix feedbot, aka bender
This is a rewrite of our old feedbot in Rust, heavily inspired by rek2's INN matrix bot and making use of some bits from matrix-rust-sdk. It is an asynchronous tokio-based Matrix client using a stateless feed fetcher implementation based on reqwest; it uses feed_rs for parsing RSS and Atom feeds. State persistence is achieved using a simple file-backed datastore with serde_yaml as the serialization format. Published under the GNU General Public License version 3 or later.
This commit is contained in:
112
src/feedreader.rs
Normal file
112
src/feedreader.rs
Normal file
@ -0,0 +1,112 @@
|
||||
/**
|
||||
* matrix-feedbot v0.1.0
|
||||
*
|
||||
* Copyright (C) 2024 The 1312 Media Collective
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use reqwest::header::{ HeaderMap, ACCEPT, USER_AGENT };
|
||||
use tracing::{ info, debug };
|
||||
use feed_rs::{ model, parser };
|
||||
use chrono::{ DateTime, Utc };
|
||||
use std::error::Error;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Feed {
|
||||
pub uri: String,
|
||||
pub title: String,
|
||||
pub model: model::Feed
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Entry {
|
||||
pub ts: DateTime<Utc>,
|
||||
pub title: String,
|
||||
pub link: String,
|
||||
pub content: String,
|
||||
pub feed: Feed,
|
||||
#[allow(dead_code)]
|
||||
pub model: model::Entry,
|
||||
pub formatted: Option<String>
|
||||
}
|
||||
|
||||
pub async fn fetch_and_parse_feed(uri: &str) -> Result<Feed, Box<dyn Error>> {
|
||||
info!("Fetching feed at {}", uri);
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(USER_AGENT,
|
||||
"matrix-feedbot/0.1.0 (compatible; Mozilla/5.0; +https://1312.media/)"
|
||||
.parse().unwrap());
|
||||
|
||||
headers.insert(ACCEPT,
|
||||
"application/rss+xml, application/rdf+xml;q=0.8, application/atom+xml;q=0.6, application/xml;q=0.4, text/xml;q=0.4"
|
||||
.parse().unwrap());
|
||||
|
||||
let http_client = reqwest::Client::builder().default_headers(headers).build()?;
|
||||
let response = http_client.get(uri).send().await?.text().await?;
|
||||
|
||||
info!("Got response, parsing feed");
|
||||
let feed = parser::parse(response.as_bytes())?;
|
||||
|
||||
let feed_title = match feed.title.clone() {
|
||||
Some(t) => t.content,
|
||||
None => String::from("Untitled")
|
||||
};
|
||||
|
||||
info!("Got feed with title \"{}\"", feed_title);
|
||||
|
||||
Ok(Feed {
|
||||
uri: String::from(uri),
|
||||
title: feed_title,
|
||||
model: feed
|
||||
})
|
||||
}
|
||||
|
||||
pub fn format_entry(feed: Feed, entry: model::Entry) -> Result<Entry, Box<dyn Error>> {
|
||||
debug!("Formatting entry {}", entry.id);
|
||||
|
||||
let mut e = Entry {
|
||||
feed: feed,
|
||||
title: match entry.title.clone() {
|
||||
Some(t) => t.content,
|
||||
None => String::from("Untitled")
|
||||
},
|
||||
link: entry.links[0].href.clone(),
|
||||
ts: match entry.updated {
|
||||
Some(d) => d,
|
||||
None => entry.published.unwrap_or(Utc::now())
|
||||
},
|
||||
content: match entry.content.clone() {
|
||||
Some(c) => c.body.unwrap_or(String::from("")),
|
||||
None => match entry.summary.clone() {
|
||||
Some(s) => s.content,
|
||||
None => String::from("")
|
||||
}
|
||||
},
|
||||
model: entry,
|
||||
formatted: None
|
||||
};
|
||||
|
||||
e.formatted = Some(format!(
|
||||
"<b>{feed_title}: <a href=\"{link}\">{title}</a> on {date}</b>{content}",
|
||||
|
||||
feed_title=e.feed.title,
|
||||
link=e.link,
|
||||
title=e.title,
|
||||
date=e.ts.to_rfc2822(),
|
||||
content=e.content
|
||||
));
|
||||
|
||||
Ok(e)
|
||||
}
|
Reference in New Issue
Block a user