kensho/src/main.rs

181 lines
5.2 KiB
Rust
Raw Normal View History

2023-07-15 18:18:27 +00:00
use anyhow::{bail, format_err, Result};
use chrono::{DateTime, Local, LocalResult, NaiveDate, TimeZone, Utc};
2023-07-15 12:47:57 +00:00
use html2md::parse_html;
2023-07-15 18:18:27 +00:00
use log::{debug, trace};
2023-07-15 13:20:51 +00:00
use megalodon::{
2023-07-15 18:18:27 +00:00
entities::Status, generator, megalodon::GetLocalTimelineInputOptions, response::Response,
Megalodon,
2023-07-15 13:20:51 +00:00
};
2023-07-15 18:18:27 +00:00
use tokio_stream::{iter, StreamExt};
2023-07-15 12:47:57 +00:00
use std::env;
#[derive(Debug)]
2023-07-15 18:18:27 +00:00
struct Range {
start: DateTime<Local>,
end: DateTime<Local>,
2023-07-15 12:47:57 +00:00
}
2023-07-15 18:18:27 +00:00
/// Summary of one fetched timeline page, borrowed from the page's statuses.
///
/// `main` fills it from the first and last status of the fetched `Vec`, and
/// treats the first entry as the newest and the last entry as the oldest.
/// NOTE(review): this relies on the server returning the page newest-first —
/// confirm against the API.
#[derive(Debug)]
struct Page<'a> {
    /// Id of the last status on the page; used as the `max_id` cursor for the
    /// next request.
    oldest_id: Option<String>,
    /// Creation time of the last status on the page (`None` for an empty page).
    oldest: Option<&'a DateTime<Utc>>,
    /// Creation time of the first status on the page (`None` for an empty page).
    newest: Option<&'a DateTime<Utc>>,
}
2023-07-15 12:47:57 +00:00
#[tokio::main]
async fn main() -> Result<()> {
2023-07-15 18:18:27 +00:00
env::set_var("RUST_LOG", format!("{}=debug", module_path!()));
2023-07-15 12:47:57 +00:00
env_logger::init();
2023-07-15 13:20:51 +00:00
// TODO add clap and argument for date
2023-07-15 18:18:27 +00:00
let day = try_create_range("2023-07-01")?;
2023-07-15 12:47:57 +00:00
2023-07-15 18:18:27 +00:00
debug!("Date {}", day.end.format("%Y-%m-%d"));
let client = create_client()?;
let mut last_id_on_page: Option<String> = None;
debug!("Fetching posts");
2023-07-15 12:47:57 +00:00
loop {
2023-07-15 18:18:27 +00:00
let json = fetch_page(&client, &last_id_on_page).await?;
let page = Page {
newest: json.first().map(|s| &s.created_at),
oldest_id: json.last().map(|s| s.id.clone()),
oldest: json.last().map(|s| &s.created_at),
2023-07-15 12:47:57 +00:00
};
2023-07-15 18:18:27 +00:00
trace!("Page bounds {:?}", page);
2023-07-15 12:47:57 +00:00
2023-07-15 18:18:27 +00:00
if last_id_on_page.is_some() && page_start_older_than(&page, &day) {
break;
2023-07-15 12:47:57 +00:00
}
2023-07-15 18:18:27 +00:00
if let Some(oldest_id) = page_newer_than(&page, &day) {
last_id_on_page.replace(oldest_id);
continue;
}
let json = json
.clone()
.into_iter()
.filter(|json| day.start <= json.created_at && json.created_at <= day.end)
.collect::<Vec<Status>>();
trace!("Filtered to {} post(s)", json.len());
let mut stream = iter(json);
while let Some(status) = stream.next().await {
println!(
"{}
2023-07-15 13:20:51 +00:00
> {}",
2023-07-15 18:18:27 +00:00
status.created_at.with_timezone(&Local).format("%H:%M"),
parse_html(&status.content)
);
let Response { json, .. } = client.get_status_context(status.id, None).await?;
let thread = json
2023-07-15 13:20:51 +00:00
.descendants
2023-07-15 18:18:27 +00:00
.into_iter()
.map(|status| {
2023-07-15 13:20:51 +00:00
format!(
">
2023-07-15 18:18:27 +00:00
> {}
2023-07-15 13:20:51 +00:00
>> {}",
2023-07-15 18:18:27 +00:00
status.created_at.with_timezone(&Local).format("%H:%M"),
parse_html(&status.content)
2023-07-15 13:20:51 +00:00
)
2023-07-15 12:47:57 +00:00
})
2023-07-15 13:20:51 +00:00
.collect::<Vec<String>>()
2023-07-15 18:18:27 +00:00
.join("\n");
println!("{}", thread);
}
if page_end_older_than(&page, &day) {
debug!("No more posts in range.");
break;
}
if let Some(id) = page.oldest_id {
last_id_on_page.replace(id.clone());
}
2023-07-15 12:47:57 +00:00
}
Ok(())
2023-06-03 19:59:08 +00:00
}
2023-07-15 18:18:27 +00:00
fn create_client() -> Result<Box<dyn Megalodon + Send + Sync>> {
let url = env::var("MASTODON_URL")?;
let token = env::var("MASTODON_ACCESS_TOKEN")?;
Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None))
}
/// Fetches one page of the local timeline.
///
/// With no cursor the first page is requested with default options; otherwise
/// `last_id_on_page` is sent as `max_id` and all other options keep their
/// defaults.
///
/// # Errors
///
/// Propagates any error returned by the client request.
async fn fetch_page(
    client: &Box<dyn Megalodon + Send + Sync>,
    last_id_on_page: &Option<String>,
) -> Result<Vec<Status>> {
    let response = match last_id_on_page {
        None => {
            debug!("Fetching first page");
            client.get_local_timeline(None).await?
        }
        Some(cursor) => {
            debug!("Fetching next page");
            let options = GetLocalTimelineInputOptions {
                max_id: Some(cursor.clone()),
                ..GetLocalTimelineInputOptions::default()
            };
            client.get_local_timeline(Some(&options)).await?
        }
    };
    Ok(response.json)
}
fn try_create_range<S: AsRef<str>>(date: S) -> Result<Range> {
Ok(Range {
start: create_day_bound(&date, 0, 0, 0)?,
end: create_day_bound(date, 23, 59, 59)?,
})
}
/// Converts a `YYYY-MM-DD` string plus a wall-clock time into a local
/// `DateTime`.
///
/// # Errors
///
/// Fails when `day` does not consist of exactly three `-`-separated parts,
/// when a part is not a number, when the date or time is out of range, or
/// when the local time does not correspond to exactly one instant.
fn create_day_bound<S: AsRef<str>>(
    day: S,
    hour: u32,
    minute: u32,
    second: u32,
) -> Result<DateTime<Local>> {
    let parts: Vec<&str> = day.as_ref().split('-').collect();
    let (year, month, day_of_month) = match parts.as_slice() {
        [year, month, day_of_month] => (*year, *month, *day_of_month),
        _ => bail!("Invalid date format! {}", day.as_ref()),
    };

    // Parse in year, month, day order so the first malformed component is the
    // one reported.
    let year: i32 = year.parse()?;
    let month: u32 = month.parse()?;
    let day_of_month: u32 = day_of_month.parse()?;

    let date = NaiveDate::from_ymd_opt(year, month, day_of_month)
        .ok_or_else(|| format_err!("Invalid date!"))?;
    let naive = date
        .and_hms_opt(hour, minute, second)
        .ok_or_else(|| format_err!("Invalid time!"))?;

    // Only an unambiguous mapping to local time is accepted.
    match Local.from_local_datetime(&naive) {
        LocalResult::Single(bound) => Ok(bound),
        _ => bail!("Cannot construct day boundary!"),
    }
}
/// Returns the page's oldest status id when even the oldest status on the
/// page was created after the end of `range`; otherwise `None`.
fn page_newer_than(page: &Page, range: &Range) -> Option<String> {
    match page.oldest {
        Some(oldest) if *oldest > range.end => page.oldest_id.clone(),
        _ => None,
    }
}
fn page_end_older_than(page: &Page, range: &Range) -> bool {
status_older_than(&page.oldest, &range.start)
}
fn page_start_older_than(page: &Page, range: &Range) -> bool {
status_older_than(&page.newest, &range.start)
}
/// Returns `true` when a timestamp is present and strictly earlier than `dt`;
/// a missing timestamp counts as "not older".
fn status_older_than(status: &Option<&DateTime<Utc>>, dt: &DateTime<Local>) -> bool {
    match *status {
        Some(created_at) => created_at < dt,
        None => false,
    }
}