// kensho/src/main.rs
use anyhow::Result;
use chrono::{DateTime, Local, Utc};
use clap::{arg, command, Parser};
use log::{debug, trace};
use megalodon::{
entities::{Account, Status},
generator,
megalodon::GetLocalTimelineInputOptions,
response::Response,
Megalodon,
};
use tokio_stream::{iter, StreamExt};
use std::{env, fs::File, io::prelude::*};
use self::{
format::format_status,
page::{bounds_from, Page},
range::try_create_range,
range::Range,
};
mod format;
mod page;
mod range;
// Command-line configuration parsed by clap; secrets may also be supplied via the
// environment. NOTE: plain `//` comments on purpose — `///` doc comments on clap
// derive fields become `--help` text and would change the program's CLI output.
#[derive(Debug, Parser)]
#[command()]
struct Config {
    // Base URL of the Mastodon server to fetch from (env: MASTODON_URL).
    #[arg(short, long, env = "MASTODON_URL", required = true)]
    url: String,
    // OAuth access token for the account (env: MASTODON_ACCESS_TOKEN).
    #[arg(short, long, env = "MASTODON_ACCESS_TOKEN", required = true)]
    access_token: String,
    // Prefix for the output file path; when absent, results print to stdout.
    // NOTE(review): main joins this to the filename by plain string concatenation,
    // so it presumably must end in a path separator — confirm with callers.
    #[arg(short, long)]
    output_dir: Option<String>,
    // Positional: the date to collect posts for (parsed by range::try_create_range).
    #[arg(required = true)]
    date: String,
    // Repeatable flag (-v, -vv): 0 = off, 1 = debug, 2+ = trace.
    #[arg(short, long, action = clap::ArgAction::Count)]
    verbose: u8,
}
#[tokio::main]
async fn main() -> Result<()> {
2023-07-16 01:30:14 +00:00
let Config {
date,
verbose,
url,
access_token,
output_dir,
} = Config::parse();
2023-07-16 13:16:18 +00:00
let level = match verbose {
0 => "off",
1 => "debug",
_ => "trace",
};
env::set_var("RUST_LOG", format!("{}={}", module_path!(), level));
env_logger::init();
2023-07-15 19:38:48 +00:00
2023-07-16 01:30:14 +00:00
let day = try_create_range(date.clone())?;
2023-07-15 12:47:57 +00:00
2023-07-16 01:30:14 +00:00
let client = create_client(url, access_token)?;
let Response { json: account, .. } = client.verify_account_credentials().await?;
2023-07-15 18:18:27 +00:00
debug!("Fetching posts for date, {}.", day.end.format("%Y-%m-%d"));
// the server only provides a page of results at a time, keep the oldest status from any page
// to request the next older page of statuses
2023-07-15 18:18:27 +00:00
let mut last_id_on_page: Option<String> = None;
// store the formatted posts in server order, reversed chronologically, to reverse at the end
// for regular chronological ordering
let mut reversed = Vec::new();
2023-07-15 12:47:57 +00:00
loop {
let statuses = fetch_page(&client, &last_id_on_page).await?;
if statuses.is_empty() {
debug!("No more posts in range.");
break;
}
let page = bounds_from(&statuses);
2023-07-15 18:18:27 +00:00
trace!("Page bounds {:?}", page);
2023-07-15 12:47:57 +00:00
// this age comparison only applies after the first page is fetched; the rest of the loop
// body handles if the requested date is newer than any statuses on the first page
2023-07-15 18:18:27 +00:00
if last_id_on_page.is_some() && page_start_older_than(&page, &day) {
break;
2023-07-15 12:47:57 +00:00
}
2023-07-15 18:18:27 +00:00
// fetching returns 20 at a time, in reverse chronological order so may require skipping
// pages after the requested date
2023-07-15 18:18:27 +00:00
if let Some(oldest_id) = page_newer_than(&page, &day) {
last_id_on_page.replace(oldest_id);
continue;
}
// mapping the vector runs into thorny ownership issues and only produces futures, not
// resolved values; a for in loop works with await but also runs into thorny ownership
// issues; a stream resolves both because the stream takes ownership of the statuses and
// can be iterated in a simple way that allows the use of await in the body
let mut stream = iter(filter_statuses(&account, &day, &statuses));
2023-07-15 18:18:27 +00:00
while let Some(status) = stream.next().await {
reversed.push(format_status(&client, &account, status).await?);
2023-07-15 18:18:27 +00:00
}
if page_end_older_than(&page, &day) {
debug!("No more posts in range.");
break;
}
if let Some(id) = page.oldest_id {
last_id_on_page.replace(id.clone());
}
2023-07-15 12:47:57 +00:00
}
reversed.reverse();
2023-07-16 12:22:28 +00:00
if let Some(output_dir) = output_dir {
let output = format!("{}{}.md", output_dir, date);
let mut f = File::options().append(true).open(&output)?;
f.write_all(&reversed.join("\n\n").as_bytes())?;
println!("Appended matching posts to {}.", output);
} else {
println!("{}", reversed.join("\n\n"));
}
2023-07-15 12:47:57 +00:00
Ok(())
2023-06-03 19:59:08 +00:00
}
// Only statuses authored by the user, created within the requested day, that aren't a
// reply to any other status. Takes a slice rather than `&Vec` so any contiguous
// collection of statuses works; existing `&Vec<Status>` call sites coerce automatically.
fn filter_statuses<'a>(account: &Account, day: &Range, json: &'a [Status]) -> Vec<&'a Status> {
    json.iter()
        .filter(|status| {
            status.account.id == account.id
                && status.in_reply_to_id.is_none()
                && day.start <= status.created_at
                && status.created_at <= day.end
        })
        .collect()
}
fn create_client(url: String, token: String) -> Result<Box<dyn Megalodon + Send + Sync>> {
2023-07-15 18:18:27 +00:00
Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None))
}
// Fetch one page of the local timeline: the first page when `last_id_on_page` is
// `None`, otherwise the page of statuses older than that id. Accepts `&dyn` rather
// than the `&Box<dyn ...>` anti-pattern; `&client` call sites coerce via deref.
async fn fetch_page(
    client: &(dyn Megalodon + Send + Sync),
    last_id_on_page: &Option<String>,
) -> Result<Vec<Status>> {
    let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
        debug!("Fetching next page");
        client
            .get_local_timeline(Some(&GetLocalTimelineInputOptions {
                max_id: Some(max_id.clone()),
                ..GetLocalTimelineInputOptions::default()
            }))
            .await?
    } else {
        debug!("Fetching first page");
        client.get_local_timeline(None).await?
    };
    Ok(json)
}
// When the entire page is newer than the requested range (its oldest status postdates
// the range's end), return that oldest status's id so the caller can request the next
// (older) page; otherwise `None`.
fn page_newer_than(page: &Page, range: &Range) -> Option<String> {
    match page.oldest {
        Some(oldest) if oldest > &range.end => page.oldest_id.clone(),
        _ => None,
    }
}
fn page_end_older_than(page: &Page, range: &Range) -> bool {
status_older_than(&page.oldest, &range.start)
}
fn page_start_older_than(page: &Page, range: &Range) -> bool {
status_older_than(&page.newest, &range.start)
}
// True when `status` holds a timestamp strictly earlier than `dt`; an absent
// timestamp is never considered older (matches `unwrap_or_default` on bool).
fn status_older_than(status: &Option<&DateTime<Utc>>, dt: &DateTime<Local>) -> bool {
    match status {
        Some(s) => *s < dt,
        None => false,
    }
}