kensho/src/main.rs

use anyhow::{Context, Result};
use chrono::{DateTime, Duration, Local, Utc};
use clap::{arg, command, Parser};
use log::{debug, trace};
use megalodon::{
    entities::{Account, Status, StatusVisibility},
    generator,
    megalodon::{GetAccountStatusesInputOptions, GetLocalTimelineInputOptions},
    response::Response,
    Megalodon,
};
use relativetime::RelativeTime;
use tokio::fs::try_exists;
use tokio_stream::{iter, StreamExt};
use std::{
    env,
    fs::{read_dir, File},
    io::{prelude::*, BufReader},
};

use self::{
    format::format_status,
    page::{bounds_from, Page},
    range::{try_create_range, Range},
};

mod format;
mod page;
mod range;
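
/// Command line configuration; `url` and `access_token` may also be supplied through the
/// `MASTODON_URL` and `MASTODON_ACCESS_TOKEN` environment variables.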
#[derive(Debug, Parser)]
#[command()]
struct Config {
    #[arg(short, long, env = "MASTODON_URL", required = true)]
    url: String,
    #[arg(short, long, env = "MASTODON_ACCESS_TOKEN", required = true)]
    access_token: String,
    #[arg(short, long)]
    output_dir: Option<String>,
    #[arg(required = true)]
    date: String,
    #[arg(short, long, action = clap::ArgAction::Count)]
    verbose: u8,
}
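
// A typical invocation might look like the following (a sketch: the binary name and diary
// directory are assumptions, not taken from this repository):
//
//   MASTODON_URL=https://example.social \
//   MASTODON_ACCESS_TOKEN=... \
//   kensho --output-dir ~/diary 2024-10-06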
#[tokio::main]
async fn main() -> Result<()> {
    let Config {
        date,
        verbose,
        url,
        access_token,
        output_dir,
    } = Config::parse();
    let level = match verbose {
        0 => "off",
        1 => "debug",
        _ => "trace",
    };
    env::set_var("RUST_LOG", format!("{}={}", module_path!(), level));
    env_logger::init();
    let day = try_create_range(date.clone())?;
    let client = create_client(url, access_token)?;
    let Response { json: account, .. } = client.verify_account_credentials().await?;
    debug!("Fetching posts for date {}.", day.end.format("%Y-%m-%d"));
    // the server only provides a page of results at a time; keep the oldest status ID from each
    // page to request the next, older page of statuses
    let mut last_id_on_page: Option<String> = None;
    // store the formatted posts in server order, newest first, then reverse at the end to get
    // chronological order
    let mut reversed = Vec::new();
    loop {
        let statuses = fetch_page(&client, &last_id_on_page).await?;
        if statuses.is_empty() {
            debug!("No more posts in range.");
            break;
        }
        let page = bounds_from(&statuses);
        trace!("Page bounds {:?}", page);
        let (last_id, next_iter, mut formatted) =
            process_page(&client, &account, &statuses, &last_id_on_page, &day, 1).await?;
        reversed.append(&mut formatted);
        if let Some(NextIter::Stop) = next_iter {
            break;
        }
        if let Some(last_id) = last_id {
            last_id_on_page.replace(last_id);
        }
        if let Some(NextIter::Skip) = next_iter {
            continue;
        }
    }
    last_id_on_page = None;
    loop {
        let statuses = fetch_dm_page(&client, &account, &last_id_on_page).await?;
        if statuses.is_empty() {
            debug!("No more DMs in range.");
            break;
        }
        let page = bounds_from(&statuses);
        trace!("Page bounds {:?}", page);
        let (last_id, next_iter, mut formatted) =
            process_page(&client, &account, &statuses, &last_id_on_page, &day, 0).await?;
        reversed.append(&mut formatted);
        if let Some(NextIter::Stop) = next_iter {
            break;
        }
        if let Some(last_id) = last_id {
            last_id_on_page.replace(last_id);
        }
        if let Some(NextIter::Skip) = next_iter {
            continue;
        }
    }
    reversed.reverse();
    if let Some(output_dir) = output_dir {
        let output = format!("{}/{}.md", output_dir.trim_end_matches('/'), date);
        let mut f = match try_exists(&output).await {
            Ok(true) => {
                debug!("Appending {}", output);
                let mut file = File::options().append(true).open(&output)?;
                file.write_all(b"\n")?;
                file
            }
            _ => {
                debug!("Writing {}", output);
                let mut file = File::options()
                    .create(true)
                    .append(true)
                    .open(&output)
                    .with_context(|| format!("Failed to create {}", output))?;
                file.write_all(format!("# {}\n\n", day.end.format("%Y-%m-%d")).as_bytes())?;
                let back_links = create_back_links(&output_dir, &day.end).await?;
                debug!("Created {back_links:?}");
                file.write_all(back_links.join("\n").as_bytes())
                    .with_context(|| "Failed to write back links!")?;
                file.write_all(b"\n")?;
                file
            }
        };
        f.write_all(reversed.join("\n\n").as_bytes())
            .with_context(|| format!("Failed to write all to {}", output))?;
        println!("Appended matching posts to {}.", output);
    } else {
        println!("{}", reversed.join("\n\n"));
    }
    Ok(())
}
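
/// Collect markdown links to past entries in `output_dir` relative to `this_day`: one week, one
/// month, and six months back, plus the same month and day in prior years, keeping only files
/// that actually exist. Each link text includes the entry's first-line summary, e.g. (an
/// illustrative sketch, not output captured from this program):
///
/// ```text
/// [1 week ago - Short summary of that day](diary:2024-09-29.md)
/// ```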
async fn create_back_links(output_dir: &str, this_day: &DateTime<Local>) -> Result<Vec<String>> {
    // candidate file names within the past year: one week, one month, and six months back
    let within_year = [
        (*this_day - Duration::days(7))
            .format("%Y-%m-%d.md")
            .to_string(),
        (*this_day - Duration::days(30))
            .format("%Y-%m-%d.md")
            .to_string(),
        (*this_day - Duration::days(6 * 30))
            .format("%Y-%m-%d.md")
            .to_string(),
    ];
    // scan the directory so that only entries that actually exist are linked
    let mut years_past: Vec<String> = read_dir(output_dir)?
        .filter_map(|d| {
            d.ok().and_then(|d| {
                let d = d.file_name().to_string_lossy().to_string();
                if within_year.contains(&d)
                    || (!d.starts_with(&this_day.format("%Y-").to_string())
                        && d.ends_with(&this_day.format("-%m-%d.md").to_string()))
                {
                    Some(d)
                } else {
                    None
                }
            })
        })
        .collect();
    years_past.sort();
    years_past.reverse();
    debug!("Found {years_past:?}");
    let years_past = years_past
        .into_iter()
        .map(|b| {
            let f = format!("{}/{}", output_dir.trim_end_matches('/'), b);
            trace!("Building link for {f}");
            let mut f =
                BufReader::new(File::open(&f).with_context(|| format!("Could not open {f}"))?);
            let mut first = String::default();
            f.read_line(&mut first)
                .with_context(|| format!("Failed to read first line of {b}"))?;
            trace!("Read {first}");
            let day = b.trim_end_matches(".md");
            // note: the UTC offset is hardcoded, pinning parsed days to US Eastern daylight time
            let day: DateTime<Local> = format!("{day}T00:00:00-04:00")
                .parse()
                .with_context(|| format!("Could not parse {day} as date!"))?;
            let first = first.trim_start_matches(&format!("# {} - ", day.format("%Y-%m-%d")));
            let link = format!(
                "[{} - {}](diary:{})",
                (day - *this_day).to_relative(),
                first.trim(),
                b
            );
            debug!("Link {link}");
            Ok(link)
        })
        .collect::<Result<Vec<String>>>()?;
    Ok(years_past)
}
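
/// Control flow signal from `process_page` back to the paging loops in `main`: `Skip` a page
/// that is entirely newer than the requested range, `Stop` once a page has passed it.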
enum NextIter {
    Skip,
    Stop,
}
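
/// Format all matching statuses on a single page. Returns the oldest status ID on the page (to
/// request the next, older page), an optional signal to skip or stop paging, and the formatted
/// posts in page order.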
async fn process_page(
    client: &Box<dyn Megalodon + Send + Sync + 'static>,
    account: &Account,
    statuses: &Vec<Status>,
    last_id_on_page: &Option<String>,
    day: &Range,
    depth: usize,
) -> Result<(Option<String>, Option<NextIter>, Vec<String>)> {
    let page = bounds_from(statuses);
    trace!("Page bounds {:?}", page);
    // this age comparison only applies after the first page is fetched; the rest of the loop
    // body handles the case where the requested date is newer than any status on the first page
    if last_id_on_page.is_some() && page_start_older_than(&page, day) {
        return Ok((None, Some(NextIter::Stop), Vec::new()));
    }
    // fetching returns 20 statuses at a time, in reverse chronological order, so pages that are
    // entirely newer than the requested date may need to be skipped
    if let Some(oldest_id) = page_newer_than(&page, day) {
        return Ok((Some(oldest_id), Some(NextIter::Skip), Vec::new()));
    }
    // mapping the vector runs into thorny ownership issues and only produces futures, not
    // resolved values; a for-in loop works with await but runs into the same ownership issues;
    // a stream resolves both because it takes ownership of the statuses and can be iterated in
    // a simple way that allows await in the body
    let mut stream = iter(filter_statuses(account, day, statuses));
    let mut formatted = Vec::new();
    while let Some(status) = stream.next().await {
        formatted.push(format_status(client, depth, account, status).await?);
    }
    if page_end_older_than(&page, day) {
        debug!("No more posts in range.");
        return Ok((None, Some(NextIter::Stop), formatted));
    }
    if let Some(id) = page.oldest_id {
        Ok((Some(id.clone()), None, formatted))
    } else {
        Ok((None, None, formatted))
    }
}
/// Keep only statuses authored by the user, within the requested range, that aren't a reply to
/// any other status.
fn filter_statuses<'a>(account: &Account, day: &Range, json: &'a [Status]) -> Vec<&'a Status> {
    json.iter()
        .filter(|status| {
            status.account.id == account.id
                && status.in_reply_to_id.is_none()
                && day.start <= status.created_at
                && status.created_at <= day.end
        })
        .collect::<Vec<&Status>>()
}
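
/// Build a Mastodon client through megalodon's `generator`.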
fn create_client(url: String, token: String) -> Result<Box<dyn Megalodon + Send + Sync>> {
    Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None))
}
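
/// Fetch one page of the local timeline, starting after `last_id_on_page` when one is set; the
/// server controls the page size.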
async fn fetch_page(
    client: &Box<dyn Megalodon + Send + Sync>,
    last_id_on_page: &Option<String>,
) -> Result<Vec<Status>> {
    trace!("Fetching page of local timeline");
    let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
        trace!("Fetching next page");
        client
            .get_local_timeline(Some(&GetLocalTimelineInputOptions {
                max_id: Some(max_id.clone()),
                ..GetLocalTimelineInputOptions::default()
            }))
            .await?
    } else {
        trace!("Fetching first page");
        client.get_local_timeline(None).await?
    };
    Ok(json)
}
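
/// Fetch one page of the account's own statuses and keep only notes-to-self: direct visibility,
/// no mentions, and either not a reply or a reply within the account's own thread.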
async fn fetch_dm_page(
    client: &Box<dyn Megalodon + Send + Sync>,
    account: &Account,
    last_id_on_page: &Option<String>,
) -> Result<Vec<Status>> {
    trace!("Fetching page of DMs");
    let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
        trace!("Fetching next page");
        client
            .get_account_statuses(
                account.id.clone(),
                Some(&GetAccountStatusesInputOptions {
                    max_id: Some(max_id.clone()),
                    ..GetAccountStatusesInputOptions::default()
                }),
            )
            .await?
    } else {
        trace!("Fetching first page");
        client
            .get_account_statuses(account.id.clone(), None)
            .await?
    };
    let json: Vec<Status> = json
        .into_iter()
        .filter(|s| {
            if let StatusVisibility::Direct = s.visibility {
                (s.in_reply_to_account_id.is_none()
                    || s.in_reply_to_account_id
                        .as_ref()
                        .map(|r| r == &account.id)
                        .unwrap_or_default())
                    && s.mentions.is_empty()
            } else {
                false
            }
        })
        .collect();
    Ok(json)
}
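
// Page-boundary comparisons against the requested range: `Page` (from the page module) tracks
// the newest and oldest `created_at` on a fetched page, and these helpers decide whether to
// skip a too-new page, stop after a too-old one, or keep paging.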
fn page_newer_than(page: &Page, range: &Range) -> Option<String> {
    page.oldest
        .filter(|oldest| *oldest > &range.end)
        .and_then(|_| page.oldest_id.clone())
}

fn page_end_older_than(page: &Page, range: &Range) -> bool {
    status_older_than(&page.oldest, &range.start)
}

fn page_start_older_than(page: &Page, range: &Range) -> bool {
    status_older_than(&page.newest, &range.start)
}

fn status_older_than(status: &Option<&DateTime<Utc>>, dt: &DateTime<Local>) -> bool {
    status.map(|status| status < dt).unwrap_or_default()
}