use anyhow::{Context, Result}; use chrono::{DateTime, Duration, Local, Utc}; use clap::{arg, command, Parser}; use log::{debug, trace}; use megalodon::{ entities::{Account, Status, StatusVisibility}, generator, megalodon::{GetAccountStatusesInputOptions, GetLocalTimelineInputOptions}, response::Response, Megalodon, }; use tokio::fs::try_exists; use tokio_stream::{iter, StreamExt}; use std::{env, fs::File, io::prelude::*}; use self::{ format::format_status, page::{bounds_from, Page}, range::try_create_range, range::Range, }; mod format; mod page; mod range; #[derive(Debug, Parser)] #[command()] struct Config { #[arg(short, long, env = "MASTODON_URL", required = true)] url: String, #[arg(short, long, env = "MASTODON_ACCESS_TOKEN", required = true)] access_token: String, #[arg(short, long)] output_dir: Option, #[arg(required = true)] date: String, #[arg(short, long, action = clap::ArgAction::Count)] verbose: u8, } #[tokio::main] async fn main() -> Result<()> { let Config { date, verbose, url, access_token, output_dir, } = Config::parse(); let level = match verbose { 0 => "off", 1 => "debug", _ => "trace", }; env::set_var("RUST_LOG", format!("{}={}", module_path!(), level)); env_logger::init(); let day = try_create_range(date.clone())?; let client = create_client(url, access_token)?; let Response { json: account, .. } = client.verify_account_credentials().await?; debug!("Fetching posts for date, {}.", day.end.format("%Y-%m-%d")); // the server only provides a page of results at a time, keep the oldest status from any page // to request the next older page of statuses let mut last_id_on_page: Option = None; // store the formatted posts in server order, reversed chronologically, to reverse at the end // for regular chronological ordering let mut reversed = Vec::new(); loop { let statuses = fetch_page(&client, &last_id_on_page).await?; if statuses.is_empty() { debug!("No more posts in range."); break; } let page = bounds_from(&statuses); trace!("Page bounds {:?}", page); let (last_id, next_iter, mut formatted) = process_page(&client, &account, &statuses, &last_id_on_page, &day, 1).await?; reversed.append(&mut formatted); if let Some(NextIter::Stop) = next_iter { break; } if let Some(last_id) = last_id { last_id_on_page.replace(last_id); } if let Some(NextIter::Skip) = next_iter { continue; } } last_id_on_page = None; loop { let statuses = fetch_dm_page(&client, &account, &last_id_on_page).await?; if statuses.is_empty() { debug!("No more DMs in range."); break; } let page = bounds_from(&statuses); trace!("Page bounds {:?}", page); let (last_id, next_iter, mut formatted) = process_page(&client, &account, &statuses, &last_id_on_page, &day, 0).await?; reversed.append(&mut formatted); if let Some(NextIter::Stop) = next_iter { break; } if let Some(last_id) = last_id { last_id_on_page.replace(last_id); } if let Some(NextIter::Skip) = next_iter { continue; } } reversed.reverse(); if let Some(output_dir) = output_dir { let output = format!("{}/{}.md", output_dir.trim_end_matches("/"), date); let mut f = match try_exists(&output).await { Ok(exists) if exists => { debug!("Appending {}", output); let mut file = File::options().append(true).open(&output)?; file.write("\n\n".as_bytes())?; file } _ => { debug!("Writing {}", output); let mut file = File::options() .create(true) .append(true) .open(&output) .with_context(|| format!("Failed to create {}", output))?; file.write(format!("# {}\n\n", day.end.format("%Y-%m-%d")).as_bytes())?; // TODO move to separate function file.write(create_back_link(&day.end, "[One week ago](diary:{})\n", 7).as_bytes())?; file.write( create_back_link(&day.end, "[One month ago](diary:{})\n", 30).as_bytes(), )?; file.write( create_back_link(&day.end, "[Six months ago](diary:{})\n", 6 * 30).as_bytes(), )?; file.write( create_back_link(&day.end, "[One year ago](diary:{})\n", 365).as_bytes(), )?; file.write( create_back_link(&day.end, "[Two years ago](diary:{})\n", 365 * 2).as_bytes(), )?; file.write( create_back_link(&day.end, "[Three years ago](diary:{})\n", 365 * 3).as_bytes(), )?; file } }; f.write_all(&reversed.join("\n\n").as_bytes()) .with_context(|| format!("Failed to write all to {}", output))?; println!("Appended matching posts to {}.", output); } else { println!("{}", reversed.join("\n\n")); } Ok(()) } fn create_back_link(day_end: &DateTime, anchor_text: &str, ago: i64) -> String { let prior_date = *day_end - Duration::days(ago); // TODO check if the file exists format!( "[{}](diary:{})\n", anchor_text, prior_date.format("%Y-%m-%d") ) } enum NextIter { Skip, Stop, } async fn process_page( client: &Box, account: &Account, statuses: &Vec, last_id_on_page: &Option, day: &Range, depth: usize, ) -> Result<(Option, Option, Vec)> { let page = bounds_from(&statuses); trace!("Page bounds {:?}", page); // this age comparison only applies after the first page is fetched; the rest of the loop // body handles if the requested date is newer than any statuses on the first page if last_id_on_page.is_some() && page_start_older_than(&page, day) { return Ok((None, Some(NextIter::Stop), Vec::new())); } // fetching returns 20 at a time, in reverse chronological order so may require skipping // pages after the requested date if let Some(oldest_id) = page_newer_than(&page, &day) { return Ok((Some(oldest_id), Some(NextIter::Skip), Vec::new())); } // mapping the vector runs into thorny ownership issues and only produces futures, not // resolved values; a for in loop works with await but also runs into thorny ownership // issues; a stream resolves both because the stream takes ownership of the statuses and // can be iterated in a simple way that allows the use of await in the body let mut stream = iter(filter_statuses(account, &day, &statuses)); let mut formatted = Vec::new(); while let Some(status) = stream.next().await { formatted.push(format_status(client, depth, &account, status).await?); } if page_end_older_than(&page, &day) { debug!("No more posts in range."); return Ok((None, Some(NextIter::Stop), formatted)); } if let Some(id) = page.oldest_id { return Ok((Some(id.clone()), None, formatted)); } else { return Ok((None, None, formatted)); } } // Only ones authored by the user, on the date requested, that aren't a reply to any other status fn filter_statuses<'a>(account: &Account, day: &Range, json: &'a Vec) -> Vec<&'a Status> { json.iter() .filter(|status| { status.account.id == account.id && status.in_reply_to_id.is_none() && day.start <= status.created_at && status.created_at <= day.end }) .collect::>() } fn create_client(url: String, token: String) -> Result> { Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None)) } async fn fetch_page( client: &Box, last_id_on_page: &Option, ) -> Result> { trace!("Fetching page of local timeline"); let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() { trace!("Fetching next page"); client .get_local_timeline(Some(&GetLocalTimelineInputOptions { max_id: Some(max_id.clone()), ..GetLocalTimelineInputOptions::default() })) .await? } else { trace!("Fetching first page"); client.get_local_timeline(None).await? }; Ok(json) } async fn fetch_dm_page( client: &Box, account: &Account, last_id_on_page: &Option, ) -> Result> { trace!("Fetching page of DMs"); let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() { trace!("Fetching next page"); client .get_account_statuses( account.id.clone(), Some(&GetAccountStatusesInputOptions { max_id: Some(max_id.clone()), ..GetAccountStatusesInputOptions::default() }), ) .await? } else { trace!("Fetching first page"); client .get_account_statuses(account.id.clone(), None) .await? }; let json: Vec = json .into_iter() .filter(|s| { if let StatusVisibility::Direct = s.visibility { (s.in_reply_to_account_id.is_none() || s.in_reply_to_account_id .as_ref() .map(|r| r == &account.id) .unwrap_or_default()) && s.mentions.is_empty() } else { false } }) .collect(); Ok(json) } fn page_newer_than(page: &Page, range: &Range) -> Option { page.oldest .filter(|oldest| *oldest > &range.end) .and_then(|_| page.oldest_id.clone()) } fn page_end_older_than(page: &Page, range: &Range) -> bool { status_older_than(&page.oldest, &range.start) } fn page_start_older_than(page: &Page, range: &Range) -> bool { status_older_than(&page.newest, &range.start) } fn status_older_than(status: &Option<&DateTime>, dt: &DateTime) -> bool { status.map(|status| status < dt).unwrap_or_default() }