//! Fetch the authenticated account's Mastodon posts (and direct-message notes to self) for a
//! given date and either print them or append them to a `<output_dir>/<date>.md` diary file,
//! prefixed with back links to earlier entries.

use anyhow::{Context, Result};
use chrono::{DateTime, Duration, Local, Utc};
use clap::{arg, command, Parser};
use log::{debug, trace};
use megalodon::{
    entities::{Account, Status, StatusVisibility},
    generator,
    megalodon::{GetAccountStatusesInputOptions, GetLocalTimelineInputOptions},
    response::Response,
    Megalodon,
};
use relativetime::RelativeTime;
use tokio::fs::try_exists;
use tokio_stream::{iter, StreamExt};

use std::{
    env,
    fs::{read_dir, File},
    io::{prelude::*, BufReader},
};

use self::{
    format::format_status,
    page::{bounds_from, Page},
    range::{try_create_range, Range},
};

mod format;
mod page;
mod range;

#[derive(Debug, Parser)]
#[command()]
struct Config {
    #[arg(short, long, env = "MASTODON_URL", required = true)]
    url: String,
    #[arg(short, long, env = "MASTODON_ACCESS_TOKEN", required = true)]
    access_token: String,
    #[arg(short, long)]
    output_dir: Option<String>,
    #[arg(required = true)]
    date: String,
    #[arg(short, long, action = clap::ArgAction::Count)]
    verbose: u8,
}

#[tokio::main]
async fn main() -> Result<()> {
    let Config {
        date,
        verbose,
        url,
        access_token,
        output_dir,
    } = Config::parse();

    let level = match verbose {
        0 => "off",
        1 => "debug",
        _ => "trace",
    };
    env::set_var("RUST_LOG", format!("{}={}", module_path!(), level));
    env_logger::init();

    let day = try_create_range(date.clone())?;

    let client = create_client(url, access_token)?;
    let Response { json: account, .. } = client.verify_account_credentials().await?;

    debug!("Fetching posts for date, {}.", day.end.format("%Y-%m-%d"));

    // the server only provides a page of results at a time; keep the oldest status from any page
    // to request the next, older page of statuses
    let mut last_id_on_page: Option<String> = None;
    // store the formatted posts in server order (reverse chronological), to be reversed at the
    // end into regular chronological order
    let mut reversed = Vec::new();
    loop {
        let statuses = fetch_page(&client, &last_id_on_page).await?;

        if statuses.is_empty() {
            debug!("No more posts in range.");
            break;
        }

        let page = bounds_from(&statuses);
        trace!("Page bounds {:?}", page);

        let (last_id, next_iter, mut formatted) =
            process_page(&client, &account, &statuses, &last_id_on_page, &day, 1).await?;
        reversed.append(&mut formatted);

        if let Some(NextIter::Stop) = next_iter {
            break;
        }
        if let Some(last_id) = last_id {
            last_id_on_page.replace(last_id);
        }
        if let Some(NextIter::Skip) = next_iter {
            continue;
        }
    }

    last_id_on_page = None;
    loop {
        let statuses = fetch_dm_page(&client, &account, &last_id_on_page).await?;

        if statuses.is_empty() {
            debug!("No more DMs in range.");
            break;
        }

        let page = bounds_from(&statuses);
        trace!("Page bounds {:?}", page);

        let (last_id, next_iter, mut formatted) =
            process_page(&client, &account, &statuses, &last_id_on_page, &day, 0).await?;
        reversed.append(&mut formatted);

        if let Some(NextIter::Stop) = next_iter {
            break;
        }
        if let Some(last_id) = last_id {
            last_id_on_page.replace(last_id);
        }
        if let Some(NextIter::Skip) = next_iter {
            continue;
        }
    }

    reversed.reverse();

    if let Some(output_dir) = output_dir {
        let output = format!("{}/{}.md", output_dir.trim_end_matches("/"), date);
        let mut f = match try_exists(&output).await {
            Ok(exists) if exists => {
                debug!("Appending {}", output);
                let mut file = File::options().append(true).open(&output)?;
                file.write_all("\n".as_bytes())?;
                file
            }
            _ => {
                debug!("Writing {}", output);
                let mut file = File::options()
                    .create(true)
                    .append(true)
                    .open(&output)
                    .with_context(|| format!("Failed to create {}", output))?;
                file.write_all(format!("# {}\n\n", day.end.format("%Y-%m-%d")).as_bytes())?;
                let back_links = create_back_links(&output_dir, &day.end).await?;
                debug!("Created {back_links:?}");
                file.write_all(back_links.join("\n").as_bytes())
                    .with_context(|| "Failed to write back links!")?;
                file.write_all(b"\n")?;
                file
            }
        };
        f.write_all(reversed.join("\n\n").as_bytes())
            .with_context(|| format!("Failed to write all to {}", output))?;
        println!("Appended matching posts to {}.", output);
    } else {
        println!("{}", reversed.join("\n\n"));
    }

    Ok(())
}

/// Build back links to earlier diary entries: the entries from one week, one month, and six
/// months before `this_day`, plus entries from the same month and day in previous years.
async fn create_back_links(output_dir: &str, this_day: &DateTime<Local>) -> Result<Vec<String>> {
    //file.write_all(create_back_link_old(&day.end, "One week ago", 7).as_bytes())?;
    //file.write_all(create_back_link_old(&day.end, "One month ago", 30).as_bytes())?;
    //file.write_all(
    //    create_back_link_old(&day.end, "Six months ago", 6 * 30).as_bytes(),
    //)?;
    let within_year = [
        (*this_day - Duration::days(7))
            .format("%Y-%m-%d.md")
            .to_string(),
        (*this_day - Duration::days(30))
            .format("%Y-%m-%d.md")
            .to_string(),
        (*this_day - Duration::days(6 * 30))
            .format("%Y-%m-%d.md")
            .to_string(),
    ];
    let mut years_past: Vec<String> = read_dir(output_dir)?
        .filter_map(|d| {
            d.ok().and_then(|d| {
                let d = d.file_name().to_owned();
                let d = d.to_string_lossy().to_string();
                if within_year.contains(&d)
                    || (!d.starts_with(&this_day.format("%Y-").to_string())
                        && d.ends_with(&this_day.format("-%m-%d.md").to_string()))
                {
                    Some(d)
                } else {
                    None
                }
            })
        })
        .collect();
    years_past.sort();
    years_past.reverse();
    debug!("Found {years_past:?}");
    let years_past = years_past
        .into_iter()
        .map(|b| {
            let f = format!("{}/{}", output_dir.trim_end_matches("/"), b);
            trace!("Building link for {f}");
            let mut f =
                BufReader::new(File::open(&f).with_context(|| format!("Could not open {f}"))?);
            let mut first = String::default();
            f.read_line(&mut first)
                .with_context(|| format!("Failed to read first line of {b}"))?;
            trace!("Read {first}");
            let day = b.to_string();
            let day = day.trim_end_matches(".md");
            let day: DateTime<Local> = format!("{day}T00:00:00-04:00")
                .parse()
                .with_context(|| format!("Could not parse {day} as date!"))?;
            let first = first.trim_start_matches(&format!("# {} - ", day.format("%Y-%m-%d")));
            let link = format!(
                "[{} - {}](diary:{})",
                (day - *this_day).to_relative(),
                first.trim(),
                b
            );
            debug!("Link {link}");
            Ok(link)
        })
        .collect::<Result<Vec<String>>>()?;
    Ok(years_past)
}

/// Whether the pagination loop should skip ahead to the next page or stop entirely.
enum NextIter {
    Skip,
    Stop,
}

/// Format the statuses on one page that fall within `day` and report how pagination should
/// proceed: the id to continue paging from, and whether to skip this page or stop looping.
async fn process_page(
    client: &Box<dyn Megalodon + Send + Sync>,
    account: &Account,
    statuses: &Vec<Status>,
    last_id_on_page: &Option<String>,
    day: &Range,
    depth: usize,
) -> Result<(Option<String>, Option<NextIter>, Vec<String>)> {
    let page = bounds_from(&statuses);
    trace!("Page bounds {:?}", page);

    // this age comparison only applies after the first page is fetched; the rest of the loop
    // body handles if the requested date is newer than any statuses on the first page
    if last_id_on_page.is_some() && page_start_older_than(&page, day) {
        return Ok((None, Some(NextIter::Stop), Vec::new()));
    }

    // fetching returns 20 at a time, in reverse chronological order, so pages may need to be
    // skipped until reaching the requested date
    if let Some(oldest_id) = page_newer_than(&page, &day) {
        return Ok((Some(oldest_id), Some(NextIter::Skip), Vec::new()));
    }

    // mapping the vector runs into thorny ownership issues and only produces futures, not
    // resolved values; a for-in loop works with await but also runs into thorny ownership
    // issues; a stream resolves both because the stream takes ownership of the statuses and
    // can be iterated in a simple way that allows the use of await in the body
    let mut stream = iter(filter_statuses(account, &day, &statuses));
    let mut formatted = Vec::new();
    while let Some(status) = stream.next().await {
        formatted.push(format_status(client, depth, &account, status).await?);
    }

    if page_end_older_than(&page, &day) {
        debug!("No more posts in range.");
        return Ok((None, Some(NextIter::Stop), formatted));
    }

    if let Some(id) = page.oldest_id {
        Ok((Some(id.clone()), None, formatted))
    } else {
        Ok((None, None, formatted))
    }
}

// Only ones authored by the user, on the date requested, that aren't a reply to any other status
fn filter_statuses<'a>(account: &Account, day: &Range, json: &'a [Status]) -> Vec<&'a Status> {
    json.iter()
        .filter(|status| {
            status.account.id == account.id
                && status.in_reply_to_id.is_none()
                && day.start <= status.created_at
                && status.created_at <= day.end
        })
        .collect::<Vec<&Status>>()
}

fn create_client(url: String, token: String) -> Result<Box<dyn Megalodon + Send + Sync>> {
    Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None))
}

/// Fetch one page of the local timeline, starting from `last_id_on_page` when one is set.
async fn fetch_page(
    client: &Box<dyn Megalodon + Send + Sync>,
    last_id_on_page: &Option<String>,
) -> Result<Vec<Status>> {
    trace!("Fetching page of local timeline");
    let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
        trace!("Fetching next page");
        client
            .get_local_timeline(Some(&GetLocalTimelineInputOptions {
                max_id: Some(max_id.clone()),
                ..GetLocalTimelineInputOptions::default()
            }))
            .await?
    } else {
        trace!("Fetching first page");
        client.get_local_timeline(None).await?
    };
    Ok(json)
}

/// Fetch one page of the account's own statuses and keep only direct messages that mention no
/// one and reply to no one but the account itself, i.e. notes to self.
async fn fetch_dm_page(
    client: &Box<dyn Megalodon + Send + Sync>,
    account: &Account,
    last_id_on_page: &Option<String>,
) -> Result<Vec<Status>> {
    trace!("Fetching page of DMs");
    let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
        trace!("Fetching next page");
        client
            .get_account_statuses(
                account.id.clone(),
                Some(&GetAccountStatusesInputOptions {
                    max_id: Some(max_id.clone()),
                    ..GetAccountStatusesInputOptions::default()
                }),
            )
            .await?
    } else {
        trace!("Fetching first page");
        client
            .get_account_statuses(account.id.clone(), None)
            .await?
    };
    let json: Vec<Status> = json
        .into_iter()
        .filter(|s| {
            if let StatusVisibility::Direct = s.visibility {
                (s.in_reply_to_account_id.is_none()
                    || s.in_reply_to_account_id
                        .as_ref()
                        .map(|r| r == &account.id)
                        .unwrap_or_default())
                    && s.mentions.is_empty()
            } else {
                false
            }
        })
        .collect();
    Ok(json)
}

/// If even the oldest status on the page is newer than the requested range, return its id so
/// the caller can skip ahead to the next, older page.
fn page_newer_than(page: &Page, range: &Range) -> Option<String> {
    page.oldest
        .filter(|oldest| *oldest > &range.end)
        .and_then(|_| page.oldest_id.clone())
}

/// The oldest status on the page is older than the start of the requested range.
fn page_end_older_than(page: &Page, range: &Range) -> bool {
    status_older_than(&page.oldest, &range.start)
}

/// The newest status on the page is older than the start of the requested range.
fn page_start_older_than(page: &Page, range: &Range) -> bool {
    status_older_than(&page.newest, &range.start)
}

fn status_older_than(status: &Option<&DateTime<Utc>>, dt: &DateTime<Local>) -> bool {
    status.map(|status| status < dt).unwrap_or_default()
}