2023-09-06 14:30:50 +00:00
|
|
|
use anyhow::{Context, Result};
|
2023-07-15 23:40:16 +00:00
|
|
|
use chrono::{DateTime, Local, Utc};
|
2023-07-15 19:38:48 +00:00
|
|
|
use clap::{arg, command, Parser};
|
2023-07-15 18:18:27 +00:00
|
|
|
use log::{debug, trace};
|
2023-07-15 13:20:51 +00:00
|
|
|
use megalodon::{
|
2023-07-25 21:08:34 +00:00
|
|
|
entities::{Account, Status, StatusVisibility},
|
2023-07-15 23:40:16 +00:00
|
|
|
generator,
|
2023-07-25 21:08:34 +00:00
|
|
|
megalodon::{GetAccountStatusesInputOptions, GetLocalTimelineInputOptions},
|
2023-07-15 23:40:16 +00:00
|
|
|
response::Response,
|
2023-07-15 18:18:27 +00:00
|
|
|
Megalodon,
|
2023-07-15 13:20:51 +00:00
|
|
|
};
|
2023-09-06 14:30:50 +00:00
|
|
|
use tokio::fs::try_exists;
|
2023-07-15 18:18:27 +00:00
|
|
|
use tokio_stream::{iter, StreamExt};
|
|
|
|
|
2023-07-16 01:30:14 +00:00
|
|
|
use std::{env, fs::File, io::prelude::*};
|
2023-07-15 12:47:57 +00:00
|
|
|
|
2023-07-15 23:40:16 +00:00
|
|
|
use self::{
|
|
|
|
format::format_status,
|
|
|
|
page::{bounds_from, Page},
|
|
|
|
range::try_create_range,
|
|
|
|
range::Range,
|
|
|
|
};
|
2023-07-15 12:47:57 +00:00
|
|
|
|
2023-07-15 23:40:16 +00:00
|
|
|
mod format;
|
|
|
|
mod page;
|
|
|
|
mod range;
|
2023-07-15 13:20:51 +00:00
|
|
|
|
2023-07-15 19:38:48 +00:00
|
|
|
#[derive(Debug, Parser)]
|
|
|
|
#[command()]
|
|
|
|
struct Config {
|
2023-07-16 12:22:28 +00:00
|
|
|
#[arg(short, long, env = "MASTODON_URL", required = true)]
|
2023-07-16 01:30:14 +00:00
|
|
|
url: String,
|
2023-07-16 12:22:28 +00:00
|
|
|
#[arg(short, long, env = "MASTODON_ACCESS_TOKEN", required = true)]
|
2023-07-16 01:30:14 +00:00
|
|
|
access_token: String,
|
|
|
|
#[arg(short, long)]
|
2023-07-16 12:22:28 +00:00
|
|
|
output_dir: Option<String>,
|
2023-07-15 19:38:48 +00:00
|
|
|
#[arg(required = true)]
|
|
|
|
date: String,
|
2023-07-16 13:16:18 +00:00
|
|
|
#[arg(short, long, action = clap::ArgAction::Count)]
|
|
|
|
verbose: u8,
|
2023-07-15 19:38:48 +00:00
|
|
|
}
|
|
|
|
|
2023-07-15 12:47:57 +00:00
|
|
|
#[tokio::main]
|
|
|
|
async fn main() -> Result<()> {
|
2023-07-16 01:30:14 +00:00
|
|
|
let Config {
|
|
|
|
date,
|
|
|
|
verbose,
|
|
|
|
url,
|
|
|
|
access_token,
|
|
|
|
output_dir,
|
|
|
|
} = Config::parse();
|
2023-07-15 23:40:16 +00:00
|
|
|
|
2023-07-16 13:16:18 +00:00
|
|
|
let level = match verbose {
|
|
|
|
0 => "off",
|
|
|
|
1 => "debug",
|
|
|
|
_ => "trace",
|
|
|
|
};
|
2023-07-15 23:40:16 +00:00
|
|
|
env::set_var("RUST_LOG", format!("{}={}", module_path!(), level));
|
2023-07-15 20:09:12 +00:00
|
|
|
env_logger::init();
|
2023-07-15 19:38:48 +00:00
|
|
|
|
2023-07-16 01:30:14 +00:00
|
|
|
let day = try_create_range(date.clone())?;
|
2023-07-15 12:47:57 +00:00
|
|
|
|
2023-07-16 01:30:14 +00:00
|
|
|
let client = create_client(url, access_token)?;
|
2023-07-15 20:09:12 +00:00
|
|
|
let Response { json: account, .. } = client.verify_account_credentials().await?;
|
2023-07-15 18:18:27 +00:00
|
|
|
|
2023-07-15 23:40:16 +00:00
|
|
|
debug!("Fetching posts for date, {}.", day.end.format("%Y-%m-%d"));
|
|
|
|
|
|
|
|
// the server only provides a page of results at a time, keep the oldest status from any page
|
|
|
|
// to request the next older page of statuses
|
2023-07-15 18:18:27 +00:00
|
|
|
let mut last_id_on_page: Option<String> = None;
|
2023-07-15 23:40:16 +00:00
|
|
|
// store the formatted posts in server order, reversed chronologically, to reverse at the end
|
|
|
|
// for regular chronological ordering
|
2023-07-15 20:09:12 +00:00
|
|
|
let mut reversed = Vec::new();
|
2023-07-15 12:47:57 +00:00
|
|
|
loop {
|
2023-07-15 23:40:16 +00:00
|
|
|
let statuses = fetch_page(&client, &last_id_on_page).await?;
|
|
|
|
if statuses.is_empty() {
|
|
|
|
debug!("No more posts in range.");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
let page = bounds_from(&statuses);
|
|
|
|
|
2023-07-15 18:18:27 +00:00
|
|
|
trace!("Page bounds {:?}", page);
|
2023-07-15 12:47:57 +00:00
|
|
|
|
2023-07-25 21:08:34 +00:00
|
|
|
let (last_id, next_iter, mut formatted) =
|
|
|
|
process_page(&client, &account, &statuses, &last_id_on_page, &day, 1).await?;
|
|
|
|
reversed.append(&mut formatted);
|
|
|
|
if let Some(NextIter::Stop) = next_iter {
|
2023-07-15 18:18:27 +00:00
|
|
|
break;
|
2023-07-15 12:47:57 +00:00
|
|
|
}
|
2023-07-25 21:08:34 +00:00
|
|
|
if let Some(last_id) = last_id {
|
|
|
|
last_id_on_page.replace(last_id);
|
|
|
|
}
|
|
|
|
if let Some(NextIter::Skip) = next_iter {
|
2023-07-15 18:18:27 +00:00
|
|
|
continue;
|
|
|
|
}
|
2023-07-25 21:08:34 +00:00
|
|
|
}
|
|
|
|
last_id_on_page = None;
|
|
|
|
loop {
|
|
|
|
let statuses = fetch_dm_page(&client, &account, &last_id_on_page).await?;
|
|
|
|
if statuses.is_empty() {
|
|
|
|
debug!("No more DMs in range.");
|
|
|
|
break;
|
2023-07-15 18:18:27 +00:00
|
|
|
}
|
2023-07-25 21:08:34 +00:00
|
|
|
let page = bounds_from(&statuses);
|
2023-07-15 18:18:27 +00:00
|
|
|
|
2023-07-25 21:08:34 +00:00
|
|
|
trace!("Page bounds {:?}", page);
|
|
|
|
|
|
|
|
let (last_id, next_iter, mut formatted) =
|
|
|
|
process_page(&client, &account, &statuses, &last_id_on_page, &day, 0).await?;
|
|
|
|
reversed.append(&mut formatted);
|
|
|
|
if let Some(NextIter::Stop) = next_iter {
|
2023-07-15 18:18:27 +00:00
|
|
|
break;
|
|
|
|
}
|
2023-07-25 21:08:34 +00:00
|
|
|
if let Some(last_id) = last_id {
|
|
|
|
last_id_on_page.replace(last_id);
|
|
|
|
}
|
|
|
|
if let Some(NextIter::Skip) = next_iter {
|
|
|
|
continue;
|
2023-07-15 18:18:27 +00:00
|
|
|
}
|
2023-07-15 12:47:57 +00:00
|
|
|
}
|
2023-07-25 21:08:34 +00:00
|
|
|
|
2023-07-15 20:09:12 +00:00
|
|
|
reversed.reverse();
|
2023-07-25 21:08:34 +00:00
|
|
|
|
2023-07-16 12:22:28 +00:00
|
|
|
if let Some(output_dir) = output_dir {
|
2023-09-06 14:30:50 +00:00
|
|
|
let output = format!("{}/{}.md", output_dir.trim_end_matches("/"), date);
|
|
|
|
let mut f = match try_exists(&output).await {
|
|
|
|
Ok(exists) if exists => {
|
|
|
|
debug!("Appending {}", output);
|
|
|
|
File::options().append(true).open(&output)?
|
|
|
|
}
|
|
|
|
_ => {
|
|
|
|
debug!("Writing {}", output);
|
|
|
|
File::options()
|
|
|
|
.create(true)
|
|
|
|
.append(true)
|
|
|
|
.open(&output)
|
|
|
|
.with_context(|| format!("Failed to create {}", output))?
|
|
|
|
}
|
|
|
|
};
|
|
|
|
f.write_all(&reversed.join("\n\n").as_bytes())
|
|
|
|
.with_context(|| format!("Failed to write all to {}", output))?;
|
2023-07-16 12:22:28 +00:00
|
|
|
println!("Appended matching posts to {}.", output);
|
|
|
|
} else {
|
|
|
|
println!("{}", reversed.join("\n\n"));
|
|
|
|
}
|
2023-07-15 12:47:57 +00:00
|
|
|
Ok(())
|
2023-06-03 19:59:08 +00:00
|
|
|
}
|
2023-07-15 18:18:27 +00:00
|
|
|
|
2023-07-25 21:08:34 +00:00
|
|
|
/// Paging hint returned by `process_page` alongside the formatted posts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum NextIter {
    /// The whole page is newer than the requested range; move on to the next older page.
    Skip,
    /// Nothing older can fall inside the requested range; stop paging.
    Stop,
}
|
|
|
|
|
|
|
|
async fn process_page(
|
|
|
|
client: &Box<dyn Megalodon + Send + Sync + 'static>,
|
|
|
|
account: &Account,
|
|
|
|
statuses: &Vec<Status>,
|
|
|
|
last_id_on_page: &Option<String>,
|
|
|
|
day: &Range,
|
|
|
|
depth: usize,
|
|
|
|
) -> Result<(Option<String>, Option<NextIter>, Vec<String>)> {
|
|
|
|
let page = bounds_from(&statuses);
|
|
|
|
|
|
|
|
trace!("Page bounds {:?}", page);
|
|
|
|
|
|
|
|
// this age comparison only applies after the first page is fetched; the rest of the loop
|
|
|
|
// body handles if the requested date is newer than any statuses on the first page
|
|
|
|
if last_id_on_page.is_some() && page_start_older_than(&page, day) {
|
|
|
|
return Ok((None, Some(NextIter::Stop), Vec::new()));
|
|
|
|
}
|
|
|
|
|
|
|
|
// fetching returns 20 at a time, in reverse chronological order so may require skipping
|
|
|
|
// pages after the requested date
|
|
|
|
if let Some(oldest_id) = page_newer_than(&page, &day) {
|
|
|
|
return Ok((Some(oldest_id), Some(NextIter::Skip), Vec::new()));
|
|
|
|
}
|
|
|
|
|
|
|
|
// mapping the vector runs into thorny ownership issues and only produces futures, not
|
|
|
|
// resolved values; a for in loop works with await but also runs into thorny ownership
|
|
|
|
// issues; a stream resolves both because the stream takes ownership of the statuses and
|
|
|
|
// can be iterated in a simple way that allows the use of await in the body
|
|
|
|
let mut stream = iter(filter_statuses(account, &day, &statuses));
|
|
|
|
let mut formatted = Vec::new();
|
|
|
|
while let Some(status) = stream.next().await {
|
|
|
|
formatted.push(format_status(client, depth, &account, status).await?);
|
|
|
|
}
|
|
|
|
|
|
|
|
if page_end_older_than(&page, &day) {
|
|
|
|
debug!("No more posts in range.");
|
|
|
|
return Ok((None, Some(NextIter::Stop), formatted));
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(id) = page.oldest_id {
|
|
|
|
return Ok((Some(id.clone()), None, formatted));
|
|
|
|
} else {
|
|
|
|
return Ok((None, None, formatted));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-16 01:30:14 +00:00
|
|
|
// Only ones authored by the user, on the date requested, that aren't a reply to any other status
|
2023-07-15 23:40:16 +00:00
|
|
|
fn filter_statuses<'a>(account: &Account, day: &Range, json: &'a Vec<Status>) -> Vec<&'a Status> {
|
|
|
|
json.iter()
|
2023-07-16 01:30:14 +00:00
|
|
|
.filter(|status| {
|
|
|
|
status.account.id == account.id
|
|
|
|
&& status.in_reply_to_id.is_none()
|
|
|
|
&& day.start <= status.created_at
|
|
|
|
&& status.created_at <= day.end
|
2023-07-15 23:40:16 +00:00
|
|
|
})
|
|
|
|
.collect::<Vec<&Status>>()
|
|
|
|
}
|
|
|
|
|
2023-07-16 01:30:14 +00:00
|
|
|
fn create_client(url: String, token: String) -> Result<Box<dyn Megalodon + Send + Sync>> {
|
2023-07-15 18:18:27 +00:00
|
|
|
Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None))
|
|
|
|
}
|
|
|
|
|
|
|
|
async fn fetch_page(
|
|
|
|
client: &Box<dyn Megalodon + Send + Sync>,
|
|
|
|
last_id_on_page: &Option<String>,
|
|
|
|
) -> Result<Vec<Status>> {
|
2023-07-25 21:08:34 +00:00
|
|
|
trace!("Fetching page of local timeline");
|
2023-07-15 18:18:27 +00:00
|
|
|
let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
|
2023-07-25 21:08:34 +00:00
|
|
|
trace!("Fetching next page");
|
2023-07-15 18:18:27 +00:00
|
|
|
client
|
|
|
|
.get_local_timeline(Some(&GetLocalTimelineInputOptions {
|
|
|
|
max_id: Some(max_id.clone()),
|
|
|
|
..GetLocalTimelineInputOptions::default()
|
|
|
|
}))
|
|
|
|
.await?
|
|
|
|
} else {
|
2023-07-25 21:08:34 +00:00
|
|
|
trace!("Fetching first page");
|
2023-07-15 18:18:27 +00:00
|
|
|
client.get_local_timeline(None).await?
|
|
|
|
};
|
|
|
|
Ok(json)
|
|
|
|
}
|
|
|
|
|
2023-07-25 21:08:34 +00:00
|
|
|
async fn fetch_dm_page(
|
|
|
|
client: &Box<dyn Megalodon + Send + Sync>,
|
|
|
|
account: &Account,
|
|
|
|
last_id_on_page: &Option<String>,
|
|
|
|
) -> Result<Vec<Status>> {
|
|
|
|
trace!("Fetching page of DMs");
|
|
|
|
let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
|
|
|
|
trace!("Fetching next page");
|
|
|
|
client
|
|
|
|
.get_account_statuses(
|
|
|
|
account.id.clone(),
|
|
|
|
Some(&GetAccountStatusesInputOptions {
|
|
|
|
max_id: Some(max_id.clone()),
|
|
|
|
..GetAccountStatusesInputOptions::default()
|
|
|
|
}),
|
|
|
|
)
|
|
|
|
.await?
|
|
|
|
} else {
|
|
|
|
trace!("Fetching first page");
|
|
|
|
client
|
|
|
|
.get_account_statuses(account.id.clone(), None)
|
|
|
|
.await?
|
|
|
|
};
|
|
|
|
let json: Vec<Status> = json
|
|
|
|
.into_iter()
|
|
|
|
.filter(|s| {
|
|
|
|
if let StatusVisibility::Direct = s.visibility {
|
|
|
|
(s.in_reply_to_account_id.is_none()
|
|
|
|
|| s.in_reply_to_account_id
|
|
|
|
.as_ref()
|
|
|
|
.map(|r| r == &account.id)
|
|
|
|
.unwrap_or_default())
|
|
|
|
&& s.mentions.is_empty()
|
|
|
|
} else {
|
|
|
|
false
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
Ok(json)
|
|
|
|
}
|
|
|
|
|
2023-07-15 18:18:27 +00:00
|
|
|
/// Returns the id of the page's oldest status when even that status is newer than the end of
/// `range`, i.e. when the whole page lies after the range; `None` otherwise.
fn page_newer_than(page: &Page, range: &Range) -> Option<String> {
    match page.oldest {
        Some(oldest) if oldest > &range.end => page.oldest_id.clone(),
        _ => None,
    }
}
|
|
|
|
|
|
|
|
fn page_end_older_than(page: &Page, range: &Range) -> bool {
|
|
|
|
status_older_than(&page.oldest, &range.start)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn page_start_older_than(page: &Page, range: &Range) -> bool {
|
|
|
|
status_older_than(&page.newest, &range.start)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns `true` when `status` holds a timestamp strictly earlier than `dt`; a missing
/// timestamp counts as not older.
fn status_older_than(status: &Option<&DateTime<Utc>>, dt: &DateTime<Local>) -> bool {
    match *status {
        Some(created_at) => created_at < dt,
        None => false,
    }
}
|