From c90495a9850085ac8eef54a4d6308ffc4b94c555 Mon Sep 17 00:00:00 2001 From: Thomas Gideon Date: Sat, 15 Jul 2023 19:40:16 -0400 Subject: [PATCH] Handle image attachments, refactor more --- Cargo.lock | 138 ++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 3 + src/format.rs | 81 +++++++++++++++++++++++++ src/main.rs | 163 ++++++++++++++++---------------------------------- src/page.rs | 17 ++++++ src/range.rs | 43 +++++++++++++ 6 files changed, 334 insertions(+), 111 deletions(-) create mode 100644 src/format.rs create mode 100644 src/page.rs create mode 100644 src/range.rs diff --git a/Cargo.lock b/Cargo.lock index b71f645..7d44745 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -267,6 +267,17 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +[[package]] +name = "clipboard-win" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" +dependencies = [ + "error-code", + "str-buf", + "winapi", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -418,6 +429,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + [[package]] name = "env_logger" version = "0.10.0" @@ -452,6 +469,16 @@ dependencies = [ "libc", ] +[[package]] +name = "error-code" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" +dependencies = [ + "libc", + "str-buf", +] + [[package]] name = "fastrand" version = "1.9.0" @@ -461,6 +488,17 @@ dependencies = [ "instant", ] +[[package]] +name = "fd-lock" +version = "3.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" +dependencies = [ + "cfg-if", + "rustix 0.38.4", + "windows-sys", +] + [[package]] name = "fnv" version = "1.0.7" @@ -634,6 +672,15 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys", +] + [[package]] name = "html2md" version = "0.2.14" @@ -886,8 +933,11 @@ dependencies = [ "log", "megalodon", "rss", + "rustyline", "tokio", "tokio-stream", + "url", + "url_open", ] [[package]] @@ -1059,6 +1109,27 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", + "static_assertions", +] + [[package]] name = "num-traits" version = "0.2.15" @@ -1282,6 +1353,16 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -1486,6 +1567,29 @@ dependencies = [ "untrusted", ] +[[package]] +name = "rustyline" +version = "12.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" +dependencies = [ + "bitflags 2.3.3", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "scopeguard", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "winapi", +] + [[package]] name = "ryu" version = "1.0.14" @@ -1670,6 +1774,18 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "str-buf" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" + [[package]] name = "string_cache" version = "0.8.7" @@ -1982,6 +2098,18 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + [[package]] name = "untrusted" version = "0.7.1" @@ -2000,6 +2128,16 @@ dependencies = [ "serde", ] +[[package]] +name = "url_open" +version = "0.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8c5b0472f1c021d8989aa63984084fc6c2d8624bc676db5c95cc14faff2ad6c" +dependencies = [ + "url", + "winapi", +] + [[package]] name = "urlencoding" version = "2.1.2" diff --git a/Cargo.toml b/Cargo.toml index e961b96..6d803a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,5 +14,8 @@ html2md = "0.2.14" log = "0.4.19" megalodon = "0.8.3" rss = "2.0.4" +rustyline = "12.0.0" tokio = { version = "1.28.2", features = ["default", "full"] } tokio-stream = "0.1.14" +url = "2.4.0" +url_open = "0.0.2" diff --git a/src/format.rs b/src/format.rs new file mode 100644 index 0000000..2931952 --- /dev/null +++ b/src/format.rs @@ -0,0 +1,81 @@ +use anyhow::Result; +use chrono::Local; +use html2md::parse_html; +use log::debug; +use megalodon::{ + entities::{Account, Attachment, Status}, + response::Response, + Megalodon, +}; +use rustyline::DefaultEditor; +use url::Url; +use url_open::UrlOpen; + +pub(super) async fn format_status( + client: &Box, + account: &Account, + status: &Status, +) -> Result { + let ancestor = format!( + "{} +> {}{}", + status.created_at.with_timezone(&Local).format("%H:%M"), + parse_html(&status.content).trim(), + format_attachments(1, &status.media_attachments) + ); + let Response { json, .. } = client.get_status_context(status.id.clone(), None).await?; + let thread = json + .descendants + .into_iter() + .filter(|s| { + s.in_reply_to_account_id == Some(account.id.clone()) && s.account.id == account.id + }) + .map(|status| { + format!( + "> +> {} +>> {}{}", + status.created_at.with_timezone(&Local).format("%H:%M"), + parse_html(&status.content).trim(), + format_attachments(2, &status.media_attachments) + ) + }) + .collect::>() + .join("\n"); + Ok(format!( + "{}{}", + ancestor, + if !thread.is_empty() { + thread + } else { + String::default() + } + )) +} + +fn format_attachments(depth: usize, attachments: &[Attachment]) -> String { + let prefix = vec![">"; depth].join(""); + debug!("Attachments {:?}", attachments); + if attachments.is_empty() { + String::default() + } else { + let mut editor = DefaultEditor::new().unwrap(); + attachments + .iter() + .map(|a| { + Url::parse(&a.url).unwrap().open(); + let src = if let Ok(line) = editor.readline("Filename: ") { + line + } else { + a.url.clone() + }; + format!( + " +{} ", + prefix, src + ) + }) + .collect::>() + .join("\n") + } +} diff --git a/src/main.rs b/src/main.rs index 87fbffe..ebea522 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,28 +1,29 @@ -use anyhow::{bail, format_err, Result}; -use chrono::{DateTime, Local, LocalResult, NaiveDate, TimeZone, Utc}; +use anyhow::Result; +use chrono::{DateTime, Local, Utc}; use clap::{arg, command, Parser}; -use html2md::parse_html; use log::{debug, trace}; use megalodon::{ - entities::Status, generator, megalodon::GetLocalTimelineInputOptions, response::Response, + entities::{Account, Status}, + generator, + megalodon::GetLocalTimelineInputOptions, + response::Response, Megalodon, }; + use tokio_stream::{iter, StreamExt}; use std::env; -#[derive(Debug)] -struct Range { - start: DateTime, - end: DateTime, -} +use self::{ + format::format_status, + page::{bounds_from, Page}, + range::try_create_range, + range::Range, +}; -#[derive(Debug)] -struct Page<'a> { - oldest_id: Option, - oldest: Option<&'a DateTime>, - newest: Option<&'a DateTime>, -} +mod format; +mod page; +mod range; #[derive(Debug, Parser)] #[command()] @@ -36,89 +37,54 @@ struct Config { #[tokio::main] async fn main() -> Result<()> { let Config { date, verbose } = Config::parse(); - if verbose { - env::set_var("RUST_LOG", format!("{}=debug", module_path!())); - } else { - env::set_var("RUST_LOG", format!("{}=none", module_path!())); - } + + let level = if verbose { "debug" } else { "off" }; + env::set_var("RUST_LOG", format!("{}={}", module_path!(), level)); env_logger::init(); let day = try_create_range(date)?; - debug!("Date {}", day.end.format("%Y-%m-%d")); - let client = create_client()?; let Response { json: account, .. } = client.verify_account_credentials().await?; + debug!("Fetching posts for date, {}.", day.end.format("%Y-%m-%d")); + + // the server only provides a page of results at a time, keep the oldest status from any page + // to request the next older page of statuses let mut last_id_on_page: Option = None; - debug!("Fetching posts"); + // store the formatted posts in server order, reversed chronologically, to reverse at the end + // for regular chronological ordering let mut reversed = Vec::new(); loop { - let json = fetch_page(&client, &last_id_on_page).await?; - let page = Page { - newest: json.first().map(|s| &s.created_at), - oldest_id: json.last().map(|s| s.id.clone()), - oldest: json.last().map(|s| &s.created_at), - }; + let statuses = fetch_page(&client, &last_id_on_page).await?; + if statuses.is_empty() { + debug!("No more posts in range."); + break; + } + let page = bounds_from(&statuses); + trace!("Page bounds {:?}", page); + // this age comparison only applies after the first page is fetched; the rest of the loop + // body handles if the requested date is newer than any statuses on the first page if last_id_on_page.is_some() && page_start_older_than(&page, &day) { break; } + // fetching returns 20 at a time, in reverse chronological order so may require skipping + // pages after the requested date if let Some(oldest_id) = page_newer_than(&page, &day) { last_id_on_page.replace(oldest_id); continue; } - let json = json - .clone() - .into_iter() - .filter(|json| { - json.account.id == account.id - && day.start <= json.created_at - && json.created_at <= day.end - }) - .collect::>(); - trace!("Filtered to {} post(s)", json.len()); - - let mut stream = iter(json); - + // mapping the vector runs into thorny ownership issues and only produces futures, not + // resolved values; a for in loop works with await but also runs into thorny ownership + // issues; a stream resolves both because the stream takes ownership of the statuses and + // can be iterated in a simple way that allows the use of await in the body + let mut stream = iter(filter_statuses(&account, &day, &statuses)); while let Some(status) = stream.next().await { - let ancestor = format!( - "{} -> {}", - status.created_at.with_timezone(&Local).format("%H:%M"), - parse_html(&status.content).trim() - ); - let Response { json, .. } = client.get_status_context(status.id, None).await?; - let thread = json - .descendants - .into_iter() - .filter(|s| { - s.in_reply_to_account_id == Some(account.id.clone()) - && s.account.id == account.id - }) - .map(|status| { - format!( - "> -> {} ->> {}", - status.created_at.with_timezone(&Local).format("%H:%M"), - parse_html(&status.content).trim() - ) - }) - .collect::>() - .join("\n"); - reversed.push(format!( - "{}{}", - ancestor, - if !thread.is_empty() { - thread - } else { - String::default() - } - )); + reversed.push(format_status(&client, &account, status).await?); } if page_end_older_than(&page, &day) { @@ -135,6 +101,16 @@ async fn main() -> Result<()> { Ok(()) } +fn filter_statuses<'a>(account: &Account, day: &Range, json: &'a Vec) -> Vec<&'a Status> { + json.iter() + .filter(|json| { + json.account.id == account.id + && day.start <= json.created_at + && json.created_at <= day.end + }) + .collect::>() +} + fn create_client() -> Result> { let url = env::var("MASTODON_URL")?; let token = env::var("MASTODON_ACCESS_TOKEN")?; @@ -160,41 +136,6 @@ async fn fetch_page( Ok(json) } -fn try_create_range>(date: S) -> Result { - Ok(Range { - start: create_day_bound(&date, 0, 0, 0)?, - end: create_day_bound(date, 23, 59, 59)?, - }) -} - -fn create_day_bound>( - day: S, - hour: u32, - minute: u32, - second: u32, -) -> Result> { - let ts: Vec<&str> = day.as_ref().split("-").collect(); - if ts.len() != 3 { - bail!("Invalid date format! {}", day.as_ref()) - } - let (year, month, day) = if let [year, month, day, ..] = &ts[..] { - (year, month, day) - } else { - bail!("Invalid date format! {}", day.as_ref()) - }; - let b = Local.from_local_datetime( - &NaiveDate::from_ymd_opt(year.parse()?, month.parse()?, day.parse()?) - .ok_or_else(|| format_err!("Invalid date!"))? - .and_hms_opt(hour, minute, second) - .ok_or_else(|| format_err!("Invalid time!"))?, - ); - if let LocalResult::Single(b) = b { - Ok(b) - } else { - bail!("Cannot construct day boundary!") - } -} - fn page_newer_than(page: &Page, range: &Range) -> Option { page.oldest .filter(|oldest| *oldest > &range.end) diff --git a/src/page.rs b/src/page.rs new file mode 100644 index 0000000..0ad371d --- /dev/null +++ b/src/page.rs @@ -0,0 +1,17 @@ +use chrono::{DateTime, Utc}; +use megalodon::entities::Status; + +#[derive(Debug)] +pub(super) struct Page<'a> { + pub oldest_id: Option, + pub oldest: Option<&'a DateTime>, + pub newest: Option<&'a DateTime>, +} + +pub(super) fn bounds_from<'a>(statuses: &'a Vec) -> Page<'a> { + Page { + newest: statuses.first().map(|s| &s.created_at), + oldest_id: statuses.last().map(|s| s.id.clone()), + oldest: statuses.last().map(|s| &s.created_at), + } +} diff --git a/src/range.rs b/src/range.rs new file mode 100644 index 0000000..a5f1862 --- /dev/null +++ b/src/range.rs @@ -0,0 +1,43 @@ +use anyhow::{bail, format_err, Result}; +use chrono::{DateTime, Local, LocalResult, NaiveDate, TimeZone}; + +#[derive(Debug)] +pub(super) struct Range { + pub start: DateTime, + pub end: DateTime, +} + +pub(super) fn try_create_range>(date: S) -> Result { + Ok(Range { + start: create_day_bound(&date, 0, 0, 0)?, + end: create_day_bound(date, 23, 59, 59)?, + }) +} + +fn create_day_bound>( + day: S, + hour: u32, + minute: u32, + second: u32, +) -> Result> { + let ts: Vec<&str> = day.as_ref().split("-").collect(); + if ts.len() != 3 { + bail!("Invalid date format! {}", day.as_ref()) + } + let (year, month, day) = if let [year, month, day, ..] = &ts[..] { + (year, month, day) + } else { + bail!("Invalid date format! {}", day.as_ref()) + }; + let b = Local.from_local_datetime( + &NaiveDate::from_ymd_opt(year.parse()?, month.parse()?, day.parse()?) + .ok_or_else(|| format_err!("Invalid date!"))? + .and_hms_opt(hour, minute, second) + .ok_or_else(|| format_err!("Invalid time!"))?, + ); + if let LocalResult::Single(b) = b { + Ok(b) + } else { + bail!("Cannot construct day boundary!") + } +}