Handle image attachments, refactor more

This commit is contained in:
Thomas Gideon 2023-07-15 19:40:16 -04:00
parent 01335b6929
commit c90495a985
6 changed files with 334 additions and 111 deletions

138
Cargo.lock generated
View file

@ -267,6 +267,17 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
[[package]]
name = "clipboard-win"
version = "4.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362"
dependencies = [
"error-code",
"str-buf",
"winapi",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
@ -418,6 +429,12 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "endian-type"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"
[[package]]
name = "env_logger"
version = "0.10.0"
@ -452,6 +469,16 @@ dependencies = [
"libc",
]
[[package]]
name = "error-code"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21"
dependencies = [
"libc",
"str-buf",
]
[[package]]
name = "fastrand"
version = "1.9.0"
@ -461,6 +488,17 @@ dependencies = [
"instant",
]
[[package]]
name = "fd-lock"
version = "3.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5"
dependencies = [
"cfg-if",
"rustix 0.38.4",
"windows-sys",
]
[[package]]
name = "fnv"
version = "1.0.7"
@ -634,6 +672,15 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "home"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb"
dependencies = [
"windows-sys",
]
[[package]]
name = "html2md"
version = "0.2.14"
@ -886,8 +933,11 @@ dependencies = [
"log",
"megalodon",
"rss",
"rustyline",
"tokio",
"tokio-stream",
"url",
"url_open",
]
[[package]]
@ -1059,6 +1109,27 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "nibble_vec"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43"
dependencies = [
"smallvec",
]
[[package]]
name = "nix"
version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"
dependencies = [
"bitflags 1.3.2",
"cfg-if",
"libc",
"static_assertions",
]
[[package]]
name = "num-traits"
version = "0.2.15"
@ -1282,6 +1353,16 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "radix_trie"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd"
dependencies = [
"endian-type",
"nibble_vec",
]
[[package]]
name = "rand"
version = "0.8.5"
@ -1486,6 +1567,29 @@ dependencies = [
"untrusted",
]
[[package]]
name = "rustyline"
version = "12.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9"
dependencies = [
"bitflags 2.3.3",
"cfg-if",
"clipboard-win",
"fd-lock",
"home",
"libc",
"log",
"memchr",
"nix",
"radix_trie",
"scopeguard",
"unicode-segmentation",
"unicode-width",
"utf8parse",
"winapi",
]
[[package]]
name = "ryu"
version = "1.0.14"
@ -1670,6 +1774,18 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "str-buf"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0"
[[package]]
name = "string_cache"
version = "0.8.7"
@ -1982,6 +2098,18 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-segmentation"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "untrusted"
version = "0.7.1"
@ -2000,6 +2128,16 @@ dependencies = [
"serde",
]
[[package]]
name = "url_open"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8c5b0472f1c021d8989aa63984084fc6c2d8624bc676db5c95cc14faff2ad6c"
dependencies = [
"url",
"winapi",
]
[[package]]
name = "urlencoding"
version = "2.1.2"

View file

@ -14,5 +14,8 @@ html2md = "0.2.14"
log = "0.4.19"
megalodon = "0.8.3"
rss = "2.0.4"
rustyline = "12.0.0"
tokio = { version = "1.28.2", features = ["default", "full"] }
tokio-stream = "0.1.14"
url = "2.4.0"
url_open = "0.0.2"

81
src/format.rs Normal file
View file

@ -0,0 +1,81 @@
use anyhow::Result;
use chrono::Local;
use html2md::parse_html;
use log::debug;
use megalodon::{
entities::{Account, Attachment, Status},
response::Response,
Megalodon,
};
use rustyline::DefaultEditor;
use url::Url;
use url_open::UrlOpen;
/// Renders `status`, plus any self-replies found beneath it, as quoted text.
///
/// The status itself becomes a `HH:MM` heading (local time) followed by its content, converted
/// from HTML with `parse_html`, on a `> ` quoted line, with any media attachments appended by
/// `format_attachments`. The status context is then fetched from the server; every descendant
/// that `account` wrote in reply to `account` is rendered one quote level deeper.
///
/// # Errors
///
/// Returns the error from `get_status_context` if the context request fails.
pub(super) async fn format_status(
    client: &Box<dyn Megalodon + Send + Sync>,
    account: &Account,
    status: &Status,
) -> Result<String> {
    let ancestor = format!(
        "{}\n> {}{}",
        status.created_at.with_timezone(&Local).format("%H:%M"),
        parse_html(&status.content).trim(),
        format_attachments(1, &status.media_attachments)
    );
    let Response { json: context, .. } =
        client.get_status_context(status.id.clone(), None).await?;
    let thread = context
        .descendants
        .into_iter()
        // keep only the account's replies to itself; compare by reference so no id is cloned
        // per descendant
        .filter(|reply| {
            reply.in_reply_to_account_id.as_ref() == Some(&account.id)
                && reply.account.id == account.id
        })
        .map(|reply| {
            format!(
                ">\n> {}\n>> {}{}",
                reply.created_at.with_timezone(&Local).format("%H:%M"),
                parse_html(&reply.content).trim(),
                format_attachments(2, &reply.media_attachments)
            )
        })
        .collect::<Vec<String>>()
        .join("\n");
    if thread.is_empty() {
        Ok(ancestor)
    } else {
        // Each reply block opens with a bare `>` line; without this newline the first block was
        // glued onto the last line of the ancestor, while later blocks were already separated by
        // the join above.
        Ok(format!("{}\n{}", ancestor, thread))
    }
}
fn format_attachments(depth: usize, attachments: &[Attachment]) -> String {
let prefix = vec![">"; depth].join("");
debug!("Attachments {:?}", attachments);
if attachments.is_empty() {
String::default()
} else {
let mut editor = DefaultEditor::new().unwrap();
attachments
.iter()
.map(|a| {
Url::parse(&a.url).unwrap().open();
let src = if let Ok(line) = editor.readline("Filename: ") {
line
} else {
a.url.clone()
};
format!(
"
{} <img src=\"{}\" />",
prefix, src
)
})
.collect::<Vec<String>>()
.join("\n")
}
}

View file

@ -1,28 +1,29 @@
use anyhow::{bail, format_err, Result};
use chrono::{DateTime, Local, LocalResult, NaiveDate, TimeZone, Utc};
use anyhow::Result;
use chrono::{DateTime, Local, Utc};
use clap::{arg, command, Parser};
use html2md::parse_html;
use log::{debug, trace};
use megalodon::{
entities::Status, generator, megalodon::GetLocalTimelineInputOptions, response::Response,
entities::{Account, Status},
generator,
megalodon::GetLocalTimelineInputOptions,
response::Response,
Megalodon,
};
use tokio_stream::{iter, StreamExt};
use std::env;
/// A span of local time bounded by `start` and `end`; `try_create_range` fills it with the first
/// (00:00:00) and last (23:59:59) second of a single day.
#[derive(Debug)]
struct Range {
/// Beginning of the span.
start: DateTime<Local>,
/// End of the span.
end: DateTime<Local>,
}
use self::{
format::format_status,
page::{bounds_from, Page},
range::try_create_range,
range::Range,
};
/// Bounds of one page of fetched statuses, borrowed from the statuses themselves.
#[derive(Debug)]
struct Page<'a> {
/// Id of the last status on the page, `None` when the page is empty.
oldest_id: Option<String>,
/// Creation time of the last status on the page.
oldest: Option<&'a DateTime<Utc>>,
/// Creation time of the first status on the page.
newest: Option<&'a DateTime<Utc>>,
}
mod format;
mod page;
mod range;
#[derive(Debug, Parser)]
#[command()]
@ -36,89 +37,54 @@ struct Config {
#[tokio::main]
async fn main() -> Result<()> {
let Config { date, verbose } = Config::parse();
if verbose {
env::set_var("RUST_LOG", format!("{}=debug", module_path!()));
} else {
env::set_var("RUST_LOG", format!("{}=none", module_path!()));
}
let level = if verbose { "debug" } else { "off" };
env::set_var("RUST_LOG", format!("{}={}", module_path!(), level));
env_logger::init();
let day = try_create_range(date)?;
debug!("Date {}", day.end.format("%Y-%m-%d"));
let client = create_client()?;
let Response { json: account, .. } = client.verify_account_credentials().await?;
debug!("Fetching posts for date, {}.", day.end.format("%Y-%m-%d"));
// the server only provides a page of results at a time, keep the oldest status from any page
// to request the next older page of statuses
let mut last_id_on_page: Option<String> = None;
debug!("Fetching posts");
// store the formatted posts in server order, reversed chronologically, to reverse at the end
// for regular chronological ordering
let mut reversed = Vec::new();
loop {
let json = fetch_page(&client, &last_id_on_page).await?;
let page = Page {
newest: json.first().map(|s| &s.created_at),
oldest_id: json.last().map(|s| s.id.clone()),
oldest: json.last().map(|s| &s.created_at),
};
let statuses = fetch_page(&client, &last_id_on_page).await?;
if statuses.is_empty() {
debug!("No more posts in range.");
break;
}
let page = bounds_from(&statuses);
trace!("Page bounds {:?}", page);
// this age comparison only applies after the first page is fetched; the rest of the loop
// body handles if the requested date is newer than any statuses on the first page
if last_id_on_page.is_some() && page_start_older_than(&page, &day) {
break;
}
// fetching returns 20 at a time, in reverse chronological order so may require skipping
// pages after the requested date
if let Some(oldest_id) = page_newer_than(&page, &day) {
last_id_on_page.replace(oldest_id);
continue;
}
let json = json
.clone()
.into_iter()
.filter(|json| {
json.account.id == account.id
&& day.start <= json.created_at
&& json.created_at <= day.end
})
.collect::<Vec<Status>>();
trace!("Filtered to {} post(s)", json.len());
let mut stream = iter(json);
// mapping the vector runs into thorny ownership issues and only produces futures, not
// resolved values; a for in loop works with await but also runs into thorny ownership
// issues; a stream resolves both because the stream takes ownership of the statuses and
// can be iterated in a simple way that allows the use of await in the body
let mut stream = iter(filter_statuses(&account, &day, &statuses));
while let Some(status) = stream.next().await {
let ancestor = format!(
"{}
> {}",
status.created_at.with_timezone(&Local).format("%H:%M"),
parse_html(&status.content).trim()
);
let Response { json, .. } = client.get_status_context(status.id, None).await?;
let thread = json
.descendants
.into_iter()
.filter(|s| {
s.in_reply_to_account_id == Some(account.id.clone())
&& s.account.id == account.id
})
.map(|status| {
format!(
">
> {}
>> {}",
status.created_at.with_timezone(&Local).format("%H:%M"),
parse_html(&status.content).trim()
)
})
.collect::<Vec<String>>()
.join("\n");
reversed.push(format!(
"{}{}",
ancestor,
if !thread.is_empty() {
thread
} else {
String::default()
}
));
reversed.push(format_status(&client, &account, status).await?);
}
if page_end_older_than(&page, &day) {
@ -135,6 +101,16 @@ async fn main() -> Result<()> {
Ok(())
}
/// Selects, in their original order, the statuses written by `account` whose creation time lies
/// within `day` (both bounds inclusive). The result borrows from `json`.
fn filter_statuses<'a>(account: &Account, day: &Range, json: &'a Vec<Status>) -> Vec<&'a Status> {
    let mut kept = Vec::new();
    for status in json {
        let by_account = status.account.id == account.id;
        let within_day = day.start <= status.created_at && status.created_at <= day.end;
        if by_account && within_day {
            kept.push(status);
        }
    }
    kept
}
fn create_client() -> Result<Box<dyn Megalodon + Send + Sync>> {
let url = env::var("MASTODON_URL")?;
let token = env::var("MASTODON_ACCESS_TOKEN")?;
@ -160,41 +136,6 @@ async fn fetch_page(
Ok(json)
}
/// Builds the range covering the whole local day named by `date`, from 00:00:00 to 23:59:59.
///
/// # Errors
///
/// Propagates any error from `create_day_bound`.
fn try_create_range<S: AsRef<str>>(date: S) -> Result<Range> {
    let date = date.as_ref();
    let start = create_day_bound(date, 0, 0, 0)?;
    let end = create_day_bound(date, 23, 59, 59)?;
    Ok(Range { start, end })
}
/// Builds the local date-time at `hour:minute:second` on the day named by `day`.
///
/// `day` must consist of exactly three `-`-separated numeric fields: year, month and day.
///
/// # Errors
///
/// Fails when `day` does not have exactly three fields, a field does not parse as a number, the
/// fields do not form a valid date or time, or the local time zone does not map the result to a
/// single instant (anything other than `LocalResult::Single`).
fn create_day_bound<S: AsRef<str>>(
    day: S,
    hour: u32,
    minute: u32,
    second: u32,
) -> Result<DateTime<Local>> {
    let text = day.as_ref();
    let fields = text.split("-").collect::<Vec<_>>();
    // one exact-length pattern performs both the field-count check and the destructuring
    let (year, month, day_of_month) = match fields.as_slice() {
        [year, month, day_of_month] => (year, month, day_of_month),
        _ => bail!("Invalid date format! {}", text),
    };
    let date = NaiveDate::from_ymd_opt(year.parse()?, month.parse()?, day_of_month.parse()?)
        .ok_or_else(|| format_err!("Invalid date!"))?;
    let naive = date
        .and_hms_opt(hour, minute, second)
        .ok_or_else(|| format_err!("Invalid time!"))?;
    match Local.from_local_datetime(&naive) {
        LocalResult::Single(bound) => Ok(bound),
        _ => bail!("Cannot construct day boundary!"),
    }
}
fn page_newer_than(page: &Page, range: &Range) -> Option<String> {
page.oldest
.filter(|oldest| *oldest > &range.end)

17
src/page.rs Normal file
View file

@ -0,0 +1,17 @@
use chrono::{DateTime, Utc};
use megalodon::entities::Status;
/// Bounds of one page of fetched statuses, borrowed from the statuses themselves.
#[derive(Debug)]
pub(super) struct Page<'a> {
/// Id of the last status on the page, `None` when the page is empty.
pub oldest_id: Option<String>,
/// Creation time of the last status on the page.
pub oldest: Option<&'a DateTime<Utc>>,
/// Creation time of the first status on the page.
pub newest: Option<&'a DateTime<Utc>>,
}
/// Summarizes a page of statuses by its first and last entries.
///
/// The first status supplies `newest`; the last supplies `oldest` and `oldest_id`. All three are
/// `None` for an empty slice.
// NOTE(review): assumes the statuses arrive newest first — confirm against the caller.
pub(super) fn bounds_from<'a>(statuses: &'a Vec<Status>) -> Page<'a> {
    let first = statuses.first();
    let last = statuses.last();
    Page {
        oldest_id: last.map(|status| status.id.clone()),
        oldest: last.map(|status| &status.created_at),
        newest: first.map(|status| &status.created_at),
    }
}

43
src/range.rs Normal file
View file

@ -0,0 +1,43 @@
use anyhow::{bail, format_err, Result};
use chrono::{DateTime, Local, LocalResult, NaiveDate, TimeZone};
/// A span of local time bounded by `start` and `end`; `try_create_range` fills it with the first
/// (00:00:00) and last (23:59:59) second of a single day.
#[derive(Debug)]
pub(super) struct Range {
/// Beginning of the span.
pub start: DateTime<Local>,
/// End of the span.
pub end: DateTime<Local>,
}
/// Builds the range covering the whole local day named by `date`, from 00:00:00 to 23:59:59.
///
/// # Errors
///
/// Propagates any error from `create_day_bound`.
pub(super) fn try_create_range<S: AsRef<str>>(date: S) -> Result<Range> {
    let date = date.as_ref();
    let start = create_day_bound(date, 0, 0, 0)?;
    let end = create_day_bound(date, 23, 59, 59)?;
    Ok(Range { start, end })
}
/// Builds the local date-time at `hour:minute:second` on the day named by `day`.
///
/// `day` must consist of exactly three `-`-separated numeric fields: year, month and day.
///
/// # Errors
///
/// Fails when `day` does not have exactly three fields, a field does not parse as a number, the
/// fields do not form a valid date or time, or the local time zone does not map the result to a
/// single instant (anything other than `LocalResult::Single`).
fn create_day_bound<S: AsRef<str>>(
    day: S,
    hour: u32,
    minute: u32,
    second: u32,
) -> Result<DateTime<Local>> {
    let text = day.as_ref();
    let fields = text.split("-").collect::<Vec<_>>();
    // one exact-length pattern performs both the field-count check and the destructuring
    let (year, month, day_of_month) = match fields.as_slice() {
        [year, month, day_of_month] => (year, month, day_of_month),
        _ => bail!("Invalid date format! {}", text),
    };
    let date = NaiveDate::from_ymd_opt(year.parse()?, month.parse()?, day_of_month.parse()?)
        .ok_or_else(|| format_err!("Invalid date!"))?;
    let naive = date
        .and_hms_opt(hour, minute, second)
        .ok_or_else(|| format_err!("Invalid time!"))?;
    match Local.from_local_datetime(&naive) {
        LocalResult::Single(bound) => Ok(bound),
        _ => bail!("Cannot construct day boundary!"),
    }
}