From b8a1c5461be433181d48183a3ef4edc5340a68f4 Mon Sep 17 00:00:00 2001 From: Thomas Gideon Date: Sat, 15 Jul 2023 14:18:27 -0400 Subject: [PATCH] Refactor --- Cargo.lock | 148 +++++++++++++++++++++++++++++--- Cargo.toml | 5 +- src/main.rs | 241 ++++++++++++++++++++++++++++++++-------------------- 3 files changed, 286 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 314a475..b71f645 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -41,6 +41,55 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +dependencies = [ + "anstyle", + "windows-sys", +] + [[package]] name = "anyhow" version = "1.0.71" @@ -177,6 +226,53 @@ dependencies = [ "winapi", ] +[[package]] +name = "clap" +version = "4.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3eab9e8ceb9afdade1ab3f0fd8dbce5b1b2f468ad653baf10e771781b2b67b73" +dependencies = [ + "clap_builder", + "clap_derive", + "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f2763db829349bf00cfc06251268865ed4363b93a943174f638daf3ecdba2cd" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.25", +] + +[[package]] +name = "clap_lex" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + [[package]] name = "combine" version = "4.6.6" @@ -520,6 +616,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "hermit-abi" version = "0.3.2" @@ -772,6 +874,22 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "kensho" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "clap", + "env_logger", + "html2md", + "log", + "megalodon", + "rss", + "tokio", + "tokio-stream", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -1737,6 +1855,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-tungstenite" version = "0.19.0" @@ -1883,6 +2012,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "vcpkg" version = "0.2.15" @@ -2005,19 +2140,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "watershed" -version = "0.1.0" -dependencies = [ - "anyhow", - "chrono", - "env_logger", - "html2md", - "megalodon", - "rss", - "tokio", -] - [[package]] name = "web-sys" version = "0.3.64" diff --git a/Cargo.toml b/Cargo.toml index fd914ab..e961b96 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "watershed" +name = "kensho" version = "0.1.0" edition = "2021" @@ -8,8 +8,11 @@ edition = "2021" [dependencies] anyhow = "1.0.71" chrono = "0.4.26" +clap = { version = "4.3.12", features = ["default", "derive"] } env_logger = "0.10.0" html2md = "0.2.14" +log = "0.4.19" megalodon = "0.8.3" rss = "2.0.4" tokio = { version = "1.28.2", features = ["default", "full"] } +tokio-stream = "0.1.14" diff --git a/src/main.rs b/src/main.rs index 329fd39..9120a7a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,127 +1,180 @@ -use anyhow::{format_err, Result}; -use chrono::{DateTime, Local, NaiveDate, TimeZone, Utc}; +use anyhow::{bail, format_err, Result}; +use chrono::{DateTime, Local, LocalResult, NaiveDate, TimeZone, Utc}; use html2md::parse_html; +use log::{debug, trace}; use megalodon::{ - entities::status::Status, generator, megalodon::GetLocalTimelineInputOptions, - response::Response, + entities::Status, generator, megalodon::GetLocalTimelineInputOptions, response::Response, + Megalodon, }; +use tokio_stream::{iter, StreamExt}; + use std::env; #[derive(Debug)] -struct Post { - id: String, - content: String, - created_at: DateTime, +struct Range { + start: DateTime, + end: DateTime, } -// TODO implement try_from that looks for descendants and adds them -impl From<&Status> for Post { - fn from(status: &Status) -> Self { - let Status { - id, - created_at, - content, - .. - } = status; - let id = id.clone(); - let created_at = created_at.clone(); - let content = parse_html(&content); - Post { - id, - created_at, - content, - } - } +#[derive(Debug)] +struct Page<'a> { + oldest_id: Option, + oldest: Option<&'a DateTime>, + newest: Option<&'a DateTime>, } #[tokio::main] async fn main() -> Result<()> { + env::set_var("RUST_LOG", format!("{}=debug", module_path!())); env_logger::init(); // TODO add clap and argument for date - let start = Local - .from_local_datetime( - &NaiveDate::from_ymd_opt(2023, 7, 1) - .ok_or_else(|| format_err!("Invalid date!"))? - .and_hms_opt(0, 0, 0) - .expect("Failed to construct time!"), - ) - .unwrap(); + let day = try_create_range("2023-07-01")?; - let end = Local - .from_local_datetime( - &NaiveDate::from_ymd_opt(2023, 7, 1) - .ok_or_else(|| format_err!("Invallid date!"))? - .and_hms_opt(23, 59, 59) - .expect("Failed to construct time!"), - ) - .unwrap(); + debug!("Date {}", day.end.format("%Y-%m-%d")); - println!("Date {:#?}", start); + let client = create_client()?; - let url = env::var("MASTODON_URL")?; - let token = env::var("MASTODON_ACCESS_TOKEN")?; - let client = generator(megalodon::SNS::Mastodon, url, Some(token), None); - let mut max_id: Option = None; + let mut last_id_on_page: Option = None; + debug!("Fetching posts"); loop { - let Response { json, .. } = if let Some(max_id) = max_id.as_ref() { - client - .get_local_timeline(Some(&GetLocalTimelineInputOptions { - max_id: Some(max_id.clone()), - ..GetLocalTimelineInputOptions::default() - })) - .await? - } else { - client.get_local_timeline(None).await? + let json = fetch_page(&client, &last_id_on_page).await?; + let page = Page { + newest: json.first().map(|s| &s.created_at), + oldest_id: json.last().map(|s| s.id.clone()), + oldest: json.last().map(|s| &s.created_at), }; + trace!("Page bounds {:?}", page); - if let Some(last) = json.last() { - if last.created_at > start { - max_id.replace(last.id.clone()); - continue; - } + if last_id_on_page.is_some() && page_start_older_than(&page, &day) { + break; } - println!( - "{}", - json.iter() - .filter(|json| start <= json.created_at && json.created_at <= end) - .map(Post::from) - .map(|post| { - format!( - "{} ({}) + + if let Some(oldest_id) = page_newer_than(&page, &day) { + last_id_on_page.replace(oldest_id); + continue; + } + + let json = json + .clone() + .into_iter() + .filter(|json| day.start <= json.created_at && json.created_at <= day.end) + .collect::>(); + trace!("Filtered to {} post(s)", json.len()); + + let mut stream = iter(json); + + while let Some(status) = stream.next().await { + println!( + "{} > {}", - post.created_at.format("%H:%M"), - post.id, - post.content - ) - }) - .collect::>() - .join("\n\n") - ); - let context = client - .get_status_context(String::from("110638913257555200"), None) - .await?; - println!( - "{}", - context - .json + status.created_at.with_timezone(&Local).format("%H:%M"), + parse_html(&status.content) + ); + let Response { json, .. } = client.get_status_context(status.id, None).await?; + let thread = json .descendants - .iter() - .map(Post::from) - .map(|post| { + .into_iter() + .map(|status| { format!( "> -> {} ({}) +> {} >> {}", - post.created_at.format("%H:%M"), - post.id, - post.content + status.created_at.with_timezone(&Local).format("%H:%M"), + parse_html(&status.content) ) }) .collect::>() - .join("\n") - ); - break; + .join("\n"); + println!("{}", thread); + } + + if page_end_older_than(&page, &day) { + debug!("No more posts in range."); + break; + } + + if let Some(id) = page.oldest_id { + last_id_on_page.replace(id.clone()); + } } Ok(()) } + +fn create_client() -> Result> { + let url = env::var("MASTODON_URL")?; + let token = env::var("MASTODON_ACCESS_TOKEN")?; + Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None)) +} + +async fn fetch_page( + client: &Box, + last_id_on_page: &Option, +) -> Result> { + let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() { + debug!("Fetching next page"); + client + .get_local_timeline(Some(&GetLocalTimelineInputOptions { + max_id: Some(max_id.clone()), + ..GetLocalTimelineInputOptions::default() + })) + .await? + } else { + debug!("Fetching first page"); + client.get_local_timeline(None).await? + }; + Ok(json) +} + +fn try_create_range>(date: S) -> Result { + Ok(Range { + start: create_day_bound(&date, 0, 0, 0)?, + end: create_day_bound(date, 23, 59, 59)?, + }) +} + +fn create_day_bound>( + day: S, + hour: u32, + minute: u32, + second: u32, +) -> Result> { + let ts: Vec<&str> = day.as_ref().split("-").collect(); + if ts.len() != 3 { + bail!("Invalid date format! {}", day.as_ref()) + } + let (year, month, day) = if let [year, month, day, ..] = &ts[..] { + (year, month, day) + } else { + bail!("Invalid date format! {}", day.as_ref()) + }; + let b = Local.from_local_datetime( + &NaiveDate::from_ymd_opt(year.parse()?, month.parse()?, day.parse()?) + .ok_or_else(|| format_err!("Invalid date!"))? + .and_hms_opt(hour, minute, second) + .ok_or_else(|| format_err!("Invalid time!"))?, + ); + if let LocalResult::Single(b) = b { + Ok(b) + } else { + bail!("Cannot construct day boundary!") + } +} + +fn page_newer_than(page: &Page, range: &Range) -> Option { + page.oldest + .filter(|oldest| *oldest > &range.end) + .and_then(|_| page.oldest_id.clone()) +} + +fn page_end_older_than(page: &Page, range: &Range) -> bool { + status_older_than(&page.oldest, &range.start) +} + +fn page_start_older_than(page: &Page, range: &Range) -> bool { + status_older_than(&page.newest, &range.start) +} + +fn status_older_than(status: &Option<&DateTime>, dt: &DateTime) -> bool { + status.map(|status| status < dt).unwrap_or_default() +}