Refactor
This commit is contained in:
parent
e9f6051f60
commit
b8a1c5461b
3 changed files with 286 additions and 108 deletions
148
Cargo.lock
generated
148
Cargo.lock
generated
|
@ -41,6 +41,55 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstream"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"anstyle-parse",
|
||||||
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is-terminal",
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anyhow"
|
name = "anyhow"
|
||||||
version = "1.0.71"
|
version = "1.0.71"
|
||||||
|
@ -177,6 +226,53 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "4.3.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3eab9e8ceb9afdade1ab3f0fd8dbce5b1b2f468ad653baf10e771781b2b67b73"
|
||||||
|
dependencies = [
|
||||||
|
"clap_builder",
|
||||||
|
"clap_derive",
|
||||||
|
"once_cell",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.3.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9f2763db829349bf00cfc06251268865ed4363b93a943174f638daf3ecdba2cd"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
|
"clap_lex",
|
||||||
|
"strsim",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_derive"
|
||||||
|
version = "4.3.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.25",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorchoice"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "combine"
|
name = "combine"
|
||||||
version = "4.6.6"
|
version = "4.6.6"
|
||||||
|
@ -520,6 +616,12 @@ version = "0.12.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.3.2"
|
version = "0.3.2"
|
||||||
|
@ -772,6 +874,22 @@ dependencies = [
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "kensho"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"chrono",
|
||||||
|
"clap",
|
||||||
|
"env_logger",
|
||||||
|
"html2md",
|
||||||
|
"log",
|
||||||
|
"megalodon",
|
||||||
|
"rss",
|
||||||
|
"tokio",
|
||||||
|
"tokio-stream",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
|
@ -1737,6 +1855,17 @@ dependencies = [
|
||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tokio-stream"
|
||||||
|
version = "0.1.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"pin-project-lite",
|
||||||
|
"tokio",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-tungstenite"
|
name = "tokio-tungstenite"
|
||||||
version = "0.19.0"
|
version = "0.19.0"
|
||||||
|
@ -1883,6 +2012,12 @@ version = "0.7.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8parse"
|
||||||
|
version = "0.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "vcpkg"
|
name = "vcpkg"
|
||||||
version = "0.2.15"
|
version = "0.2.15"
|
||||||
|
@ -2005,19 +2140,6 @@ dependencies = [
|
||||||
"web-sys",
|
"web-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "watershed"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"anyhow",
|
|
||||||
"chrono",
|
|
||||||
"env_logger",
|
|
||||||
"html2md",
|
|
||||||
"megalodon",
|
|
||||||
"rss",
|
|
||||||
"tokio",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "web-sys"
|
name = "web-sys"
|
||||||
version = "0.3.64"
|
version = "0.3.64"
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
[package]
|
[package]
|
||||||
name = "watershed"
|
name = "kensho"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
|
@ -8,8 +8,11 @@ edition = "2021"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.71"
|
anyhow = "1.0.71"
|
||||||
chrono = "0.4.26"
|
chrono = "0.4.26"
|
||||||
|
clap = { version = "4.3.12", features = ["default", "derive"] }
|
||||||
env_logger = "0.10.0"
|
env_logger = "0.10.0"
|
||||||
html2md = "0.2.14"
|
html2md = "0.2.14"
|
||||||
|
log = "0.4.19"
|
||||||
megalodon = "0.8.3"
|
megalodon = "0.8.3"
|
||||||
rss = "2.0.4"
|
rss = "2.0.4"
|
||||||
tokio = { version = "1.28.2", features = ["default", "full"] }
|
tokio = { version = "1.28.2", features = ["default", "full"] }
|
||||||
|
tokio-stream = "0.1.14"
|
||||||
|
|
241
src/main.rs
241
src/main.rs
|
@ -1,127 +1,180 @@
|
||||||
use anyhow::{format_err, Result};
|
use anyhow::{bail, format_err, Result};
|
||||||
use chrono::{DateTime, Local, NaiveDate, TimeZone, Utc};
|
use chrono::{DateTime, Local, LocalResult, NaiveDate, TimeZone, Utc};
|
||||||
use html2md::parse_html;
|
use html2md::parse_html;
|
||||||
|
use log::{debug, trace};
|
||||||
use megalodon::{
|
use megalodon::{
|
||||||
entities::status::Status, generator, megalodon::GetLocalTimelineInputOptions,
|
entities::Status, generator, megalodon::GetLocalTimelineInputOptions, response::Response,
|
||||||
response::Response,
|
Megalodon,
|
||||||
};
|
};
|
||||||
|
use tokio_stream::{iter, StreamExt};
|
||||||
|
|
||||||
use std::env;
|
use std::env;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct Post {
|
struct Range {
|
||||||
id: String,
|
start: DateTime<Local>,
|
||||||
content: String,
|
end: DateTime<Local>,
|
||||||
created_at: DateTime<Utc>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO implement try_from that looks for descendants and adds them
|
#[derive(Debug)]
|
||||||
impl From<&Status> for Post {
|
struct Page<'a> {
|
||||||
fn from(status: &Status) -> Self {
|
oldest_id: Option<String>,
|
||||||
let Status {
|
oldest: Option<&'a DateTime<Utc>>,
|
||||||
id,
|
newest: Option<&'a DateTime<Utc>>,
|
||||||
created_at,
|
|
||||||
content,
|
|
||||||
..
|
|
||||||
} = status;
|
|
||||||
let id = id.clone();
|
|
||||||
let created_at = created_at.clone();
|
|
||||||
let content = parse_html(&content);
|
|
||||||
Post {
|
|
||||||
id,
|
|
||||||
created_at,
|
|
||||||
content,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
|
env::set_var("RUST_LOG", format!("{}=debug", module_path!()));
|
||||||
env_logger::init();
|
env_logger::init();
|
||||||
|
|
||||||
// TODO add clap and argument for date
|
// TODO add clap and argument for date
|
||||||
let start = Local
|
let day = try_create_range("2023-07-01")?;
|
||||||
.from_local_datetime(
|
|
||||||
&NaiveDate::from_ymd_opt(2023, 7, 1)
|
|
||||||
.ok_or_else(|| format_err!("Invalid date!"))?
|
|
||||||
.and_hms_opt(0, 0, 0)
|
|
||||||
.expect("Failed to construct time!"),
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
let end = Local
|
debug!("Date {}", day.end.format("%Y-%m-%d"));
|
||||||
.from_local_datetime(
|
|
||||||
&NaiveDate::from_ymd_opt(2023, 7, 1)
|
|
||||||
.ok_or_else(|| format_err!("Invallid date!"))?
|
|
||||||
.and_hms_opt(23, 59, 59)
|
|
||||||
.expect("Failed to construct time!"),
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
println!("Date {:#?}", start);
|
let client = create_client()?;
|
||||||
|
|
||||||
let url = env::var("MASTODON_URL")?;
|
let mut last_id_on_page: Option<String> = None;
|
||||||
let token = env::var("MASTODON_ACCESS_TOKEN")?;
|
debug!("Fetching posts");
|
||||||
let client = generator(megalodon::SNS::Mastodon, url, Some(token), None);
|
|
||||||
let mut max_id: Option<String> = None;
|
|
||||||
loop {
|
loop {
|
||||||
let Response { json, .. } = if let Some(max_id) = max_id.as_ref() {
|
let json = fetch_page(&client, &last_id_on_page).await?;
|
||||||
client
|
let page = Page {
|
||||||
.get_local_timeline(Some(&GetLocalTimelineInputOptions {
|
newest: json.first().map(|s| &s.created_at),
|
||||||
max_id: Some(max_id.clone()),
|
oldest_id: json.last().map(|s| s.id.clone()),
|
||||||
..GetLocalTimelineInputOptions::default()
|
oldest: json.last().map(|s| &s.created_at),
|
||||||
}))
|
|
||||||
.await?
|
|
||||||
} else {
|
|
||||||
client.get_local_timeline(None).await?
|
|
||||||
};
|
};
|
||||||
|
trace!("Page bounds {:?}", page);
|
||||||
|
|
||||||
if let Some(last) = json.last() {
|
if last_id_on_page.is_some() && page_start_older_than(&page, &day) {
|
||||||
if last.created_at > start {
|
break;
|
||||||
max_id.replace(last.id.clone());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
println!(
|
|
||||||
"{}",
|
if let Some(oldest_id) = page_newer_than(&page, &day) {
|
||||||
json.iter()
|
last_id_on_page.replace(oldest_id);
|
||||||
.filter(|json| start <= json.created_at && json.created_at <= end)
|
continue;
|
||||||
.map(Post::from)
|
}
|
||||||
.map(|post| {
|
|
||||||
format!(
|
let json = json
|
||||||
"{} ({})
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.filter(|json| day.start <= json.created_at && json.created_at <= day.end)
|
||||||
|
.collect::<Vec<Status>>();
|
||||||
|
trace!("Filtered to {} post(s)", json.len());
|
||||||
|
|
||||||
|
let mut stream = iter(json);
|
||||||
|
|
||||||
|
while let Some(status) = stream.next().await {
|
||||||
|
println!(
|
||||||
|
"{}
|
||||||
> {}",
|
> {}",
|
||||||
post.created_at.format("%H:%M"),
|
status.created_at.with_timezone(&Local).format("%H:%M"),
|
||||||
post.id,
|
parse_html(&status.content)
|
||||||
post.content
|
);
|
||||||
)
|
let Response { json, .. } = client.get_status_context(status.id, None).await?;
|
||||||
})
|
let thread = json
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join("\n\n")
|
|
||||||
);
|
|
||||||
let context = client
|
|
||||||
.get_status_context(String::from("110638913257555200"), None)
|
|
||||||
.await?;
|
|
||||||
println!(
|
|
||||||
"{}",
|
|
||||||
context
|
|
||||||
.json
|
|
||||||
.descendants
|
.descendants
|
||||||
.iter()
|
.into_iter()
|
||||||
.map(Post::from)
|
.map(|status| {
|
||||||
.map(|post| {
|
|
||||||
format!(
|
format!(
|
||||||
">
|
">
|
||||||
> {} ({})
|
> {}
|
||||||
>> {}",
|
>> {}",
|
||||||
post.created_at.format("%H:%M"),
|
status.created_at.with_timezone(&Local).format("%H:%M"),
|
||||||
post.id,
|
parse_html(&status.content)
|
||||||
post.content
|
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect::<Vec<String>>()
|
.collect::<Vec<String>>()
|
||||||
.join("\n")
|
.join("\n");
|
||||||
);
|
println!("{}", thread);
|
||||||
break;
|
}
|
||||||
|
|
||||||
|
if page_end_older_than(&page, &day) {
|
||||||
|
debug!("No more posts in range.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(id) = page.oldest_id {
|
||||||
|
last_id_on_page.replace(id.clone());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn create_client() -> Result<Box<dyn Megalodon + Send + Sync>> {
|
||||||
|
let url = env::var("MASTODON_URL")?;
|
||||||
|
let token = env::var("MASTODON_ACCESS_TOKEN")?;
|
||||||
|
Ok(generator(megalodon::SNS::Mastodon, url, Some(token), None))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn fetch_page(
|
||||||
|
client: &Box<dyn Megalodon + Send + Sync>,
|
||||||
|
last_id_on_page: &Option<String>,
|
||||||
|
) -> Result<Vec<Status>> {
|
||||||
|
let Response { json, .. } = if let Some(max_id) = last_id_on_page.as_ref() {
|
||||||
|
debug!("Fetching next page");
|
||||||
|
client
|
||||||
|
.get_local_timeline(Some(&GetLocalTimelineInputOptions {
|
||||||
|
max_id: Some(max_id.clone()),
|
||||||
|
..GetLocalTimelineInputOptions::default()
|
||||||
|
}))
|
||||||
|
.await?
|
||||||
|
} else {
|
||||||
|
debug!("Fetching first page");
|
||||||
|
client.get_local_timeline(None).await?
|
||||||
|
};
|
||||||
|
Ok(json)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_create_range<S: AsRef<str>>(date: S) -> Result<Range> {
|
||||||
|
Ok(Range {
|
||||||
|
start: create_day_bound(&date, 0, 0, 0)?,
|
||||||
|
end: create_day_bound(date, 23, 59, 59)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn create_day_bound<S: AsRef<str>>(
|
||||||
|
day: S,
|
||||||
|
hour: u32,
|
||||||
|
minute: u32,
|
||||||
|
second: u32,
|
||||||
|
) -> Result<DateTime<Local>> {
|
||||||
|
let ts: Vec<&str> = day.as_ref().split("-").collect();
|
||||||
|
if ts.len() != 3 {
|
||||||
|
bail!("Invalid date format! {}", day.as_ref())
|
||||||
|
}
|
||||||
|
let (year, month, day) = if let [year, month, day, ..] = &ts[..] {
|
||||||
|
(year, month, day)
|
||||||
|
} else {
|
||||||
|
bail!("Invalid date format! {}", day.as_ref())
|
||||||
|
};
|
||||||
|
let b = Local.from_local_datetime(
|
||||||
|
&NaiveDate::from_ymd_opt(year.parse()?, month.parse()?, day.parse()?)
|
||||||
|
.ok_or_else(|| format_err!("Invalid date!"))?
|
||||||
|
.and_hms_opt(hour, minute, second)
|
||||||
|
.ok_or_else(|| format_err!("Invalid time!"))?,
|
||||||
|
);
|
||||||
|
if let LocalResult::Single(b) = b {
|
||||||
|
Ok(b)
|
||||||
|
} else {
|
||||||
|
bail!("Cannot construct day boundary!")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn page_newer_than(page: &Page, range: &Range) -> Option<String> {
|
||||||
|
page.oldest
|
||||||
|
.filter(|oldest| *oldest > &range.end)
|
||||||
|
.and_then(|_| page.oldest_id.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn page_end_older_than(page: &Page, range: &Range) -> bool {
|
||||||
|
status_older_than(&page.oldest, &range.start)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn page_start_older_than(page: &Page, range: &Range) -> bool {
|
||||||
|
status_older_than(&page.newest, &range.start)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn status_older_than(status: &Option<&DateTime<Utc>>, dt: &DateTime<Local>) -> bool {
|
||||||
|
status.map(|status| status < dt).unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue