2025-09-01 10:07:28 +10:00
|
|
|
use std::{
|
|
|
|
|
fs,
|
|
|
|
|
path,
|
|
|
|
|
borrow::Cow,
|
|
|
|
|
collections::HashSet,
|
|
|
|
|
};
|
2024-01-11 17:30:55 +11:00
|
|
|
|
|
|
|
|
use anyhow::Context;
|
|
|
|
|
use sanitise_file_name::sanitise;
|
|
|
|
|
|
2025-09-01 10:07:28 +10:00
|
|
|
use crate::{
|
|
|
|
|
rss,
|
2025-09-21 09:07:18 +10:00
|
|
|
input,
|
2025-09-01 10:07:28 +10:00
|
|
|
folders,
|
|
|
|
|
manage::{
|
|
|
|
|
Specification,
|
|
|
|
|
Episode,
|
|
|
|
|
},
|
|
|
|
|
};
|
2024-01-11 17:30:55 +11:00
|
|
|
|
2025-08-29 21:33:20 +10:00
|
|
|
fn download_to_file(url: &str, path: &path::Path) -> anyhow::Result<()> {
|
2024-01-11 17:30:55 +11:00
|
|
|
let response = minreq::get(url)
|
|
|
|
|
.send()?;
|
|
|
|
|
|
|
|
|
|
if response.status_code == 200 {
|
|
|
|
|
fs::write(&path, response.as_bytes())?;
|
|
|
|
|
} else {
|
|
|
|
|
anyhow::bail!("request for episode resulted in non 200 ({}) response code", response.status_code)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-29 21:33:20 +10:00
|
|
|
pub(crate) fn update_podcast(
|
|
|
|
|
alias: &str,
|
|
|
|
|
root: &path::Path,
|
2025-09-21 09:07:18 +10:00
|
|
|
source: &input::Source,
|
2024-01-11 17:30:55 +11:00
|
|
|
) -> anyhow::Result<()> {
|
|
|
|
|
|
2025-09-21 09:07:18 +10:00
|
|
|
if source.skip_download() {
|
|
|
|
|
println!(r#"[info] skipping download for "{}""#, alias);
|
|
|
|
|
return Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2024-01-11 17:30:55 +11:00
|
|
|
// Create output directory
|
2025-08-29 21:46:22 +10:00
|
|
|
let output = folders::podcast_folder(root, alias);
|
2024-01-11 17:30:55 +11:00
|
|
|
if !output.exists() {
|
2025-08-29 21:46:22 +10:00
|
|
|
fs::create_dir_all(&output)
|
|
|
|
|
.context(format!("failed to create output directory for podcast {}", alias))?;
|
2024-01-11 17:30:55 +11:00
|
|
|
}
|
|
|
|
|
|
2024-01-21 13:48:08 +11:00
|
|
|
println!(r#"[info] scanning feed for "{}""#, alias);
|
2024-01-11 17:30:55 +11:00
|
|
|
|
2025-09-21 09:07:18 +10:00
|
|
|
match source.source() {
|
|
|
|
|
input::SourceKind::Url(feed_url) => {
|
|
|
|
|
// Get the podcast feed
|
|
|
|
|
let response = minreq::get(feed_url)
|
|
|
|
|
// For SquareSpace which refuses requests with no User-Agent
|
|
|
|
|
.with_header("User-Agent", "podcast-downloader")
|
|
|
|
|
.with_header("Accept", "*/*")
|
|
|
|
|
.send()
|
|
|
|
|
.context(format!(r#"error when requesting feed url "{}" for {}"#, feed_url, alias))?;
|
|
|
|
|
|
|
|
|
|
if response.status_code != 200 {
|
|
|
|
|
eprintln!(r#"[error] feed "{}" for alias {} responded with non-200 ({}) status code"#, feed_url, alias, response.status_code);
|
|
|
|
|
return Ok(());
|
|
|
|
|
}
|
2024-01-11 17:30:55 +11:00
|
|
|
|
2025-09-21 09:07:18 +10:00
|
|
|
let feed = response.as_str()?.to_owned();
|
|
|
|
|
update_podcast_from_feed(&output, &feed)
|
2024-01-11 17:30:55 +11:00
|
|
|
}
|
2025-09-21 09:07:18 +10:00
|
|
|
input::SourceKind::Path(feed_location) => {
|
|
|
|
|
let feed_path = root.join(feed_location);
|
|
|
|
|
match fs::read_to_string(&feed_path) {
|
|
|
|
|
Ok(feed) => update_podcast_from_feed(&output, &feed),
|
|
|
|
|
Err(err) => {
|
|
|
|
|
eprintln!(r#"[error] failed to read path "{}" with error {}"#, feed_path.display(), err);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
2024-01-11 17:30:55 +11:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-29 21:33:20 +10:00
|
|
|
/// Extracts the file extension from `url`, e.g. `"mp3"` from
/// `https://host/ep.mp3?sig=abc`.
///
/// Only the URL's path component is inspected, so dots in the host name
/// or in query/fragment values are never mistaken for an extension.
/// Returns `Ok(None)` when the final path segment contains no dot.
///
/// # Errors
/// Returns the parse error when `url` is not a valid URL.
fn extract_extension_from_url(url: &str) -> Result<Option<String>, url::ParseError> {
    let parsed = url::Url::parse(url)?;

    // Take the last path segment and split on its final dot. Working on the
    // path alone fixes the previous behaviour, where a URL without a path
    // extension (e.g. https://example.com/episode) matched the dot in the
    // host name and yielded a bogus extension like "com/episode".
    let extension = parsed
        .path()
        .rsplit('/')
        .next()
        .and_then(|segment| segment.rsplit_once('.'))
        .map(|(_, extension)| extension.to_owned());

    Ok(extension)
}
|
|
|
|
|
|
|
|
|
|
/// Downloads the channel artwork into `output` when the feed advertises an
/// image URL that differs from the one recorded in `spec`, then records the
/// attempted URL in `spec.image_url`.
///
/// Download and URL-parse failures are logged and skipped rather than
/// propagated; the only hard error here is failing to delete a conflicting
/// existing cover file.
fn update_artwork<'a, 'b>(
    // Parsed RSS channel supplying the candidate artwork URL
    channel: &rss::Channel<'a>,
    // Podcast specification; its `image_url` is read and updated
    spec: &mut Specification<'b>,
    // Podcast output directory the cover file is written into
    output: &path::Path,
) -> anyhow::Result<()> where 'a: 'b {

    // Prefer the standard RSS <image> over the iTunes extension when both exist
    let image_url = match (&channel.image, &channel.itunes_image) {
        (Some(image), _) => Some(&image.url),
        (_, Some(itunes_image)) => Some(&itunes_image.href),
        _ => None,
    };

    match (spec.image_url.as_deref(), image_url) {
        // They match, so no need to change anything
        (Some(old), Some(new)) if old == new => (),
        // New and different URL (or nothing recorded yet)
        (_, Some(new)) => {
            // Derive the cover file name from the URL's extension so the
            // saved artwork keeps its original format
            match extract_extension_from_url(new.as_ref()) {
                Ok(Some(extension)) => {
                    let cover_path = output.join(format!("cover-original.{}", extension));
                    // Remove cover with conflicting file path if it exists
                    if cover_path.exists() {
                        fs::remove_file(&cover_path)?;
                    }
                    // Best-effort download: a failure is only logged, yet the
                    // new URL is still recorded below, so the download is not
                    // retried until the feed changes again —
                    // NOTE(review): confirm this "record even on failure"
                    // behaviour is intended
                    if let Err(err) = download_to_file(new.as_ref(), &cover_path) {
                        eprintln!(r#"[error] failed to download artwork with error "{}". skipping"#, err);
                    }
                },
                Ok(None) => {
                    println!(r#"[warning] could not identify file type from url "{}" for podcast artwork "{}". skipping."#, new, channel.title);
                }
                Err(err) => {
                    println!(r#"[warning] failed to parse url "{}" for "{}" artwork with error: {}. skipping."#, new, channel.title, err);
                },
            };

            // Remember the URL we just attempted so unchanged feeds skip all
            // of the work above on the next run
            spec.image_url = Some(new.clone());
        },
        // Feed advertises no artwork: leave any previously recorded URL alone
        _ => (),
    }

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
|
2025-08-29 21:33:20 +10:00
|
|
|
/// Parses `feed` as RSS and synchronises the podcast folder at `output` with
/// it: refreshes the artwork, downloads any episodes not yet recorded in the
/// specification file, and marks episodes that have left the feed as no
/// longer current.
///
/// A feed that fails to parse is logged and treated as success so that a
/// single malformed podcast does not abort a batch run. Per-episode problems
/// (missing enclosure, unknown extension, failed download) are logged and the
/// episode is skipped.
pub(crate) fn update_podcast_from_feed(
    // Podcast output directory (also holds the specification file)
    output: &path::Path,
    // Raw RSS/XML feed text
    feed: &str,
) -> anyhow::Result<()> {

    let feed = match xml_serde::from_str::<rss::Feed>(&feed) {
        Ok(feed) => feed,
        Err(err) => {
            eprintln!(r#"[error] failed to parse rss feed with error: "{}""#, err);
            return Ok(())
        }
    };

    let channel = feed.rss.channel;

    let spec_file = output.join(folders::SPEC_FILE);

    // Loads the existing spec, or a default one if none exists yet
    let mut spec = Specification::read_from_with_default(&spec_file)?;

    // Get set of all currently available episodes such that we can later mark
    // any other episodes as unavailable
    let current_episodes = {
        let mut current_episodes = HashSet::new();
        for episode in &channel.items {
            // NOTE(review): this unwrap panics on an item without a <guid>,
            // while the download loop below falls back to the enclosure URL
            // for such items — consider using the same fallback here
            let guid = episode.guid.clone().unwrap();
            current_episodes.insert(guid);
        }
        current_episodes
    };

    update_artwork(
        &channel,
        &mut spec,
        &output,
    )?;

    for item in channel.items {

        // Partial move: fields listed here are moved out; fields covered by
        // `..` (e.g. `published`) remain accessible via `item` below
        let rss::Item {
            title,
            enclosure,
            description,
            summary,
            guid,
            ..
        } = item;

        // Without an enclosure there is nothing to download
        let Some(enclosure) = enclosure else {
            println!(r#"[warning] episode "{}" does not have an enclosure tag. skipping."#, title);
            continue;
        };

        // Prefer <description>, falling back to the iTunes <summary>
        let description = match (description, summary) {
            (Some(a), _) => Some(a),
            (_, Some(a)) => Some(a),
            _ => None,
        };

        let guid = guid.as_deref();
        let url = enclosure.url.as_ref();

        // Episodes are keyed by guid when present, otherwise by enclosure URL
        let id = guid.unwrap_or(url);

        match spec.path_from_id(id) {
            // File already downloaded
            Some(path) => {
                // File has been deleted by another process but the specification hasn't been updated
                // In this case we just redownload the file
                // This gives an easy way to force a redownload
                if !output.join(path).exists() {
                    println!(r#"[info] redownloading "{}" as the file seems to have been deleted"#, title);
                    // NOTE(review): the existence check uses output.join(path)
                    // but the download writes to `path` as-is — if `path` is
                    // stored relative to `output`, this lands relative to the
                    // current working directory instead; verify
                    if let Err(err) = download_to_file(enclosure.url.as_ref(), path) {
                        eprintln!(r#"[error] failed to redownload new episode with error: "{}". skipping"#, err);
                        continue;
                    }
                }
            },
            None => {
                // New episode: work out the file extension from the URL
                let extension = match extract_extension_from_url(enclosure.url.as_ref()) {
                    Ok(Some(extension)) => extension,
                    Ok(None) => {
                        println!(r#"[warning] could not identify file type from url "{}" for episode "{}". skipping."#, url, title);
                        continue;
                    }
                    Err(err) => {
                        println!(r#"[warning] failed to parse url "{}" for episode "{}" with error: {}. skipping."#, url, title, err);
                        continue;
                    },
                };

                // Only media extensions we expect are accepted; the title is
                // sanitised so it is safe to use as a file name
                let file_path = if ["mp3", "m4a", "ogg", "wav", "mp4", "m4v", "mov", "aiff"].contains(&&extension.to_lowercase()[..]) {
                    output.join(format!("{}.{}", sanitise(&title), extension))
                } else {
                    println!("[warning] unsupported file extension: {}. skipping.", extension);
                    continue;
                };

                // The filename happens to exist despite the episode not being downloaded.
                // In this case we need to construct a new filename by appending a digit to the end
                let file_path = if file_path.exists() {
                    increment_file_name(&file_path).into_owned()
                } else { file_path };

                println!(r#"[info] downloading "{}""#, title);

                match download_to_file(enclosure.url.as_ref(), &file_path) {
                    Ok(()) => {
                        // Canonicalise so the stored path is relative to the
                        // output directory regardless of how we were invoked
                        let file_path = file_path.canonicalize().unwrap();
                        let relative_path = file_path.strip_prefix(&output).unwrap();

                        if let Some(previous) = spec.insert_into_files(
                            id.to_owned(),
                            relative_path.to_owned(),
                        ) {
                            println!("[warning] duplicate id {:?} for episodes {:?} and {:?}", id, previous, relative_path);

                            // Revert to the previous file
                            spec.insert_into_files(id.to_owned(), previous);
                            // Delete the newly downloaded file
                            fs::remove_file(file_path)?;
                            // Skip
                            continue;
                        }

                        let episode = Episode::new_downloaded(title, description, id.to_owned());

                        // `item.published` is still accessible: it was not
                        // moved out by the destructuring above
                        spec.insert_into_feed(item.published, episode);

                        // Persist the spec after every successful download so
                        // progress survives a crash part-way through the feed
                        spec.write_to(&spec_file)?;
                    },
                    Err(err) => {
                        eprintln!(r#"[error] failed to request episode "{}" with error: "{}". skipping"#, title, err);
                        continue;
                    }
                }
            },
        }
    }

    let mut feed_change = false;
    // Setting episodes which have been removed to no longer be current
    for (_, episodes) in spec.feed_iter_mut() {
        for episode in episodes {
            if !current_episodes.contains(episode.id()) {
                episode.current = false;
                feed_change = true;
            }
        }
    }

    // Only rewrite the spec file when the loop above actually changed it
    if feed_change {
        spec.write_to(&spec_file)?;
    }

    Ok(())
}
|
|
|
|
|
|
|
|
|
|
/// Given a file path `something.xyz`, returns the first path of the form
/// `something(a).xyz` where `a` is a non-negative integer which does not
/// currently exist, or `something.xyz` if it itself does not exist.
fn increment_file_name(path: &path::Path) -> Cow<'_, path::Path> {
    // Fast path: the path is free, hand back a borrow with no allocation.
    // This is by far the common case, so it is checked first.
    if !path.exists() {
        return Cow::from(path);
    }

    let original_stem = path.file_stem().unwrap();
    let extension = path.extension();
    let mut candidate = path.to_owned();

    // Try `stem(0).ext`, `stem(1).ext`, ... until a free name is found
    let mut counter: u32 = 0;
    loop {
        let mut file_name = original_stem.to_owned();
        file_name.push(format!("({})", counter));
        candidate.set_file_name(file_name);
        if let Some(extension) = extension {
            candidate.set_extension(extension);
        }

        if !candidate.exists() {
            return Cow::from(candidate);
        }
        counter += 1;
    }
}
|
|
|
|
|
|