use std::fs; use std::path; use std::borrow::Cow; use std::collections::{HashMap, BTreeMap, HashSet}; use anyhow::Context; use sanitise_file_name::sanitise; use crate::rss; #[derive(Default, serde::Serialize, serde::Deserialize)] struct Specification<'a> { files : HashMap, Cow<'a, path::Path>>, feed : BTreeMap>>, image_url : Option>, } impl<'a> Specification<'a> { fn read_from(path : &path::Path) -> Result { Ok(if path.is_file() { toml::from_str(&fs::read_to_string(&path)?[..])? } else { Specification::default() }) } fn write_to(&self, path : &path::Path) -> Result<(), anyhow::Error> { Ok(fs::write(path, toml::to_string(self)?.as_bytes())?) } } #[derive(serde::Serialize, serde::Deserialize)] struct Episode<'a> { /// Episode title. title : Cow<'a, str>, /// Show notes pulled from description or summary tag. show_notes : Option>, /// This is the GUID or the URL if the GUID is not present. id : Cow<'a, str>, /// If the episode exists in the latest version of the feed. current : bool, } fn download_to_file(url : &str, path : &path::Path) -> anyhow::Result<()> { let response = minreq::get(url) .send()?; if response.status_code == 200 { fs::write(&path, response.as_bytes())?; } else { anyhow::bail!("request for episode resulted in non 200 ({}) response code", response.status_code) } Ok(()) } pub (crate) fn update_podcast( alias : &str, root : &path::Path, feed_location : &str, ) -> anyhow::Result<()> { // Create output directory let output = root.join(sanitise(&alias)); if !output.exists() { fs::create_dir(&output) .with_context(|| format!("failed to create output directory for podcast {}", alias))?; } println!(r#"info: scanning feed for "{}""#, alias); if feed_location.starts_with("http") { let feed_url = feed_location; // Get the podcast feed let response = minreq::get(feed_url) // For SquareSpace which refuses requests with no User-Agent .with_header("User-Agent", "podcast-downloader") .with_header("Accept", "*/*") .send() .with_context(|| format!(r#"error when requesting feed url "{}" for {}"#, feed_url, alias))?; if response.status_code != 200 { eprintln!(r#"error: feed "{}" for alias {} responded with non-200 ({}) status code"#, feed_url, alias, response.status_code); return Ok(()); } let feed = response.as_str()?.to_owned(); update_podcast_from_feed(&output, &feed) } else { let feed_path = root.join(feed_location); match fs::read_to_string(&feed_path) { Ok(feed) => update_podcast_from_feed(&output, &feed), Err(err) => { eprintln!(r#"error: failed to read path "{}" with error {}"#, feed_path.display(), err); Ok(()) } } } } fn extract_extension_from_url(url : &str) -> Result, url::ParseError> { let mut url_edited = url::Url::parse(url)?; url_edited.set_query(None); match url_edited.as_str().rsplit_once('.') { Some((_, extension)) => Ok(Some(extension.to_owned())), None => Ok(None), } } fn update_artwork<'a, 'b>( channel : &rss::Channel<'a>, spec : &mut Specification<'b>, output : &path::Path, ) -> anyhow::Result<()> where 'a : 'b { let image_url = match (&channel.image, &channel.itunes_image) { (Some(image), _) => Some(&image.url), (_, Some(itunes_image)) => Some(&itunes_image.href), _ => None, }; match (&spec.image_url, image_url) { // They match, so no need to change anything (Some(old), Some(new)) if old == new => (), // New and different URL (_, Some(new)) => { match extract_extension_from_url(new.as_ref()) { Ok(Some(extension)) => { let cover_path = output.join(format!("cover.{}", extension)); // Remove cover with conflicting file path if it exists if cover_path.exists() { fs::remove_file(&cover_path)?; } if let Err(err) = download_to_file(new.as_ref(), &cover_path) { eprintln!(r#"error: failed to download artwork with error "{}". skipping"#, err); } }, Ok(None) => { println!(r#"warning: could not identify file type from url "{}" for podcast artwork "{}". skipping."#, new, channel.title); } Err(err) => { println!(r#"warning: failed to parse url "{}" for "{}" artwork with error: {}. skipping."#, new, channel.title, err); }, }; spec.image_url = Some(new.clone()); }, _ => (), } Ok(()) } pub (crate) fn update_podcast_from_feed( output : &path::Path, feed : &str, ) -> anyhow::Result<()> { let feed = match xml_serde::from_str::(&feed) { Ok(feed) => feed, Err(err) => { eprintln!(r#"error: failed to parse rss feed with error: "{}""#, err); return Ok(()) } }; let channel = feed.rss.channel; let spec_file = output.join("spec.toml"); let mut spec = Specification::read_from(&spec_file)?; // Get set of all currently available episodes such that we can later mark // any other episodes as unavailable let current_episodes = { let mut current_episodes = HashSet::new(); for episode in &channel.items { let guid = episode.guid.clone().unwrap(); current_episodes.insert(guid); } current_episodes }; update_artwork( &channel, &mut spec, &output, )?; for item in channel.items { let rss::Item { title, enclosure, description, summary, guid, .. } = item; let Some(enclosure) = enclosure else { println!(r#"warning: episode "{}" does not have an enclosure tag. skipping."#, title); continue; }; let description = match (description, summary) { (Some(a), _) => Some(a), (_, Some(a)) => Some(a), _ => None, }; let guid = guid.as_deref(); let url = enclosure.url.as_ref(); let id = guid.unwrap_or(url); match spec.files.get(id) { // File already downloaded Some(path) => { // File has been deleted by another process but the specification hasn't been updated // In this case we just redownload the file // This gives an easy way to force a redownload if !output.join(path).exists() { println!(r#"info: redownloading "{}" as the file seems to have been deleted"#, title); if let Err(err) = download_to_file(enclosure.url.as_ref(), path) { eprintln!(r#"error: failed to redownload new episode with error "{}". skipping"#, err); continue; } } }, None => { let extension = match extract_extension_from_url(enclosure.url.as_ref()) { Ok(Some(extension)) => extension, Ok(None) => { println!(r#"warning: could not identify file type from url "{}" for episode "{}". skipping."#, url, title); continue; } Err(err) => { println!(r#"warning: failed to parse url "{}" for episode "{}" with error: {}. skipping."#, url, title, err); continue; }, }; let file_path = if ["mp3", "m4a", "ogg", "wav", "mp4", "m4v", "mov", "aiff"].contains(&&extension.to_lowercase()[..]) { output.join(format!("{}.{}", sanitise(&title), extension)) } else { println!("warning: unsupported file extension: {}. skipping.", extension); continue; }; // The filename happens to exist despite the episode not being downloaded. // In this case we need to construct a new filename by appending a digit to the end let file_path = if file_path.exists() { increment_file_name(&file_path).into_owned() } else { file_path }; println!(r#"info: downloading "{}" to "{}""#, title, file_path.display()); match download_to_file(enclosure.url.as_ref(), &file_path) { Ok(()) => { let file_path = file_path.canonicalize().unwrap(); spec.files.insert( Cow::from(id.to_owned()), Cow::from(file_path.strip_prefix(&output).unwrap().to_owned()), ); let episode = Episode { show_notes : description, id : Cow::from(id.to_owned()), current : true, title, }; match spec.feed.get_mut(&item.published) { Some(existing) => { existing.push(episode) }, None => { spec.feed.insert( item.published, vec![episode], ); } } }, Err(err) => { eprintln!(r#"error: failed to request episode "{}" with error "{}". skipping"#, title, err); continue; } } spec.write_to(&spec_file)?; }, } } // Setting episodes which have been removed to no longer be current for (_, episodes) in &mut spec.feed { for episode in episodes { if !current_episodes.contains(episode.id.as_ref()) { episode.current = false; } } } spec.write_to(&spec_file)?; Ok(()) } /// Given a file path `something.xyz`, returns the first path of the form /// `something(a).xyz` where `a` is a non-negative integer which does not /// currently exist, or `something.xyz` if it itself does not exist. fn increment_file_name(path : &path::Path) -> Cow<'_, path::Path> { if path.exists() { let mut new_path = path.to_owned(); let mut i : u32 = 0; while new_path.exists() { let mut stem = path.file_stem().unwrap().to_owned(); let suffix = format!("({})", i); stem.push(suffix); new_path.set_file_name(stem); if let Some(extension) = path.extension() { new_path.set_extension(extension); } i += 1; } Cow::from(new_path) } // This case can easily be removed at the cost of an extra clone, however // because this path is the most likely case by far and less computationally // expensive, it is better to check first else { Cow::from(path) } }