use std::fs;
use std::path;
use std::borrow::Cow;
use std::iter::Iterator;
use std::collections::{HashMap, BTreeMap, HashSet};

use anyhow::Context;
use sanitise_file_name::sanitise;

use crate::folders;
use crate::rss;

#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
pub(crate) struct Specification<'a> {
    /// Maps episode ids to the paths (relative to the podcast folder) that
    /// the episodes were downloaded to.
    files: HashMap<Cow<'a, str>, Cow<'a, path::Path>>,
    /// This is a collection of episodes, where each entry contains a `Vec` of
    /// episodes to allow for the possibility that multiple episodes have the
    /// same timestamp.
    ///
    /// NOTE: `rss::Timestamp` is an assumed name for the key type (the
    /// ordered, serialisable type of `rss::Item::published`).
    feed: BTreeMap<rss::Timestamp, Vec<Episode<'a>>>,
    image_url: Option<Cow<'a, str>>,
}

impl<'a> Specification<'a> {
    /// Reads the specification at `path`, falling back to the default (empty)
    /// specification if the file does not exist.
    pub(crate) fn read_from_with_default(path: &path::Path) -> Result<Self, anyhow::Error> {
        Ok(if path.is_file() {
            toml::from_str(&fs::read_to_string(path)?[..])?
        } else {
            Specification::default()
        })
    }

    pub(crate) fn read_from(path: &path::Path) -> Result<Self, anyhow::Error> {
        if path.is_file() {
            Ok(toml::from_str(&fs::read_to_string(path)?[..])?)
        } else {
            anyhow::bail!("could not find specification for the desired podcast")
        }
    }

    pub(crate) fn write_to(&self, path: &path::Path) -> Result<(), anyhow::Error> {
        Ok(fs::write(path, toml::to_string(self)?.as_bytes())?)
    }

    pub(crate) fn feed_iter(&self) -> impl Iterator<Item = (&rss::Timestamp, &Vec<Episode<'a>>)> {
        self.feed.iter()
    }

    pub(crate) fn feed_iter_mut(
        &mut self,
    ) -> impl Iterator<Item = (&rss::Timestamp, &mut Vec<Episode<'a>>)> {
        self.feed.iter_mut()
    }

    pub(crate) fn path_from_id(&self, id: &str) -> Option<&path::Path> {
        self.files.get(id).map(|v| &**v)
    }

    pub(crate) fn feed(&self) -> &BTreeMap<rss::Timestamp, Vec<Episode<'a>>> {
        &self.feed
    }

    pub(crate) fn into_feed_and_files(
        self,
    ) -> (
        BTreeMap<rss::Timestamp, Vec<Episode<'a>>>,
        HashMap<Cow<'a, str>, Cow<'a, path::Path>>,
    ) {
        (self.feed, self.files)
    }
}

#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub(crate) struct Episode<'a> {
    /// Episode title.
    title: Cow<'a, str>,
    /// Show notes pulled from the description or summary tag.
    show_notes: Option<Cow<'a, str>>,
    /// This is the GUID, or the URL if the GUID is not present.
    id: Cow<'a, str>,
    /// Whether the episode exists in the latest version of the feed.
    current: bool,
    /// Flag to keep track of which episodes have been listened to.
    #[serde(default)]
    pub(crate) listened: bool,
}

impl<'a> Episode<'a> {
    pub(crate) fn title(&self) -> &str {
        self.title.as_ref()
    }

    pub(crate) fn id(&self) -> &str {
        &self.id
    }
}
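// Example (a sketch): loading an existing specification, marking an episode
// as listened, and writing it back. The path and episode title here are
// illustrative, not part of this module's API.
//
//     let spec_path = path::Path::new("podcasts/my-show/spec.toml");
//     let mut spec = Specification::read_from(spec_path)?;
//     for (_, episodes) in spec.feed_iter_mut() {
//         for episode in episodes {
//             if episode.title() == "Episode 1" {
//                 episode.listened = true;
//             }
//         }
//     }
//     spec.write_to(spec_path)?;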
fn download_to_file(url: &str, path: &path::Path) -> anyhow::Result<()> {
    let response = minreq::get(url).send()?;
    if response.status_code == 200 {
        fs::write(path, response.as_bytes())?;
    } else {
        anyhow::bail!(
            "request for episode resulted in non-200 ({}) response code",
            response.status_code
        )
    }
    Ok(())
}

pub(crate) fn update_podcast(
    alias: &str,
    root: &path::Path,
    feed_location: &str,
) -> anyhow::Result<()> {
    // Create the output directory if it does not already exist
    let output = folders::podcast_folder(root, alias);
    if !output.exists() {
        fs::create_dir_all(&output)
            .context(format!("failed to create output directory for podcast {}", alias))?;
    }

    println!(r#"[info] scanning feed for "{}""#, alias);

    if feed_location.starts_with("http") {
        let feed_url = feed_location;
        // Get the podcast feed. A User-Agent header is set for hosts (such as
        // SquareSpace) which refuse requests without one.
        let response = minreq::get(feed_url)
            .with_header("User-Agent", "podcast-downloader")
            .with_header("Accept", "*/*")
            .send()
            .context(format!(r#"error when requesting feed url "{}" for {}"#, feed_url, alias))?;

        if response.status_code != 200 {
            eprintln!(
                r#"[error] feed "{}" for alias {} responded with non-200 ({}) status code"#,
                feed_url, alias, response.status_code
            );
            return Ok(());
        }

        let feed = response.as_str()?.to_owned();
        update_podcast_from_feed(&output, &feed)
    } else {
        let feed_path = root.join(feed_location);
        match fs::read_to_string(&feed_path) {
            Ok(feed) => update_podcast_from_feed(&output, &feed),
            Err(err) => {
                eprintln!(
                    r#"[error] failed to read path "{}" with error {}"#,
                    feed_path.display(),
                    err
                );
                Ok(())
            }
        }
    }
}

fn extract_extension_from_url(url: &str) -> Result<Option<String>, url::ParseError> {
    let parsed = url::Url::parse(url)?;
    // Only look at the path component so that query parameters, fragments and
    // dots in the host name are not mistaken for part of a file extension
    match parsed.path().rsplit_once('.') {
        Some((_, extension)) if !extension.contains('/') => Ok(Some(extension.to_owned())),
        _ => Ok(None),
    }
}
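// For example (illustrative URLs), given that the extension is taken from the
// path component only:
//
//     extract_extension_from_url("https://cdn.example.com/ep1.mp3?token=abc")
//         => Ok(Some("mp3".to_owned()))
//     extract_extension_from_url("https://cdn.example.com/episode-one")
//         => Ok(None)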
fn update_artwork<'a, 'b>(
    channel: &rss::Channel<'a>,
    spec: &mut Specification<'b>,
    output: &path::Path,
) -> anyhow::Result<()>
where
    'a: 'b,
{
    // Prefer the regular image tag, falling back to the itunes:image tag
    let image_url = match (&channel.image, &channel.itunes_image) {
        (Some(image), _) => Some(&image.url),
        (_, Some(itunes_image)) => Some(&itunes_image.href),
        _ => None,
    };

    match (&spec.image_url, image_url) {
        // They match, so no need to change anything
        (Some(old), Some(new)) if old == new => (),
        // New and different URL
        (_, Some(new)) => {
            match extract_extension_from_url(new.as_ref()) {
                Ok(Some(extension)) => {
                    let cover_path = output.join(format!("cover-original.{}", extension));
                    // Remove a cover with a conflicting file path if it exists
                    if cover_path.exists() {
                        fs::remove_file(&cover_path)?;
                    }
                    if let Err(err) = download_to_file(new.as_ref(), &cover_path) {
                        eprintln!(r#"[error] failed to download artwork with error "{}". skipping"#, err);
                    }
                }
                Ok(None) => {
                    println!(
                        r#"[warning] could not identify file type from url "{}" for podcast artwork "{}". skipping."#,
                        new, channel.title
                    );
                }
                Err(err) => {
                    println!(
                        r#"[warning] failed to parse url "{}" for "{}" artwork with error: {}. skipping."#,
                        new, channel.title, err
                    );
                }
            };
            spec.image_url = Some(new.clone());
        }
        _ => (),
    }

    Ok(())
}

pub(crate) fn update_podcast_from_feed(
    output: &path::Path,
    feed: &str,
) -> anyhow::Result<()> {
    // NOTE: `rss::Feed` is an assumed name for the top-level document type;
    // the source confirms only that it exposes the channel via `.rss.channel`
    let feed = match xml_serde::from_str::<rss::Feed>(feed) {
        Ok(feed) => feed,
        Err(err) => {
            eprintln!(r#"[error] failed to parse rss feed with error: "{}""#, err);
            return Ok(());
        }
    };
    let channel = feed.rss.channel;

    let spec_file = output.join("spec.toml");
    let mut spec = Specification::read_from_with_default(&spec_file)?;

    // Get the set of all currently available episode ids so that episodes
    // which have disappeared from the feed can later be marked as no longer
    // current. This mirrors the id scheme used below: the GUID if present,
    // otherwise the enclosure URL.
    let current_episodes = {
        let mut current_episodes = HashSet::new();
        for item in &channel.items {
            let id = match (item.guid.as_deref(), &item.enclosure) {
                (Some(guid), _) => guid.to_owned(),
                (None, Some(enclosure)) => enclosure.url.to_string(),
                // An item with neither a GUID nor an enclosure is skipped
                // below anyway
                _ => continue,
            };
            current_episodes.insert(id);
        }
        current_episodes
    };

    update_artwork(&channel, &mut spec, output)?;

    for item in channel.items {
        let rss::Item { title, enclosure, description, summary, guid, .. } = item;

        let Some(enclosure) = enclosure else {
            println!(r#"[warning] episode "{}" does not have an enclosure tag. skipping."#, title);
            continue;
        };

        // Prefer the description, falling back to the summary
        let description = match (description, summary) {
            (Some(a), _) => Some(a),
            (_, Some(a)) => Some(a),
            _ => None,
        };

        let guid = guid.as_deref();
        let url = enclosure.url.as_ref();
        let id = guid.unwrap_or(url);

        match spec.files.get(id) {
            // File already downloaded
            Some(path) => {
                // The file has been deleted by another process but the
                // specification hasn't been updated. In this case we just
                // redownload the file. This gives an easy way to force a
                // redownload. The stored path is relative to the podcast
                // folder, so join it onto `output` before writing.
                if !output.join(path).exists() {
                    println!(r#"[info] redownloading "{}" as the file seems to have been deleted"#, title);
                    if let Err(err) = download_to_file(enclosure.url.as_ref(), &output.join(path)) {
                        eprintln!(r#"[error] failed to redownload episode with error: "{}". skipping"#, err);
                        continue;
                    }
                }
            }
            None => {
                let extension = match extract_extension_from_url(enclosure.url.as_ref()) {
                    Ok(Some(extension)) => extension,
                    Ok(None) => {
                        println!(r#"[warning] could not identify file type from url "{}" for episode "{}". skipping."#, url, title);
                        continue;
                    }
                    Err(err) => {
                        println!(r#"[warning] failed to parse url "{}" for episode "{}" with error: {}. skipping."#, url, title, err);
                        continue;
                    }
                };

                let file_path = if ["mp3", "m4a", "ogg", "wav", "mp4", "m4v", "mov", "aiff"]
                    .contains(&&extension.to_lowercase()[..])
                {
                    output.join(format!("{}.{}", sanitise(&title), extension))
                } else {
                    println!("[warning] unsupported file extension: {}. skipping.", extension);
                    continue;
                };

                // The file name may already exist even though the episode has
                // not been downloaded. In this case construct a new file name
                // by appending a counter to the end.
                let file_path = if file_path.exists() {
                    increment_file_name(&file_path).into_owned()
                } else {
                    file_path
                };

                println!(r#"[info] downloading "{}""#, title);
                match download_to_file(enclosure.url.as_ref(), &file_path) {
                    Ok(()) => {
                        // `file_path` was constructed by joining onto
                        // `output`, so stripping the prefix cannot fail
                        spec.files.insert(
                            Cow::from(id.to_owned()),
                            Cow::from(file_path.strip_prefix(output).unwrap().to_owned()),
                        );
                        let episode = Episode {
                            show_notes: description,
                            id: Cow::from(id.to_owned()),
                            current: true,
                            title,
                            listened: false,
                        };
                        match spec.feed.get_mut(&item.published) {
                            Some(existing) => existing.push(episode),
                            None => {
                                spec.feed.insert(item.published, vec![episode]);
                            }
                        }
                        // Update the specification file as we go so that
                        // progress is not lost if a later download fails
                        spec.write_to(&spec_file)?;
                    }
                    Err(err) => {
                        eprintln!(r#"[error] failed to request episode "{}" with error: "{}". skipping"#, title, err);
                        continue;
                    }
                }
            }
        }
    }

    // Synchronise the `current` flag with the set of episodes seen in this
    // version of the feed, writing the specification back only if something
    // actually changed
    let mut feed_change = false;
    for (_, episodes) in &mut spec.feed {
        for episode in episodes {
            let is_current = current_episodes.contains(episode.id.as_ref());
            if episode.current != is_current {
                episode.current = is_current;
                feed_change = true;
            }
        }
    }
    if feed_change {
        spec.write_to(&spec_file)?;
    }

    Ok(())
}
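// A minimal driver sketch for a feed already saved on disk (the paths here
// are illustrative):
//
//     let output = path::Path::new("podcasts/my-show");
//     fs::create_dir_all(output)?;
//     let feed = fs::read_to_string("my-show.rss")?;
//     update_podcast_from_feed(output, &feed)?;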
/// Given a file path `something.xyz`, returns the first path of the form
/// `something(a).xyz`, where `a` is a non-negative integer, that does not
/// currently exist, or `something.xyz` itself if it does not exist.
fn increment_file_name(path: &path::Path) -> Cow<'_, path::Path> {
    if path.exists() {
        let mut new_path = path.to_owned();
        let mut i: u32 = 0;
        while new_path.exists() {
            let mut stem = path.file_stem().unwrap().to_owned();
            stem.push(format!("({})", i));
            new_path.set_file_name(stem);
            if let Some(extension) = path.extension() {
                new_path.set_extension(extension);
            }
            i += 1;
        }
        Cow::from(new_path)
    }
    // This branch could be folded into the one above at the cost of an extra
    // clone, but a path that does not already exist is by far the most common
    // case and the cheapest to handle, so it is returned borrowed instead
    else {
        Cow::from(path)
    }
}
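// Minimal sanity checks for the pure helpers above; the URLs and paths are
// illustrative.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn extension_extracted_from_url_path() {
        // Query parameters must not leak into the extension
        let ext = extract_extension_from_url("https://example.com/ep.mp3?session=1").unwrap();
        assert_eq!(ext.as_deref(), Some("mp3"));
    }

    #[test]
    fn missing_extension_yields_none() {
        let ext = extract_extension_from_url("https://example.com/episode-one").unwrap();
        assert_eq!(ext, None);
    }

    #[test]
    fn increment_file_name_leaves_missing_files_untouched() {
        // A path that does not exist is returned unchanged (and borrowed)
        let path = path::Path::new("does-not-exist/episode.mp3");
        assert_eq!(&*increment_file_name(path), path);
    }
}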