initial code for tag stripping and m3u generation

This commit is contained in:
Aaron Manning
2025-08-29 21:33:20 +10:00
parent 105a3eb892
commit cb47ff0cb8
6 changed files with 337 additions and 49 deletions

198
Cargo.lock generated
View File

@@ -1,6 +1,12 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aho-corasick"
@@ -91,18 +97,50 @@ dependencies = [
"winapi",
]
[[package]]
name = "audiotags"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44e797ce0164cf599c71f2c3849b56301d96a3dc033544588e875686b050ed39"
dependencies = [
"audiotags-macro",
"id3",
"metaflac",
"mp4ameta",
"readme-rustdocifier",
"thiserror",
]
[[package]]
name = "audiotags-macro"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaa9b2312fc01f7291f3b7b0f52ed08b1c0177c96a2e696ab55695cc4d06889"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "2.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d"
[[package]]
name = "bumpalo"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.0.83"
@@ -185,6 +223,15 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "either"
version = "1.9.0"
@@ -210,13 +257,23 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "flate2"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
"percent-encoding 2.3.1",
]
[[package]]
@@ -289,6 +346,28 @@ dependencies = [
"cc",
]
[[package]]
name = "id3"
version = "1.16.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aadb14a5ba1a0d58ecd4a29bfc9b8f1d119eee24aa01a62c1ec93eb9630a1d86"
dependencies = [
"bitflags",
"byteorder",
"flate2",
]
[[package]]
name = "idna"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f09e0f0b1fb55fdee1f17470ad800da77af5186a1a76c026b679358b7e844e"
dependencies = [
"matches",
"unicode-bidi",
"unicode-normalization",
]
[[package]]
name = "idna"
version = "0.5.0"
@@ -327,6 +406,12 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.152"
@@ -339,12 +424,46 @@ version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "m3u"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca48bebf6a7397a81aa8b9dcc8a7cfcbacab3e15a28f49d2aabaa9e3c06def4d"
dependencies = [
"url 1.7.2",
]
[[package]]
name = "matches"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
[[package]]
name = "memchr"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "metaflac"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdf25a3451319c52a4a56d956475fbbb763bfb8420e2187d802485cb0fd8d965"
dependencies = [
"byteorder",
"hex",
]
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
]
[[package]]
name = "minreq"
version = "2.11.0"
@@ -358,6 +477,22 @@ dependencies = [
"webpki-roots",
]
[[package]]
name = "mp4ameta"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb23d62e8eb5299a3f79657c70ea9269eac8f6239a76952689bcd06a74057e81"
dependencies = [
"lazy_static",
"mp4ameta_proc",
]
[[package]]
name = "mp4ameta_proc"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07dcca13d1740c0a665f77104803360da0bdb3323ecce2e93fa2c959a6d52806"
[[package]]
name = "num-traits"
version = "0.2.17"
@@ -373,6 +508,12 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "percent-encoding"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831"
[[package]]
name = "percent-encoding"
version = "2.3.1"
@@ -384,13 +525,15 @@ name = "podcast-hoarder"
version = "0.0.0"
dependencies = [
"anyhow",
"audiotags",
"chrono",
"clap",
"m3u",
"minreq",
"sanitise-file-name",
"serde",
"toml",
"url",
"url 2.5.0",
"xml_serde",
]
@@ -406,9 +549,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.76"
version = "1.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
dependencies = [
"unicode-ident",
]
@@ -428,6 +571,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "readme-rustdocifier"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08ad765b21a08b1a8e5cdce052719188a23772bcbefb3c439f0baaf62c56ceac"
[[package]]
name = "regex"
version = "1.10.2"
@@ -552,9 +701,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "2.0.48"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
"proc-macro2",
"quote",
@@ -570,6 +719,26 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
@@ -646,6 +815,17 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "url"
version = "1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd4e7c0d531266369519a4aa4f399d748bd37043b00bde1e4ff1f60a120b355a"
dependencies = [
"idna 0.1.5",
"matches",
"percent-encoding 1.0.1",
]
[[package]]
name = "url"
version = "2.5.0"
@@ -653,8 +833,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
"idna 0.5.0",
"percent-encoding 2.3.1",
]
[[package]]

View File

@@ -7,8 +7,10 @@ license = "GPL-2.0-only"
[dependencies]
anyhow = "1.0.76"
audiotags = "0.5.0"
chrono = { version = "0.4.31", features = ["serde"] }
clap = { version = "4.4.11", features = ["derive"] }
m3u = "1.0.0"
minreq = { version = "2.11.0", features = ["https"] }
sanitise-file-name = "1.0.0"
serde = { version = "1.0.193", features = ["derive"] }

View File

@@ -9,15 +9,18 @@ use sanitise_file_name::sanitise;
use crate::rss;
#[derive(Default, serde::Serialize, serde::Deserialize)]
pub (crate) struct Specification<'a> {
files : HashMap<Cow<'a, str>, Cow<'a, path::Path>>,
feed : BTreeMap<chrono::NaiveDateTime, Vec<Episode<'a>>>,
image_url : Option<Cow<'a, str>>,
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
pub(crate) struct Specification<'a> {
files: HashMap<Cow<'a, str>, Cow<'a, path::Path>>,
/// This is a collection of episodes, where each entry contains a `Vec` of
/// episodes to allow for the possibility that multiple episodes have the
/// same timestamp.
feed: BTreeMap<chrono::NaiveDateTime, Vec<Episode<'a>>>,
image_url: Option<Cow<'a, str>>,
}
impl<'a> Specification<'a> {
pub (crate) fn read_from_with_default(path : &path::Path) -> Result<Self, anyhow::Error> {
pub(crate) fn read_from_with_default(path: &path::Path) -> Result<Self, anyhow::Error> {
Ok(if path.is_file() {
toml::from_str(&fs::read_to_string(&path)?[..])?
} else {
@@ -25,7 +28,7 @@ impl<'a> Specification<'a> {
})
}
pub (crate) fn read_from(path : &path::Path) -> Result<Self, anyhow::Error> {
pub(crate) fn read_from(path: &path::Path) -> Result<Self, anyhow::Error> {
Ok(if path.is_file() {
toml::from_str(&fs::read_to_string(&path)?[..])?
} else {
@@ -33,41 +36,49 @@ impl<'a> Specification<'a> {
})
}
pub (crate) fn write_to(&self, path : &path::Path) -> Result<(), anyhow::Error> {
pub(crate) fn write_to(&self, path: &path::Path) -> Result<(), anyhow::Error> {
Ok(fs::write(path, toml::to_string(self)?.as_bytes())?)
}
pub (crate) fn feed_iter(&self) -> impl Iterator<Item = (&chrono::NaiveDateTime, &Vec<Episode<'a>>)> {
pub(crate) fn feed_iter(&self) -> impl Iterator<Item = (&chrono::NaiveDateTime, &Vec<Episode<'a>>)> {
self.feed.iter()
}
pub (crate) fn feed_iter_mut(&mut self) -> impl Iterator<Item = (&chrono::NaiveDateTime, &mut Vec<Episode<'a>>)> {
pub(crate) fn feed_iter_mut(&mut self) -> impl Iterator<Item = (&chrono::NaiveDateTime, &mut Vec<Episode<'a>>)> {
self.feed.iter_mut()
}
pub(crate) fn path_from_id(&self, id: &str) -> Option<&path::Path> {
self.files.get(id).map(|v| &**v)
}
}
#[derive(serde::Serialize, serde::Deserialize)]
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub (crate) struct Episode<'a> {
/// Episode title.
title : Cow<'a, str>,
title: Cow<'a, str>,
/// Show notes pulled from description or summary tag.
show_notes : Option<Cow<'a, str>>,
show_notes: Option<Cow<'a, str>>,
/// This is the GUID or the URL if the GUID is not present.
id : Cow<'a, str>,
id: Cow<'a, str>,
/// If the episode exists in the latest version of the feed.
current : bool,
current: bool,
/// Flag to keep track of which episodes have been listened to.
#[serde(default)]
pub (crate) listened : bool,
pub(crate) listened: bool,
}
impl<'a> Episode<'a> {
pub (crate) fn title(&self) -> &str {
self.title.as_ref()
}
pub(crate) fn id(&self) -> &str {
&self.id
}
}
fn download_to_file(url : &str, path : &path::Path) -> anyhow::Result<()> {
fn download_to_file(url: &str, path: &path::Path) -> anyhow::Result<()> {
let response = minreq::get(url)
.send()?;
@@ -80,10 +91,10 @@ fn download_to_file(url : &str, path : &path::Path) -> anyhow::Result<()> {
Ok(())
}
pub (crate) fn update_podcast(
alias : &str,
root : &path::Path,
feed_location : &str,
pub(crate) fn update_podcast(
alias: &str,
root: &path::Path,
feed_location: &str,
) -> anyhow::Result<()> {
// Create output directory
@@ -126,7 +137,7 @@ pub (crate) fn update_podcast(
}
}
fn extract_extension_from_url(url : &str) -> Result<Option<String>, url::ParseError> {
fn extract_extension_from_url(url: &str) -> Result<Option<String>, url::ParseError> {
let mut url_edited = url::Url::parse(url)?;
url_edited.set_query(None);
@@ -137,10 +148,10 @@ fn extract_extension_from_url(url : &str) -> Result<Option<String>, url::ParseEr
}
fn update_artwork<'a, 'b>(
channel : &rss::Channel<'a>,
spec : &mut Specification<'b>,
output : &path::Path,
) -> anyhow::Result<()> where 'a : 'b {
channel: &rss::Channel<'a>,
spec: &mut Specification<'b>,
output: &path::Path,
) -> anyhow::Result<()> where 'a: 'b {
let image_url = match (&channel.image, &channel.itunes_image) {
(Some(image), _) => Some(&image.url),
@@ -156,7 +167,7 @@ fn update_artwork<'a, 'b>(
match extract_extension_from_url(new.as_ref()) {
Ok(Some(extension)) => {
let cover_path = output.join(format!("cover.{}", extension));
let cover_path = output.join(format!("cover-original.{}", extension));
// Remove cover with conflicting file path if it exists
if cover_path.exists() {
@@ -184,9 +195,9 @@ fn update_artwork<'a, 'b>(
}
pub (crate) fn update_podcast_from_feed(
output : &path::Path,
feed : &str,
pub(crate) fn update_podcast_from_feed(
output: &path::Path,
feed: &str,
) -> anyhow::Result<()> {
let feed = match xml_serde::from_str::<rss::Feed>(&feed) {
@@ -300,11 +311,11 @@ pub (crate) fn update_podcast_from_feed(
);
let episode = Episode {
show_notes : description,
id : Cow::from(id.to_owned()),
current : true,
show_notes: description,
id: Cow::from(id.to_owned()),
current: true,
title,
listened : false,
listened: false,
};
match spec.feed.get_mut(&item.published) {
@@ -352,12 +363,12 @@ pub (crate) fn update_podcast_from_feed(
/// Given a file path `something.xyz`, returns the first path of the form
/// `something(a).xyz` where `a` is a non-negative integer which does not
/// currently exist, or `something.xyz` if it itself does not exist.
fn increment_file_name(path : &path::Path) -> Cow<'_, path::Path> {
fn increment_file_name(path: &path::Path) -> Cow<'_, path::Path> {
if path.exists() {
let mut new_path = path.to_owned();
let mut i : u32 = 0;
let mut i: u32 = 0;
while new_path.exists() {
let mut stem = path.file_stem().unwrap().to_owned();

View File

@@ -42,6 +42,12 @@ pub (crate) enum Command {
#[arg(long, short)]
podcast : String,
},
/// Tags files and generates playlists ready for use with an iPod.
Tag {
/// The podcast to tag and generate playlists for.
#[arg(long, short)]
podcast : Option<String>,
},
}
/// Struct modelling configuration file format.

View File

@@ -1,5 +1,6 @@
mod rss;
mod input;
mod tagging;
mod download;
use input::{Command, ListenStatus};
@@ -28,7 +29,6 @@ fn main() -> anyhow::Result<()> {
anyhow::bail!("could not get parent of configuration path for root directory")
};
match args.command {
Command::Download { podcast } => {
// Updating single podcast
@@ -77,10 +77,18 @@ fn main() -> anyhow::Result<()> {
spec.write_to(&spec_file)?;
},
Command::Tag { podcast } => {
if let Some(alias) = podcast {
tagging::generate_m3u(alias.as_str(), root)?;
tagging::strip_tags(alias.as_str(), root)?;
} else {
for (alias, _) in config.podcasts {
tagging::generate_m3u(alias.as_str(), root)?;
tagging::strip_tags(alias.as_str(), root)?;
}
}
}
};
Ok(())
}

81
src/tagging.rs Normal file
View File

@@ -0,0 +1,81 @@
use std::{fs, path};
use crate::download;
use anyhow::Context;
use sanitise_file_name::sanitise;
pub(crate) fn generate_m3u(
alias: &str,
root: &path::Path,
) -> anyhow::Result<()> {
let output = root.join(sanitise(&alias));
let spec_file = output.join("spec.toml");
let spec = download::Specification::read_from(&spec_file)?;
let mut playlist = Vec::new();
for episode in spec.feed_iter().map(|(_, eps)| eps.iter()).flatten() {
let path = output.join(
spec.path_from_id(episode.id()).unwrap()
);
playlist.push(m3u::path_entry({
let relative = path.strip_prefix(
output.parent().unwrap()
).unwrap();
path::Path::new("/Podcasts").join(relative)
}));
}
// Write the playlist file
{
let playlists_folder = root.join("Playlists");
if !playlists_folder.exists() {
fs::create_dir(&playlists_folder)
.context(format!("failed to create output directory for playlists"))?;
}
let mut path = playlists_folder.join(sanitise(&alias));
path.set_extension("m3u");
let mut file = fs::File::create(path)?;
let mut writer = m3u::Writer::new(&mut file);
for entry in &playlist {
writer.write_entry(entry)?;
}
}
Ok(())
}
pub(crate) fn strip_tags(
alias: &str,
root: &path::Path,
) -> anyhow::Result<()> {
let output = root.join(sanitise(&alias));
let spec_file = output.join("spec.toml");
let spec = download::Specification::read_from(&spec_file)?;
for episode in spec.feed_iter().map(|(_, eps)| eps.iter()).flatten() {
let path = output.join(
spec.path_from_id(episode.id()).unwrap()
);
let mut file = audiotags::Tag::new().read_from_path(
&path
)?;
file.remove_title();
file.remove_artist();
file.remove_year();
file.remove_album();
file.set_genre("Podcast");
file.set_title(episode.title());
file.write_to_path(path.as_path().to_str().unwrap())?;
}
Ok(())
}