use std::fs; use std::path; use anyhow::Context; #[derive(clap::Parser)] struct Args { /// Name of album as it appears in the URL /// https://downloads.khinsider.com/game-soundtracks/album/{album_name} album : String, /// Output directory for downloads. Will use the album name by default. output : Option, } fn main() -> anyhow::Result<()> { let args : Args = clap::Parser::parse(); let output = path::PathBuf::from(sanitize_filename::sanitize(args.output.unwrap_or(args.album.clone()))); if output.exists() { anyhow::bail!(r#"output path "{}" already exists"#, output.display()) } else { fs::create_dir(&output) .context("failed to create output directory")?; } let album_response = minreq::get( format!("https://downloads.khinsider.com/game-soundtracks/album/{}", args.album) ).send().context("error when requesting album webpage")?; if album_response.status_code != 200 { anyhow::bail!("album page responded with non-200 ({}) response code", album_response.status_code) } let album_page = album_response.as_str().context("could not read album page response as a string")?; let document = scraper::Html::parse_document(album_page); let tracks_selector = scraper::Selector::parse("table#songlist > tbody > tr:not(#songlist_header):not(#songlist_footer)").unwrap(); let headings_selector = scraper::Selector::parse("table#songlist > tbody > tr#songlist_header > th").unwrap(); let headings = document.select(&headings_selector).map(|a| a.inner_html()).collect::>(); let heading_selector = scraper::Selector::parse("div#pageContent > h2").unwrap(); let album_name = document.select(&heading_selector).next().context("could not find album name heading")?.inner_html(); for element in document.select(&tracks_selector) { let mut tag = id3::Tag::new(); use id3::TagLike; tag.set_album(album_name.clone()); let download_link_selector = scraper::Selector::parse("td.playlistDownloadSong > a").unwrap(); let download_link = element.select(&download_link_selector).next().context("could not find download link")?; let columns_selector = scraper::Selector::parse("td").unwrap(); let columns = element.select(&columns_selector).collect::>(); if let Some(track) = headings.iter().position(|x| x == "#") .and_then(|idx| { columns[idx] .inner_html() .trim_end_matches(".") .parse::().ok() }) { tag.set_track(track); } if let Some(disc) = headings.iter().position(|x| x == "CD").and_then(|idx| { columns[idx] .inner_html() .parse::().ok() }) { tag.set_disc(disc); } let track_url = format!( "https://downloads.khinsider.com{}", download_link.value().attr("href") .context("track element did not have media url")?, ); let track_response = minreq::get(track_url) .send() .context("error when requesting track webpage")?; if track_response.status_code != 200 { anyhow::bail!("track page responded with non-200 ({}) response code", track_response.status_code) } let track_page = track_response.as_str().context("could not read track page response as a string")?; let document = scraper::Html::parse_document(track_page); let audio_selector = scraper::Selector::parse("audio").unwrap(); let audio = document.select(&audio_selector).next().context("could not find audio tag")?; let meta_selector = scraper::Selector::parse("p[align='left'] > b").unwrap(); let meta = document.select(&meta_selector); let meta = meta.collect::>(); let song_name = meta[2].inner_html(); tag.set_title(song_name); let audio_url = audio.value().attr("src") .context("audio tag did not have the expected source attribute")?; let audio_url = url::Url::parse(audio_url) .context("could not parse url for audio file")?; let path = audio_url .path_segments() .map(|iter| iter.last()) .flatten() .map(|name| urlencoding::decode(name).ok()) .flatten() .context("failed to parse file name from audio url")?; println!("[info] downloading track: {}", path); let audio_response = minreq::get(audio_url.as_str()) .send() .context("error when requesting audio file")?; let mut audio_file = audio_response.as_bytes().to_vec(); tag.write_to(&mut audio_file, id3::Version::Id3v24)?; fs::write(output.join(sanitize_filename::sanitize(path.as_ref())), audio_response.as_bytes()) .context("error writing audio file")?; } Ok(()) }