use std::fs; use std::path; use anyhow::Context; #[derive(clap::Parser)] struct Args { /// Name of album as it appears in the URL /// https://downloads.khinsider.com/game-soundtracks/album/{album_name} album : String, /// Output directory for downloads. Will use the album name by default. output : Option, } fn main() -> anyhow::Result<()> { let args : Args = clap::Parser::parse(); let output = path::PathBuf::from(sanitize_filename::sanitize(args.output.unwrap_or(args.album.clone()))); if output.exists() { anyhow::bail!(r#"output path "{}" already exists"#, output.display()) } else { fs::create_dir(&output) .context("failed to create output directory")?; } let album_response = minreq::get( format!("https://downloads.khinsider.com/game-soundtracks/album/{}", args.album) ).send().context("error when requesting album webpage")?; if album_response.status_code != 200 { anyhow::bail!("album page responded with non-200 ({}) response code", album_response.status_code) } let album_page = album_response.as_str().context("could not read album page response as a string")?; let document = scraper::Html::parse_document(album_page); let tracks_selector = scraper::Selector::parse("table#songlist > tbody > tr:not(#songlist_header):not(#songlist_footer)").unwrap(); let headings_selector = scraper::Selector::parse("table#songlist > tbody > tr#songlist_header > th").unwrap(); let headings = document.select(&headings_selector).map(|a| a.inner_html()).collect::>(); let heading_selector = scraper::Selector::parse("div#pageContent > h2").unwrap(); let album_name = document.select(&heading_selector).next().unwrap().inner_html(); for element in document.select(&tracks_selector) { let mut tag = id3::Tag::new(); use id3::TagLike; tag.set_album(album_name.clone()); let download_link_selector = scraper::Selector::parse("td.playlistDownloadSong > a").unwrap(); let download_link = element.select(&download_link_selector).next().unwrap(); let columns_selector = scraper::Selector::parse("td").unwrap(); let columns = element.select(&columns_selector).collect::>(); let track = columns[headings.iter().position(|x| x == "#").unwrap()].inner_html().trim_end_matches(".").parse::().unwrap(); tag.set_track(track); if let Some(cd) = headings.iter().position(|x| x == "CD") { tag.set_disc(columns[cd].inner_html().parse::().unwrap_or(1)); } let track_url = format!( "https://downloads.khinsider.com{}", download_link.value().attr("href") .context("track element did not have media url")?, ); let track_response = minreq::get(track_url) .send() .context("error when requesting track webpage")?; if track_response.status_code != 200 { anyhow::bail!("track page responded with non-200 ({}) response code", track_response.status_code) } let track_page = track_response.as_str().context("could not read track page response as a string")?; let document = scraper::Html::parse_document(track_page); let audio_selector = scraper::Selector::parse("audio").unwrap(); let audio = document.select(&audio_selector).next().unwrap(); let meta_selector = scraper::Selector::parse("p[align='left'] > b").unwrap(); let meta = document.select(&meta_selector); let meta = meta.collect::>(); let song_name = meta[2].inner_html(); tag.set_title(song_name); let audio_url = audio.value().attr("src") .context("audio tag did not have the expected source attribute")?; let audio_url = url::Url::parse(audio_url) .context("could not parse url for audio file")?; let path = audio_url .path_segments() .map(|iter| iter.last()) .flatten() .map(|name| urlencoding::decode(name).ok()) .flatten() .context("failed to parse file name from audio url")?; println!("[info] downloading track: {}", path); let audio_response = minreq::get(audio_url.as_str()) .send() .context("error when requesting audio file")?; let mut audio_file = audio_response.as_bytes().to_vec(); tag.write_to(&mut audio_file, id3::Version::Id3v24).unwrap(); fs::write(output.join(sanitize_filename::sanitize(path.as_ref())), audio_response.as_bytes()) .context("error writing audio file")?; } Ok(()) }