initial commit

aaron-jack-manning 2024-01-21 14:20:10 +11:00
commit e980697f57
5 changed files with 1193 additions and 0 deletions

.gitignore vendored Normal file, 1 line added

@@ -0,0 +1 @@
target/

Cargo.lock generated Normal file, 1086 lines added (file diff suppressed because it is too large)

Cargo.toml Normal file, 14 lines added

@@ -0,0 +1,14 @@
[package]
name = "khinsider"
version = "0.1.0"
edition = "2021"

[dependencies]
anyhow = "1.0.75"
clap = { version = "4.4.10", features = ["derive"] }
minreq = { version = "2.11.0", features = ["https"] }
sanitize-filename = "0.5.0"
scraper = "0.14.0"
url = "2.5.0"
urlencoding = "2.1.3"

README.md Normal file, 8 lines added

@@ -0,0 +1,8 @@
# KHInsider Downloader

Downloads full albums from [KHInsider](https://downloads.khinsider.com) by scraping the webpage for the appropriate download URLs.

Given that this relies on web scraping, it is prone to breaking if KHInsider changes the way the website looks. If you notice any problems, please notify me.

## To Do

- [ ] Tag MP3s before writing to disk so that music players get the album name, art, and track numbering right (a sketch of one possible approach follows below)
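
As a rough sketch of the tagging item above: once a track has been written to disk, basic ID3 metadata could be attached with a tag library. This assumes the `id3` crate, which is not a dependency of this commit, and the `tag_mp3` helper and its arguments are hypothetical.

```rust
// Sketch only: the `id3` crate is assumed here and is not in this commit's Cargo.toml.
use std::path::Path;

use id3::{Tag, TagLike, Version};

/// Hypothetical helper: attach album/title/track metadata to an already-written MP3.
fn tag_mp3(path: &Path, album: &str, title: &str, track: u32) -> anyhow::Result<()> {
    let mut tag = Tag::new();
    tag.set_album(album);
    tag.set_title(title);
    tag.set_track(track);
    // Cover art could additionally be embedded as an APIC frame (id3::frame::Picture),
    // using an image fetched from the album page.
    tag.write_to_path(path, Version::Id3v24)?;
    Ok(())
}
```

Such a helper could be called right after the `fs::write` in the track loop of `src/main.rs` below, with the track number taken from the loop index.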

src/main.rs Normal file, 84 lines added

@@ -0,0 +1,84 @@
use std::fs;
use std::path;

use anyhow::Context;

#[derive(clap::Parser)]
struct Args {
    /// Name of album as it appears in the URL
    /// https://downloads.khinsider.com/game-soundtracks/album/{album_name}
    album : String,
    /// Output directory for downloads. Will use the album name by default.
    output : Option<String>,
}

fn main() -> anyhow::Result<()> {
    let args : Args = clap::Parser::parse();

    let output = path::PathBuf::from(sanitize_filename::sanitize(args.output.unwrap_or(args.album.clone())));

    if output.exists() {
        anyhow::bail!(r#"output path "{}" already exists"#, output.display())
    } else {
        fs::create_dir(&output)
            .context("failed to create output directory")?;
    }

    // Fetch the album page, which lists one row per track in table#songlist.
    let album_response = minreq::get(
        format!("https://downloads.khinsider.com/game-soundtracks/album/{}", args.album)
    ).send().context("error when requesting album webpage")?;

    if album_response.status_code != 200 {
        anyhow::bail!("album page responded with non-200 ({}) response code", album_response.status_code)
    }

    let album_page = album_response.as_str().context("could not read album page response as a string")?;

    let document = scraper::Html::parse_document(album_page);

    // Each track row links to its own page from the playlistDownloadSong cell.
    let selector = scraper::Selector::parse("table#songlist > tbody > tr > td.playlistDownloadSong > a").unwrap();

    for element in document.select(&selector) {
        let track_url = format!(
            "https://downloads.khinsider.com{}",
            element.value().attr("href")
                .context("track element did not have media url")?,
        );

        let track_response = minreq::get(track_url)
            .send()
            .context("error when requesting track webpage")?;

        if track_response.status_code != 200 {
            anyhow::bail!("track page responded with non-200 ({}) response code", track_response.status_code)
        }

        let track_page = track_response.as_str().context("could not read track page response as a string")?;

        let document = scraper::Html::parse_document(track_page);

        // The track page embeds the direct media URL in an <audio> element's src attribute.
        let selector = scraper::Selector::parse("audio").unwrap();

        for element in document.select(&selector) {
            let audio_url = element.value().attr("src")
                .context("audio tag did not have the expected source attribute")?;

            let audio_url = url::Url::parse(audio_url)
                .context("could not parse url for audio file")?;

            // Use the final, percent-decoded path segment of the media URL as the local file name.
            let path = audio_url
                .path_segments()
                .map(|iter| iter.last())
                .flatten()
                .map(|name| urlencoding::decode(name).ok())
                .flatten()
                .context("failed to parse file name from audio url")?;

            println!("[info] downloading track: {}", path);

            let audio_response = minreq::get(audio_url.as_str())
                .send()
                .context("error when requesting audio file")?;

            fs::write(output.join(sanitize_filename::sanitize(path.as_ref())), audio_response.as_bytes())
                .context("error writing audio file")?;
        }
    }

    Ok(())
}
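
For reference, the clap derive above yields a CLI with one required positional argument (the album slug from the URL) and an optional output directory, so an invocation would look roughly like the following; the slug here is a placeholder following the URL pattern in the doc comment, not a real album.

```sh
# The album slug is a placeholder; the output directory argument is optional.
cargo run --release -- some-album-slug my-album
```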