initial commit
This commit is contained in:
commit
e980697f57
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
target/
|
1086
Cargo.lock
generated
Normal file
1086
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
14
Cargo.toml
Normal file
14
Cargo.toml
Normal file
@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "khinsider"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.75"
|
||||
clap = { version = "4.4.10", features = ["derive"] }
|
||||
minreq = { version = "2.11.0", features = ["https"] }
|
||||
sanitize-filename = "0.5.0"
|
||||
scraper = "0.14.0"
|
||||
url = "2.5.0"
|
||||
urlencoding = "2.1.3"
|
||||
|
8
README.md
Normal file
8
README.md
Normal file
@ -0,0 +1,8 @@
|
||||
# KHInsider Downloader
|
||||
|
||||
Downloads full albums from [KHInsider](https://downloads.khinsider.com) by scraping the webpage for the appropriate download URLs.
|
||||
|
||||
Because this tool relies on web scraping, it is prone to breaking if KHInsider changes the structure of its website. If you notice any problems, please notify me.
|
||||
|
||||
## To Do
|
||||
- [ ] Tag MP3s before writing to disk so that music players get the album name, art, and track numbering correct
|
84
src/main.rs
Normal file
84
src/main.rs
Normal file
@ -0,0 +1,84 @@
|
||||
use std::fs;
|
||||
use std::path;
|
||||
|
||||
use anyhow::Context;
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
/// Name of album as it appears in the URL
|
||||
/// https://downloads.khinsider.com/game-soundtracks/album/{album_name}
|
||||
album : String,
|
||||
/// Output directory for downloads. Will use the album name by default.
|
||||
output : Option<String>,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let args : Args = clap::Parser::parse();
|
||||
|
||||
let output = path::PathBuf::from(sanitize_filename::sanitize(args.output.unwrap_or(args.album.clone())));
|
||||
if output.exists() {
|
||||
anyhow::bail!(r#"output path "{}" already exists"#, output.display())
|
||||
} else {
|
||||
fs::create_dir(&output)
|
||||
.context("failed to create output directory")?;
|
||||
}
|
||||
|
||||
let album_response = minreq::get(
|
||||
format!("https://downloads.khinsider.com/game-soundtracks/album/{}", args.album)
|
||||
).send().context("error when requesting album webpage")?;
|
||||
|
||||
if album_response.status_code != 200 {
|
||||
anyhow::bail!("album page responded with non-200 ({}) response code", album_response.status_code)
|
||||
}
|
||||
|
||||
let album_page = album_response.as_str().context("could not read album page response as a string")?;
|
||||
|
||||
let document = scraper::Html::parse_document(album_page);
|
||||
let selector = scraper::Selector::parse("table#songlist > tbody > tr > td.playlistDownloadSong > a").unwrap();
|
||||
for element in document.select(&selector) {
|
||||
let track_url = format!(
|
||||
"https://downloads.khinsider.com{}",
|
||||
element.value().attr("href")
|
||||
.context("track element did not have media url")?,
|
||||
);
|
||||
|
||||
let track_response = minreq::get(track_url)
|
||||
.send()
|
||||
.context("error when requesting track webpage")?;
|
||||
|
||||
if track_response.status_code != 200 {
|
||||
anyhow::bail!("track page responded with non-200 ({}) response code", track_response.status_code)
|
||||
}
|
||||
|
||||
let track_page = track_response.as_str().context("could not read track page response as a string")?;
|
||||
|
||||
let document = scraper::Html::parse_document(track_page);
|
||||
let selector = scraper::Selector::parse("audio").unwrap();
|
||||
for element in document.select(&selector) {
|
||||
let audio_url = element.value().attr("src")
|
||||
.context("audio tag did not have the expected source attribute")?;
|
||||
|
||||
let audio_url = url::Url::parse(audio_url)
|
||||
.context("could not parse url for audio file")?;
|
||||
|
||||
let path = audio_url
|
||||
.path_segments()
|
||||
.map(|iter| iter.last())
|
||||
.flatten()
|
||||
.map(|name| urlencoding::decode(name).ok())
|
||||
.flatten()
|
||||
.context("failed to parse file name from audio url")?;
|
||||
|
||||
println!("[info] downloading track: {}", path);
|
||||
|
||||
let audio_response = minreq::get(audio_url.as_str())
|
||||
.send()
|
||||
.context("error when requesting audio file")?;
|
||||
|
||||
fs::write(output.join(sanitize_filename::sanitize(path.as_ref())), audio_response.as_bytes())
|
||||
.context("error writing audio file")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in New Issue
Block a user