ED_LRR/src/preprocess.rs

221 lines
6.4 KiB
Rust

use crate::common::SystemSerde;
use fnv::FnvHashMap;
use humantime::format_duration;
use indicatif::{ProgressBar, ProgressStyle};
use serde::Deserialize;
use serde_json::Result;
use std::fs::File;
use std::io::Seek;
use std::io::{BufRead, BufReader, BufWriter, SeekFrom};
use std::path::PathBuf;
use std::str;
use std::time::Instant;
use structopt::StructOpt;
#[derive(Debug, StructOpt)]
pub struct PreprocessOpts {
#[structopt(short, long = "bodies")]
/// Path to bodies.json
pub bodies: PathBuf,
#[structopt(short, long = "systems")]
/// Path to systemsWithCoordinates.json
pub systems: PathBuf,
#[structopt(default_value = "stars")]
/// outfile prefix
pub prefix: String,
}
#[derive(Debug, Deserialize)]
#[allow(non_snake_case)]
struct Body {
name: String,
subType: String,
#[serde(rename = "type")]
body_type: String,
systemId: i32,
systemId64: i64,
#[serde(rename = "distanceToArrival")]
distance: u32,
}
#[derive(Debug, Deserialize)]
struct Coords {
x: f32,
y: f32,
z: f32,
}
#[derive(Debug, Deserialize)]
struct System {
id: i32,
id64: i64,
name: String,
coords: Coords,
date: String,
}
#[derive(Debug, StructOpt)]
#[structopt(
name = "ed_lrr_pp",
about = "Preprocessor for Elite: Dangerous Long-Range Router",
rename_all = "snake_case"
)]
/// Preprocess data for ed_lrr
struct Opt {
#[structopt(short, long = "bodies")]
/// Path to bodies.json
bodies: PathBuf,
#[structopt(short, long = "systems")]
/// Path to systemsWithCoordinates.json
systems: PathBuf,
#[structopt(default_value = "stars")]
/// outfile prefix
prefix: String,
}
fn get_mult(star_type: &str) -> f32 {
if star_type.contains("White Dwarf") {
return 1.5;
}
if star_type.contains("Neutron") {
return 4.0;
}
1.0
}
fn process(path: &PathBuf, func: &mut dyn for<'r> FnMut(&'r str) -> ()) -> std::io::Result<()> {
let mut cnt = 0;
let mut buffer = String::new();
let t_start = Instant::now();
let fh = File::open(path)?;
let prog_bar = ProgressBar::new(fh.metadata()?.len());
prog_bar.set_style(
ProgressStyle::default_bar()
.template(
"[{elapsed_precise}/{eta_precise}]{spinner} [{wide_bar}] {binary_bytes}/{binary_total_bytes} ({percent}%)",
)
.progress_chars("#9876543210 ")
.tick_chars("/-\\|"),
);
prog_bar.set_draw_delta(1024 * 1024);
let mut reader = BufReader::new(fh);
println!("Loading {} ...", path.to_str().unwrap());
while let Ok(n) = reader.read_line(&mut buffer) {
if n == 0 {
break;
}
buffer = buffer.trim_end().trim_end_matches(|c| c == ',').to_string();
if !buffer.is_empty() {
func(&buffer);
}
prog_bar.set_position(reader.seek(SeekFrom::Current(0)).unwrap());
cnt += 1;
buffer.clear();
}
prog_bar.finish_and_clear();
println!(
"Processed {} lines in {} ...",
cnt,
format_duration(t_start.elapsed())
);
Ok(())
}
fn process_systems(path: &PathBuf) -> FnvHashMap<i64, System> {
let mut ret = FnvHashMap::default();
process(path, &mut |line| {
let sys_res: Result<System> = serde_json::from_str(&line);
if let Ok(sys) = sys_res {
ret.insert(sys.id64, sys);
} else {
eprintln!("\nError parsing: {}\n\t{:?}\n", line, sys_res.unwrap_err());
}
})
.unwrap();
ret
}
fn build_index(path: &PathBuf) -> std::io::Result<()> {
let mut wtr = BufWriter::new(File::create(path.with_extension("idx"))?);
let mut idx: Vec<u64> = Vec::new();
let mut records = (csv::Reader::from_path(path)?).into_deserialize::<SystemSerde>();
loop {
idx.push(records.reader().position().byte());
if records.next().is_none() {
break;
}
}
bincode::serialize_into(&mut wtr, &idx).unwrap();
Ok(())
}
fn process_bodies(
path: &PathBuf,
out_prefix: &str,
systems: &mut FnvHashMap<i64, System>,
) -> std::io::Result<()> {
let out_path = PathBuf::from(format!("{}.csv", out_prefix));
println!(
"Processing {} into {} ...",
path.to_str().unwrap(),
out_path.to_str().unwrap(),
);
let mut n: u32 = 0;
let mut wtr = csv::Writer::from_path(out_path)?;
process(path, &mut |line| {
if !line.contains("Star") {
return;
}
let body_res: Result<Body> = serde_json::from_str(&line);
if let Ok(body) = body_res {
if !body.body_type.contains("Star") {
return;
}
if let Some(sys) = systems.get(&body.systemId64) {
let sub_type = body.subType;
let mult = get_mult(&sub_type);
let sys_name = sys.name.clone();
let mut body_name = body.name.replace(&sys_name, "").trim().to_string();
if body_name == sys_name {
body_name = "".to_string();
}
let rec = SystemSerde {
id: n,
star_type: sub_type,
system: sys_name,
body: body_name,
mult,
distance: body.distance,
x: sys.coords.x,
y: sys.coords.y,
z: sys.coords.z,
};
wtr.serialize(rec).unwrap();
n += 1;
};
} else {
eprintln!("\nError parsing: {}\n\t{:?}\n", line, body_res.unwrap_err());
}
})
.unwrap();
println!("Total Systems: {}", n);
systems.clear();
Ok(())
}
pub fn preprocess_files(opts: PreprocessOpts) -> std::io::Result<()> {
let out_path = PathBuf::from(format!("{}.csv", &opts.prefix));
if !out_path.exists() {
let mut systems = process_systems(&opts.systems);
process_bodies(&opts.bodies, &opts.prefix, &mut systems)?;
} else {
println!(
"File '{}' exists, not overwriting it",
out_path.to_str().unwrap()
);
}
println!("Building index...");
println!("Index result: {:?}", build_index(&out_path));
Ok(())
}