use crate::common::get_mult; use crate::common::SystemSerde; use fnv::FnvHashMap; use pyo3::prelude::*; use serde::Deserialize; use serde_json::Result; use std::fs::File; use std::io::Seek; use std::io::{BufRead, BufReader, BufWriter, SeekFrom}; use std::path::PathBuf; use std::str; use std::time::Instant; #[derive(Debug, Deserialize)] #[allow(non_snake_case)] struct Body { name: String, subType: String, #[serde(rename = "type")] body_type: String, systemId: i32, systemId64: i64, #[serde(rename = "distanceToArrival")] distance: f32, } #[derive(Debug, Deserialize)] struct Coords { x: f32, y: f32, z: f32, } #[derive(Debug, Deserialize)] struct System { id: i32, id64: i64, name: String, coords: Coords, } #[derive(Debug)] pub struct PreprocessState { pub file: String, pub message: String, pub total: u64, pub done: u64, pub count: usize, } fn process( path: &PathBuf, func: &mut dyn for<'r> FnMut(&'r str) -> (), callback: &dyn Fn(&PreprocessState) -> PyResult, ) -> std::io::Result<()> { let mut buffer = String::new(); let fh = File::open(path)?; let total_size = fh.metadata()?.len(); let mut t_last = Instant::now(); let mut reader = BufReader::new(fh); let mut state = PreprocessState { file: path.to_str().unwrap().to_owned(), total: total_size, done: 0, count: 0, message: format!("Processing {} ...", path.to_str().unwrap()), }; println!("Loading {} ...", path.to_str().unwrap()); while let Ok(n) = reader.read_line(&mut buffer) { if n == 0 { break; } buffer = buffer.trim_end().trim_end_matches(|c| c == ',').to_string(); if !buffer.is_empty() { func(&buffer); } let pos = reader.seek(SeekFrom::Current(0)).unwrap(); state.done = pos; state.count += 1; if t_last.elapsed().as_millis() > 100 { callback(&state)?; t_last = Instant::now(); } buffer.clear(); } Ok(()) } fn process_systems( path: &PathBuf, callback: &dyn Fn(&PreprocessState) -> PyResult, ) -> FnvHashMap { let mut ret = FnvHashMap::default(); process( path, &mut |line| { let sys_res: Result = serde_json::from_str(&line); if let Ok(sys) = sys_res { ret.insert(sys.id, sys); } else { eprintln!("\nError parsing: {}\n\t{:?}\n", line, sys_res.unwrap_err()); } }, callback, ) .unwrap(); ret } pub fn build_index(path: &PathBuf) -> std::io::Result<()> { let mut wtr = BufWriter::new(File::create(path.with_extension("idx"))?); let mut idx: Vec = Vec::new(); let mut records = (csv::Reader::from_path(path)?).into_deserialize::(); loop { idx.push(records.reader().position().byte()); if records.next().is_none() { break; } } bincode::serialize_into(&mut wtr, &idx).unwrap(); Ok(()) } fn process_bodies( path: &PathBuf, out_path: &PathBuf, systems: &mut FnvHashMap, callback: &dyn Fn(&PreprocessState) -> PyResult, ) -> std::io::Result<()> { println!( "Processing {} into {} ...", path.to_str().unwrap(), out_path.to_str().unwrap(), ); let mut n: u32 = 0; let mut wtr = csv::Writer::from_path(out_path)?; process( path, &mut |line| { if !line.contains("Star") { return; } let body_res: Result = serde_json::from_str(&line); if let Ok(body) = body_res { if !body.body_type.contains("Star") { return; } if let Some(sys) = systems.get(&body.systemId) { let sub_type = body.subType; let mult = get_mult(&sub_type); let sys_name = sys.name.clone(); let rec = SystemSerde { id: n, star_type: sub_type, system: sys_name, body: body.name, mult, distance: body.distance, x: sys.coords.x, y: sys.coords.y, z: sys.coords.z, }; wtr.serialize(rec).unwrap(); n += 1; }; } else { eprintln!("\nError parsing: {}\n\t{:?}\n", line, body_res.unwrap_err()); } }, callback, ) .unwrap(); println!("Total Systems: {}", n); systems.clear(); Ok(()) } pub fn preprocess_files( bodies: &PathBuf, systems: &PathBuf, out_path: &PathBuf, callback: &dyn Fn(&PreprocessState) -> PyResult, ) -> std::io::Result<()> { if !out_path.exists() { let mut systems = process_systems(systems, &callback); process_bodies(bodies, out_path, &mut systems, &callback)?; } else { println!( "File '{}' exists, not overwriting it", out_path.to_str().unwrap() ); } println!("Building index..."); println!("Index result: {:?}", build_index(&out_path)); Ok(()) }