use crate::common::{EdLrrError, EdLrrResult, System}; use crate::info; use csv_core::{ReadFieldResult, Reader}; use memmap::Mmap; use std::collections::HashMap; use std::fs::File; use std::path::Path; pub fn mmap_csv(path: &Path, query: Vec) -> Result>, String> { let file = File::open(path).map_err(|e| e.to_string())?; let mm = unsafe { Mmap::map(&file) }.map_err(|e| e.to_string())?; let mut best = query .iter() .map(|s| (s, (s.as_bytes(), usize::MAX, u32::MAX))) .collect::>(); let t_start = std::time::Instant::now(); let dist = eddie::slice::DamerauLevenshtein::new(); let mut row = 0; { let mut data = &mm[..]; let mut rdr = Reader::new(); let mut field = [0; 1024]; let mut fieldidx = 0; loop { let (result, nread, nwrite) = rdr.read_field(data, &mut field); data = &data[nread..]; let field = &field[..nwrite]; match result { ReadFieldResult::InputEmpty => {} ReadFieldResult::OutputFull => { return Err("Encountered field larget than 1024 bytes!".to_string()); } ReadFieldResult::Field { record_end } => { if fieldidx == 1 { for (_, (name_b, best_dist, id)) in best.iter_mut() { let d = dist.distance(name_b, field); if d < *best_dist { *best_dist = d; *id = row; } } } if record_end { fieldidx = 0; row += 1; } else { fieldidx += 1; } } // This case happens when the CSV reader has successfully exhausted // all input. ReadFieldResult::End => { break; } } } } let search_result = best .drain(..) .map(|(query_name, (_, _, idx))| (query_name.clone(), Some(idx))) .collect::>>(); let rate = (row as f64) / t_start.elapsed().as_secs_f64(); info!( "Took: {:.2?}, {:.2} systems/second", t_start.elapsed(), rate ); Ok(search_result) }