70 lines
2.5 KiB
Rust
70 lines
2.5 KiB
Rust
use crate::common::{EdLrrError, EdLrrResult, System};
|
|
use crate::info;
|
|
use csv_core::{ReadFieldResult, Reader};
|
|
use memmap::Mmap;
|
|
use std::collections::HashMap;
|
|
use std::fs::File;
|
|
use std::path::Path;
|
|
|
|
pub fn mmap_csv(path: &Path, query: Vec<String>) -> Result<HashMap<String, Option<u32>>, String> {
|
|
let file = File::open(path).map_err(|e| e.to_string())?;
|
|
let mm = unsafe { Mmap::map(&file) }.map_err(|e| e.to_string())?;
|
|
let mut best = query
|
|
.iter()
|
|
.map(|s| (s, (s.as_bytes(), usize::MAX, u32::MAX)))
|
|
.collect::<Vec<(&String, (_, usize, u32))>>();
|
|
let t_start = std::time::Instant::now();
|
|
let dist = eddie::slice::DamerauLevenshtein::new();
|
|
let mut row = 0;
|
|
{
|
|
let mut data = &mm[..];
|
|
let mut rdr = Reader::new();
|
|
let mut field = [0; 1024];
|
|
let mut fieldidx = 0;
|
|
loop {
|
|
let (result, nread, nwrite) = rdr.read_field(data, &mut field);
|
|
data = &data[nread..];
|
|
let field = &field[..nwrite];
|
|
match result {
|
|
ReadFieldResult::InputEmpty => {}
|
|
ReadFieldResult::OutputFull => {
|
|
return Err("Encountered field larget than 1024 bytes!".to_string());
|
|
}
|
|
ReadFieldResult::Field { record_end } => {
|
|
if fieldidx == 1 {
|
|
for (_, (name_b, best_dist, id)) in best.iter_mut() {
|
|
let d = dist.distance(name_b, field);
|
|
if d < *best_dist {
|
|
*best_dist = d;
|
|
*id = row;
|
|
}
|
|
}
|
|
}
|
|
if record_end {
|
|
fieldidx = 0;
|
|
row += 1;
|
|
} else {
|
|
fieldidx += 1;
|
|
}
|
|
}
|
|
// This case happens when the CSV reader has successfully exhausted
|
|
// all input.
|
|
ReadFieldResult::End => {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
let search_result = best
|
|
.drain(..)
|
|
.map(|(query_name, (_, _, idx))| (query_name.clone(), Some(idx)))
|
|
.collect::<HashMap<String, Option<u32>>>();
|
|
let rate = (row as f64) / t_start.elapsed().as_secs_f64();
|
|
info!(
|
|
"Took: {:.2?}, {:.2} systems/second",
|
|
t_start.elapsed(),
|
|
rate
|
|
);
|
|
Ok(search_result)
|
|
}
|