/* Timelinize Copyright (c) 2013 Matthew Holt This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package contactlist import ( "regexp" "strings" "github.com/timelinize/timelinize/timeline" ) type format struct { name string columns map[string][]stringMatcher } // match returns a mapping of canonical field names to the matched column // indices of the header row. func (f format) match(headerRow []string) map[string][]int { results := make(map[string][]int) nextCol: for i, colName := range headerRow { for canonicalField, matchers := range f.columns { for _, matcher := range matchers { if matcher.MatchString(colName) { results[canonicalField] = append(results[canonicalField], i) continue nextCol } } } } return results } // TODO: there are a lot of columns I'm not accounting for yet that could be useful var formats = []format{ { name: "Google Contacts", columns: map[string][]stringMatcher{ "full_name": { exact{"name"}, }, "first_name": { exact{"first name"}, }, "middle_name": { exact{"middle name"}, }, "last_name": { exact{"last name"}, }, "birthdate": { exact{"birthday"}, }, "picture": { exact{"photo"}, }, timeline.AttributeGender: { exact{"gender"}, }, timeline.AttributeEmail: { regexp.MustCompile(`^E-mail \d+ - Value$`), }, timeline.AttributePhoneNumber: { regexp.MustCompile(`^Phone \d+ - Value$`), }, }, }, { name: "Generic", columns: map[string][]stringMatcher{ "full_name": { exact{"name", "full name"}, }, "first_name": { exact{"first name", "firstname", "given name", "given names", "fname"}, }, "middle_name": { exact{"middle name", "middlename"}, }, "last_name": { exact{"last name", "lastname", "surname", "family name", "second name", "lname"}, }, "birthdate": { exact{"birthday", "birthdate", "birth date", "date of birth", "dob", "bday"}, }, timeline.AttributePhoneNumber: { exact{"phone", "phone number", "phone no", "telephone", "telephone number", "telephone no"}, }, timeline.AttributeEmail: { exact{"email", "email address", "electronic mail"}, }, timeline.AttributeGender: { exact{"gender", "sex"}, }, "picture": { exact{"photo", "picture", "photograph", "profile picture", "profile photo", "avatar"}, }, }, }, } // exact matches any of the strings in the slice after normalizing // the input (which ironically uses regex). type exact []string // MatchString returns true if input is equal to any in the exact slice // after being normalized (lowercase, spaces trimmed, parenthetical // substrings removed, etc). func (e exact) MatchString(input string) bool { input = noise.ReplaceAllString(input, " ") // remove noise input = whitespace.ReplaceAllString(input, " ") // collapse whitespace normalized := strings.ToLower(strings.TrimSpace(input)) for _, val := range e { if val == normalized { return true } } return false } // matches paranthesized text "(*)", characters that aren't part of a word // and aren't a space, and underscores var noise = regexp.MustCompile(`\(.*\)|[^\w ]+|_`) // used for collapsing whitespace var whitespace = regexp.MustCompile(`\s+`) type stringMatcher interface { MatchString(input string) bool }