1
0
Fork 0
timelinize/datasources/contactlist/formats.go
2026-01-28 21:53:14 -07:00

149 lines
3.8 KiB
Go

/*
Timelinize
Copyright (c) 2013 Matthew Holt
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package contactlist
import (
"regexp"
"strings"
"github.com/timelinize/timelinize/timeline"
)
// format describes one recognized contact-list layout: a label for the
// layout and, for each canonical field, the matchers that recognize the
// header column(s) carrying that field.
type format struct {
// name is a human-readable label for the layout (e.g. "Generic").
name string
// columns maps a canonical field name to the matchers that are tried
// against each header column name; see match.
columns map[string][]stringMatcher
}
// match scans headerRow and reports, for every canonical field of f that was
// recognized, the indices of the header columns attributed to it.
//
// A column is attributed to at most one canonical field: the first field
// encountered that has a matcher accepting the column name. Columns accepted
// by no matcher do not appear in the result, and fields with no matching
// column have no entry.
//
// NOTE: f.columns is a map and Go does not define map iteration order, so if
// a column name were accepted by matchers of more than one canonical field,
// the field it ends up under would not be deterministic.
func (f format) match(headerRow []string) map[string][]int {
	matched := make(map[string][]int)

	// lookup returns the canonical field whose matchers accept header, if any.
	lookup := func(header string) (string, bool) {
		for field, candidates := range f.columns {
			for _, m := range candidates {
				if m.MatchString(header) {
					return field, true
				}
			}
		}
		return "", false
	}

	for idx, header := range headerRow {
		if field, ok := lookup(header); ok {
			matched[field] = append(matched[field], idx)
		}
	}
	return matched
}
// formats lists the contact-list layouts this package recognizes. Each entry
// maps canonical field names (plain strings or timeline.Attribute* constants)
// to the matchers that identify the corresponding header column.
//
// Two kinds of matcher are used here:
//   - exact: the header is normalized first (noise such as punctuation and
//     underscores becomes a space, whitespace is collapsed and trimmed, and
//     the result is lowercased), then compared to each listed string as-is.
//     Entries must therefore be written in normalized form: lowercase, with
//     spaces where the header would have punctuation (e.g. "e mail" is what
//     the headers "E-mail" and "e_mail" normalize to).
//   - *regexp.Regexp: applied to the raw, un-normalized header.
//
// TODO: there are a lot of columns I'm not accounting for yet that could be useful
var formats = []format{
	{
		name: "Google Contacts",
		columns: map[string][]stringMatcher{
			"full_name": {
				exact{"name"},
			},
			"first_name": {
				exact{"first name"},
			},
			"middle_name": {
				exact{"middle name"},
			},
			"last_name": {
				exact{"last name"},
			},
			"birthdate": {
				exact{"birthday"},
			},
			"picture": {
				exact{"photo"},
			},
			timeline.AttributeGender: {
				exact{"gender"},
			},
			// These columns are numbered ("E-mail 1 - Value", "E-mail 2 - Value",
			// ...), so several header columns may map to the same field.
			timeline.AttributeEmail: {
				regexp.MustCompile(`^E-mail \d+ - Value$`),
			},
			timeline.AttributePhoneNumber: {
				regexp.MustCompile(`^Phone \d+ - Value$`),
			},
		},
	},
	{
		name: "Generic",
		columns: map[string][]stringMatcher{
			"full_name": {
				exact{"name", "full name"},
			},
			"first_name": {
				exact{"first name", "firstname", "given name", "given names", "fname"},
			},
			"middle_name": {
				exact{"middle name", "middlename"},
			},
			"last_name": {
				exact{"last name", "lastname", "surname", "family name", "second name", "lname"},
			},
			"birthdate": {
				exact{"birthday", "birthdate", "birth date", "date of birth", "dob", "bday"},
			},
			timeline.AttributePhoneNumber: {
				exact{"phone", "phone number", "phone no", "telephone", "telephone number", "telephone no"},
			},
			// "e mail" / "e mail address" cover hyphenated and underscored
			// spellings ("E-mail", "e_mail address"), whose separator is turned
			// into a space by normalization and so never equals "email".
			timeline.AttributeEmail: {
				exact{"email", "email address", "e mail", "e mail address", "electronic mail"},
			},
			timeline.AttributeGender: {
				exact{"gender", "sex"},
			},
			"picture": {
				exact{"photo", "picture", "photograph", "profile picture", "profile photo", "avatar"},
			},
		},
	},
}
// exact is a matcher holding a list of accepted column names. An input
// matches when, after being normalized (which ironically uses regex), it
// equals any one of the strings in the slice. The listed strings are compared
// as-is, so they should already be written in normalized form; see
// MatchString.
type exact []string
// MatchString reports whether input, once normalized, equals one of the
// strings in e.
//
// Normalization replaces every match of noise with a single space, collapses
// each run of whitespace into one space, trims leading and trailing space,
// and lowercases the result. The entries of e are not normalized; they are
// compared exactly as written.
func (e exact) MatchString(input string) bool {
	// Strip noise first, then squeeze the gaps it leaves behind.
	cleaned := whitespace.ReplaceAllString(noise.ReplaceAllString(input, " "), " ")
	needle := strings.ToLower(strings.TrimSpace(cleaned))

	for i := range e {
		if e[i] == needle {
			return true
		}
	}
	return false
}
// noise matches the parts of a column name that are ignored when comparing
// names: each parenthesized group "(...)", runs of characters that are
// neither word characters nor spaces, and underscores. (In Go's regexp
// syntax \w is ASCII-only: [0-9A-Za-z_].)
//
// The parenthesized alternative uses [^)]* rather than .* so that it stops at
// the first closing parenthesis. A greedy .* would run from the first "(" to
// the last ")" and swallow any real text between two separate groups, e.g.
// the "Phone" in "(Home) Phone (Primary)".
var noise = regexp.MustCompile(`\([^)]*\)|[^\w ]+|_`)
// whitespace matches a run of one or more whitespace characters. It is used
// by exact.MatchString for collapsing each such run into a single space.
var whitespace = regexp.MustCompile(`\s+`)
// stringMatcher decides whether a header column name is accepted. In this
// file both exact and *regexp.Regexp values are used as stringMatchers in
// the formats table.
type stringMatcher interface {
// MatchString returns true if input is accepted by the matcher.
MatchString(input string) bool
}