512 lines
14 KiB
Go
512 lines
14 KiB
Go
/*
|
|
Timelinize
|
|
Copyright (c) 2013 Matthew Holt
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published
|
|
by the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
// Package geojson implements a data source for GeoJSON data (RFC 7946): https://geojson.org/
|
|
package geojson
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/fs"
|
|
"math"
|
|
"path"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/timelinize/timelinize/datasources/googlelocation"
|
|
"github.com/timelinize/timelinize/timeline"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
// NOTE: This is very similar to the kmlgx importer, except it's JSON.
|
|
// Almost all other code is nearly identical.
|
|
|
|
func init() {
|
|
err := timeline.RegisterDataSource(timeline.DataSource{
|
|
Name: "geojson",
|
|
Title: "GeoJSON",
|
|
Icon: "geojson.svg",
|
|
Description: "GeoJSON files containing a collection of points",
|
|
NewOptions: func() any { return new(Options) },
|
|
NewFileImporter: func() timeline.FileImporter { return new(FileImporter) },
|
|
})
|
|
if err != nil {
|
|
timeline.Log.Fatal("registering data source", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// Options configures the data source.
type Options struct {
	// The ID of the owner entity. REQUIRED for linking the imported
	// items to an entity in the DB.
	// TODO: maybe an attribute ID instead, in case the data represents multiple people
	OwnerEntityID uint64 `json:"owner_entity_id"`

	// If true, coordinate arrays beyond 2 elements will attempt
	// to be decoded in non-spec-compliant ways, which is useful
	// if the source data is non-compliant. If the optional 3rd
	// element is too big for altitude, it will be tried as a
	// Unix timestamp (seconds); and if a fourth element exists,
	// both third and fourth elements will be tried as timestamp
	// or altitude, depending on their magnitude. See #23.
	Lenient bool `json:"lenient,omitempty"`

	// Options specific to the location processor, which cleans up
	// noisy raw location data (embedded; see the googlelocation package).
	googlelocation.LocationProcessingOptions
}
|
|
|
|
// FileImporter implements the timeline.FileImporter interface.
// It is stateless; all per-import state lives in method arguments.
type FileImporter struct{}
|
|
|
|
// Recognize returns whether the file is supported.
|
|
func (FileImporter) Recognize(_ context.Context, dirEntry timeline.DirEntry, _ timeline.RecognizeParams) (timeline.Recognition, error) {
|
|
rec := timeline.Recognition{DirThreshold: .9}
|
|
|
|
// we can import directories, but let the import planner figure that out; only recognize files
|
|
if dirEntry.IsDir() {
|
|
return rec, nil
|
|
}
|
|
|
|
// recognize by file extension
|
|
if strings.ToLower(path.Ext(dirEntry.Name())) == ".geojson" {
|
|
rec.Confidence = 1
|
|
}
|
|
|
|
return rec, nil
|
|
}
|
|
|
|
// FileImport conducts an import of the data from a file.
|
|
func (fi *FileImporter) FileImport(ctx context.Context, dirEntry timeline.DirEntry, params timeline.ImportParams) error {
|
|
dsOpt := params.DataSourceOptions.(*Options)
|
|
|
|
return fs.WalkDir(dirEntry.FS, dirEntry.Filename, func(fpath string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := ctx.Err(); err != nil {
|
|
return err
|
|
}
|
|
if strings.HasPrefix(d.Name(), ".") {
|
|
// skip hidden files & folders
|
|
if d.IsDir() {
|
|
return fs.SkipDir
|
|
}
|
|
return nil
|
|
}
|
|
if d.IsDir() {
|
|
return nil // traverse into subdirectories
|
|
}
|
|
|
|
// skip unsupported file types
|
|
if ext := strings.ToLower(path.Ext(d.Name())); ext != ".geojson" {
|
|
return nil
|
|
}
|
|
|
|
file, err := dirEntry.FS.Open(fpath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer file.Close()
|
|
|
|
// create JSON decoder (wrapped to track some state as it decodes)
|
|
jsonDec := &decoder{Decoder: json.NewDecoder(file), lenient: dsOpt.Lenient}
|
|
|
|
// create location processor to clean up any noisy raw data
|
|
locProc, err := googlelocation.NewLocationProcessor(jsonDec, dsOpt.LocationProcessingOptions)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// iterate each resulting location point and process it as an item
|
|
for {
|
|
l, err := locProc.NextLocation(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if l == nil {
|
|
break
|
|
}
|
|
|
|
feature := l.Original.(feature)
|
|
|
|
// use generic properties as metadata
|
|
meta := timeline.Metadata(feature.Properties)
|
|
meta = meta.HumanizeKeys()
|
|
|
|
// fill in special-case, standardized metadata using
|
|
// the same keys as with Google Location History
|
|
meta["Velocity"] = feature.velocity
|
|
meta["Heading"] = feature.heading
|
|
|
|
// include any metadata added by location processor
|
|
meta.Merge(l.Metadata, timeline.MetaMergeReplace)
|
|
|
|
item := &timeline.Item{
|
|
Classification: timeline.ClassLocation,
|
|
Timestamp: l.Timestamp,
|
|
Timespan: l.Timespan,
|
|
Location: l.Location(),
|
|
Owner: timeline.Entity{
|
|
ID: dsOpt.OwnerEntityID,
|
|
},
|
|
Metadata: meta,
|
|
}
|
|
|
|
if params.Timeframe.ContainsItem(item, false) {
|
|
params.Pipeline <- &timeline.Graph{Item: item}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// decoder wraps the JSON decoder to get the next location from the document.
// It tracks nesting state so we can be sure we're in the right part of the structure.
type decoder struct {
	*json.Decoder

	// foundFeatures is set once the opening '[' of the top-level
	// "features" array has been consumed; until then, NextLocation
	// skips tokens looking for it.
	foundFeatures bool

	// lenient enables non-spec-compliant decoding of coordinate
	// arrays (see Options.Lenient).
	lenient bool

	// state to persist as we potentially decode locations in batches, depending on structure
	current   feature    // the feature whose coordinates are currently being drained
	positions []position // remaining coordinates of current, returned one per NextLocation call
}
|
|
|
|
// NextLocation returns the next available point from the JSON document.
// It returns (nil, nil) once the document is exhausted. Multi-coordinate
// geometries (LineString, MultiPoint, MultiLineString) are buffered in
// dec.positions and returned one point per call.
func (dec *decoder) NextLocation(ctx context.Context) (*googlelocation.Location, error) {
	for {
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		// if we've already decoded a batch of positions, return the next one until the batch is emptied
		if len(dec.positions) > 0 {
			var pos position
			pos, dec.positions = dec.positions[0], dec.positions[1:]
			return pos.location(dec.current, dec.lenient)
		}

		// if we haven't gotten to the 'features' part of the structure yet, keep going
		if !dec.foundFeatures {
			t, err := dec.Token()
			if errors.Is(err, io.EOF) {
				// document ended before (or without) a features array
				break
			}
			if err != nil {
				return nil, fmt.Errorf("decoding next JSON token: %w", err)
			}

			// a "features" string token immediately followed by '[' marks
			// the start of the feature array; requiring the delimiter
			// guards against string values that merely equal "features"
			if val, ok := t.(string); ok && val == "features" {
				tkn, err := dec.Token()
				if err != nil {
					return nil, fmt.Errorf("decoding token after features token: %w", err)
				}
				if delim, ok := tkn.(json.Delim); ok && delim == '[' {
					dec.foundFeatures = true
				}
			}
			continue
		}

		// end of the features array
		if !dec.More() {
			break
		}

		// decode the next feature!

		dec.current = feature{} // reset it since I'm not sure if Decode writes the fields in-place or if it swaps out everything
		if err := dec.Decode(&dec.current); err != nil {
			return nil, fmt.Errorf("invalid GeoJSON feature: %w", err)
		}

		// feature properties are basically arbitrary key-value pairs, but a few common
		// ones exist, such as time, altitude, etc; we extract what we can
		if err := dec.current.extractKnownProperties(); err != nil {
			return nil, fmt.Errorf("reading well-known properties of geojson feature: %w", err)
		}

		switch dec.current.Geometry.Type {
		case "Point":
			// a single coordinate; return it directly
			var coord position
			if err := json.Unmarshal(dec.current.Geometry.Coordinates, &coord); err != nil {
				return nil, fmt.Errorf("invalid Point coordinates: %w", err)
			}
			return coord.location(dec.current, dec.lenient)
		case "LineString", "MultiPoint":
			// a batch of coordinates; buffer them and let the next loop
			// iteration start draining the batch
			if err := json.Unmarshal(dec.current.Geometry.Coordinates, &dec.positions); err != nil {
				return nil, fmt.Errorf("invalid %s coordinates: %w", dec.current.Geometry.Type, err)
			}
		case "MultiLineString":
			// flatten the nested lines into a single batch of positions
			var manyPositions [][]position
			if err := json.Unmarshal(dec.current.Geometry.Coordinates, &manyPositions); err != nil {
				return nil, fmt.Errorf("invalid MultiLineString coordinates: %w", err)
			}
			dec.positions = []position{}
			for _, positions := range manyPositions {
				dec.positions = append(dec.positions, positions...)
			}
		default:
			// skip unsupported types (e.g. Polygon, GeometryCollection)
			continue
		}
	}

	return nil, nil
}
|
|
|
|
// feature represents a GeoJSON Feature object.
// See https://datatracker.ietf.org/doc/html/rfc7946#section-3.1
type feature struct {
	Type       string         `json:"type"`
	Properties map[string]any `json:"properties,omitempty"`
	Geometry   struct {
		Type string `json:"type"`
		// Coordinates is deferred with RawMessage because its shape
		// depends on Type (a single position, a list, or a nested list).
		Coordinates json.RawMessage `json:"coordinates"`
	} `json:"geometry"`

	// certain values extracted (and removed) from "well-known" (obvious or common) keys in Properties
	time     time.Time
	altitude float64 // meters
	accuracy float64 // meters (higher values are less accurate; should probably be called "error" instead)
	velocity float64 // meters per second
	heading  float64 // degrees
}
|
|
|
|
func (f *feature) extractKnownProperties() error {
|
|
// we use this to try to guess whether Unix timestamp may be in seconds or milliseconds
|
|
const year2286ApproxUnixSec = 10000000000
|
|
|
|
// time
|
|
for _, propName := range []string{
|
|
"time",
|
|
"timestamp",
|
|
"time_long",
|
|
"datetime",
|
|
"date_time",
|
|
} {
|
|
// stop trying once we've got a time value
|
|
if !f.time.IsZero() {
|
|
break
|
|
}
|
|
|
|
switch val := f.Properties[propName].(type) {
|
|
case string:
|
|
for _, format := range []string{
|
|
time.RFC3339,
|
|
time.RFC3339Nano,
|
|
time.RFC850,
|
|
time.RFC822,
|
|
time.RFC822Z,
|
|
time.RFC1123,
|
|
time.RFC1123Z,
|
|
} {
|
|
t, err := time.Parse(format, val)
|
|
if err == nil {
|
|
f.time = t
|
|
delete(f.Properties, propName)
|
|
break
|
|
}
|
|
}
|
|
case int:
|
|
if val < year2286ApproxUnixSec {
|
|
f.time = time.Unix(int64(val), 0)
|
|
delete(f.Properties, propName)
|
|
} else {
|
|
f.time = time.UnixMilli(int64(val))
|
|
delete(f.Properties, propName)
|
|
}
|
|
case int64:
|
|
if val < year2286ApproxUnixSec {
|
|
f.time = time.Unix(val, 0)
|
|
delete(f.Properties, propName)
|
|
} else {
|
|
f.time = time.UnixMilli(val)
|
|
delete(f.Properties, propName)
|
|
}
|
|
case float64:
|
|
sec, dec := math.Modf(val)
|
|
if sec < year2286ApproxUnixSec {
|
|
f.time = time.Unix(int64(sec), int64(dec*(1e9)))
|
|
delete(f.Properties, propName)
|
|
} else {
|
|
f.time = time.UnixMilli(int64(sec)) // we don't store more precise than milliseconds
|
|
delete(f.Properties, propName)
|
|
}
|
|
case nil:
|
|
// having a time property isn't mandatory, ignore
|
|
default:
|
|
return fmt.Errorf("unexpected type for time property %s: %T", propName, val)
|
|
}
|
|
}
|
|
|
|
// altitude
|
|
for _, propName := range []string{
|
|
"altitude",
|
|
"elevation",
|
|
"height",
|
|
} {
|
|
// stop trying once we've got a value
|
|
if f.altitude != 0 {
|
|
break
|
|
}
|
|
switch val := f.Properties[propName].(type) {
|
|
case int:
|
|
f.altitude = float64(val)
|
|
delete(f.Properties, propName)
|
|
case int64:
|
|
f.altitude = float64(val)
|
|
delete(f.Properties, propName)
|
|
case float64:
|
|
f.altitude = val
|
|
delete(f.Properties, propName)
|
|
}
|
|
}
|
|
|
|
// accuracy
|
|
for _, propName := range []string{
|
|
"accuracy",
|
|
} {
|
|
// stop trying once we've got a value
|
|
if f.accuracy != 0 {
|
|
break
|
|
}
|
|
switch val := f.Properties[propName].(type) {
|
|
case int:
|
|
f.accuracy = float64(val)
|
|
delete(f.Properties, propName)
|
|
case int64:
|
|
f.accuracy = float64(val)
|
|
delete(f.Properties, propName)
|
|
case float64:
|
|
f.accuracy = val
|
|
delete(f.Properties, propName)
|
|
}
|
|
}
|
|
|
|
// heading
|
|
for _, propName := range []string{
|
|
"heading",
|
|
"bearing",
|
|
"direction",
|
|
} {
|
|
// stop trying once we've got a value
|
|
if f.heading != 0 {
|
|
break
|
|
}
|
|
switch val := f.Properties[propName].(type) {
|
|
case int:
|
|
f.heading = float64(val)
|
|
delete(f.Properties, propName)
|
|
case int64:
|
|
f.heading = float64(val)
|
|
delete(f.Properties, propName)
|
|
case float64:
|
|
f.heading = val
|
|
delete(f.Properties, propName)
|
|
}
|
|
}
|
|
|
|
// velocity
|
|
for _, propName := range []string{
|
|
"velocity",
|
|
"speed",
|
|
} {
|
|
// stop trying once we've got a value
|
|
if f.velocity != 0 {
|
|
break
|
|
}
|
|
switch val := f.Properties[propName].(type) {
|
|
case int:
|
|
f.velocity = float64(val)
|
|
delete(f.Properties, propName)
|
|
case int64:
|
|
f.velocity = float64(val)
|
|
delete(f.Properties, propName)
|
|
case float64:
|
|
f.velocity = val
|
|
delete(f.Properties, propName)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// position is a GeoJSON coordinate array: [longitude, latitude], with an
// optional third element for altitude in meters (and, in non-compliant
// data, sometimes more elements; see Options.Lenient).
// https://datatracker.ietf.org/doc/html/rfc7946#section-3.1.1
type position []float64
|
|
|
|
func (p position) location(feature feature, lenient bool) (*googlelocation.Location, error) {
|
|
const minDimensions = 2
|
|
if count := len(p); count < minDimensions {
|
|
return nil, fmt.Errorf("expected at least two values for coordinate, got %d: %+v", count, p)
|
|
}
|
|
latE7, err := googlelocation.FloatToIntE7(p[1])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
lonE7, err := googlelocation.FloatToIntE7(p[0])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
altitude, ts := feature.altitude, feature.time
|
|
|
|
// the GeoJSON spec advises against supporting more than the optional 3rd element (altitude),
|
|
// but we've seen messy data (https://github.com/timelinize/timelinize/issues/23) where the
|
|
// third element is timestamp and even a fourth element is altitude sometimes (!!)...
|
|
// that blatantly violates the spec, but we can maybe do some basic sanity checks to see
|
|
// if we can assume those values
|
|
if len(p) > minDimensions {
|
|
if lenient {
|
|
// non-spec-compliant
|
|
var inferredAltitude float64
|
|
var inferredTimestamp time.Time
|
|
minAltitude, maxAltitude := -100.0, 20000.0 // meters
|
|
|
|
// iterate remaining positions
|
|
var remaining = len(p) - 2
|
|
for i := range remaining {
|
|
if p[2+i] > maxAltitude { // time can occur in any position
|
|
inferredTimestamp = time.Unix(int64(p[2+i]), 0)
|
|
} else if i == 0 && p[2+i] > minAltitude { // altitude must be in first position after coordinates
|
|
inferredAltitude = p[2+i]
|
|
}
|
|
// remaining positions can be altitude, speed, signal quality, number of satellites, etc.
|
|
// these are not easily detectable though.
|
|
}
|
|
|
|
if altitude == 0 && inferredAltitude != 0 {
|
|
altitude = inferredAltitude
|
|
}
|
|
if ts.IsZero() && !inferredTimestamp.IsZero() {
|
|
ts = inferredTimestamp
|
|
}
|
|
} else if altitude == 0 {
|
|
// spec compliant; third element is optional but must be altitude in meters if present
|
|
altitude = p[2]
|
|
}
|
|
}
|
|
return &googlelocation.Location{
|
|
Original: feature,
|
|
LatitudeE7: latE7,
|
|
LongitudeE7: lonE7,
|
|
Altitude: altitude,
|
|
Uncertainty: feature.accuracy,
|
|
Timestamp: ts,
|
|
}, nil
|
|
}
|