/*
|
|
Timelinize
|
|
Copyright (c) 2013 Matthew Holt
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published
|
|
by the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
package tlzapp
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"runtime"
|
|
"slices"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/mholt/archives"
|
|
"github.com/timelinize/timelinize/timeline"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
func (app App) fileSelectorRoots() ([]fileSelectorRoot, error) {
|
|
return getFileSelectorRoots()
|
|
}
|
|
|
|
func (app App) getOpenRepositories() []openedTimeline {
|
|
openTimelinesMu.RLock()
|
|
repos := make([]openedTimeline, 0, len(openTimelines))
|
|
for _, otl := range openTimelines {
|
|
// make sure repository still exists after it was opened
|
|
dbFile := filepath.Join(otl.RepoDir, timeline.DBFilename)
|
|
_, err := os.Stat(dbFile)
|
|
if err != nil {
|
|
// huh, it's either gone or we can't access it...
|
|
// so close it; this removes it from our list
|
|
app.log.Error("timeline database no longer found; closing",
|
|
zap.String("repo", otl.InstanceID.String()),
|
|
zap.String("db", dbFile))
|
|
// defer because it needs a write lock on the openTimelinesMu
|
|
defer func() {
|
|
err := app.CloseRepository(otl.InstanceID.String())
|
|
if err != nil {
|
|
app.log.Error("closing repository", zap.Error(err))
|
|
}
|
|
}()
|
|
} else {
|
|
// ok, it's still there
|
|
repos = append(repos, otl)
|
|
}
|
|
}
|
|
openTimelinesMu.RUnlock()
|
|
return repos
|
|
}
|
|
|
|
// openRepository opens the timeline at repoDir as long as it
// is not already open. If create is true, a new timeline is created at
// that path instead (provided the folder assessment allows it). On
// success the timeline is registered in openTimelines, the open-repo
// list is persisted to the config, and the Python sidecar server is
// started if this timeline has semantic features enabled.
func (app *App) openRepository(ctx context.Context, repoDir string, create bool) (openedTimeline, error) {
	absRepo, err := filepath.Abs(repoDir)
	if err != nil {
		return openedTimeline{}, fmt.Errorf("forming absolute path to repo at '%s': %w", repoDir, err)
	}

	// hold the write lock for the whole open/create sequence so two
	// concurrent calls cannot both open the same timeline
	openTimelinesMu.Lock()
	defer openTimelinesMu.Unlock()

	// don't allow a timeline to be opened twice (folder path is a good
	// pre-check, but in theory a timeline is only unique by its ID, which
	// we check later)
	for _, otl := range openTimelines {
		if otl.RepoDir == absRepo {
			return openedTimeline{}, fmt.Errorf("timeline at %s is already open", absRepo)
		}
	}

	// determine if timeline can be opened or created here; note that the
	// assessment value itself is returned as the error when unsuitable
	assessment := timeline.AssessFolder(absRepo)
	if (!create && !assessment.HasTimeline) || (create && !assessment.TimelineCanBeCreated) {
		return openedTimeline{}, assessment
	}

	var tl *timeline.Timeline
	if create {
		tl, err = timeline.Create(ctx, assessment.TimelinePath)
	} else {
		// resume unfinished jobs by default unless explicitly disabled in config
		resumeJobs := app.cfg == nil || app.cfg.ResumeJobs == nil || *app.cfg.ResumeJobs
		tl, err = timeline.Open(ctx, assessment.TimelinePath, resumeJobs)
	}
	if err != nil {
		return openedTimeline{}, err
	}
	tlID := tl.ID().String()

	// in very few places, the timeline package may emit data directly
	// to the frontend in the form of logs, so even though obfuscation
	// is an application concern, a timeline needs to know, in those places,
	// whether to obfuscate the output... since the timeline package
	// cannot import this one, we cheat and invert the dependencies
	tl.SetObfuscationFunc(func() (timeline.ObfuscationOptions, bool) {
		return app.ObfuscationMode(tl)
	})

	// check once more that the timeline is not already open; we only
	// compared folder paths, now we have actual IDs to compare
	for _, otl := range openTimelines {
		if otl.InstanceID == tl.ID() {
			// close the second handle we just opened; the first stays open
			err = tl.Close()
			if err != nil {
				app.log.Error("closing redundantly-opened timeline",
					zap.Error(err),
					zap.String("timeline", assessment.TimelinePath))
			}
			return openedTimeline{}, fmt.Errorf("timeline with ID %s is already open", otl.InstanceID)
		}
	}

	// for serving static data files from the timeline
	fileServerPrefix := "/" + path.Join("repo", tlID)
	fileRoot := assessment.TimelinePath
	fileServer := http.FileServer(http.Dir(fileRoot))

	otl := openedTimeline{
		RepoDir: assessment.TimelinePath,
		InstanceID: tl.ID(),
		Timeline: tl,
		fileServer: http.StripPrefix(fileServerPrefix, fileServer),
	}

	openTimelines[tlID] = otl

	// make appropriate log message
	action := "opened"
	if create {
		action = "created"
	}
	app.log.Info(action+" timeline",
		zap.String("repo", assessment.TimelinePath),
		zap.String("id", tlID))

	// persist newly opened repo so it can be resumed on restart
	// NOTE(review): the Open branch above tolerates app.cfg == nil, but this
	// call would panic in that case — confirm cfg is always set by callers
	if err := app.cfg.syncOpenRepos(); err != nil {
		app.log.Error("unable to persist config", zap.Error(err))
	}

	// start python server if it hasn't started already, if this timeline enables those features
	if enabled, ok := tl.GetProperty(ctx, "semantic_features").(bool); ok && enabled {
		app.pyServerMu.Lock()
		pyServer := app.pyServer
		app.pyServerMu.Unlock()
		if pyServer == nil {
			if err := app.startPythonServer(timeline.PyHost, timeline.PyPort); err != nil {
				// not fatal: the timeline is open either way
				app.log.Error("failed starting Python server", zap.Error(err))
			}
		}
	}

	return otl, nil
}
|
|
|
|
func (app *App) CloseRepository(repoID string) error {
|
|
openTimelinesMu.Lock()
|
|
defer openTimelinesMu.Unlock()
|
|
|
|
otl, ok := openTimelines[repoID]
|
|
if !ok {
|
|
return fmt.Errorf("timeline %s is not open", repoID)
|
|
}
|
|
|
|
if err := otl.Close(); err != nil {
|
|
return err
|
|
}
|
|
|
|
delete(openTimelines, repoID)
|
|
|
|
app.log.Info("closed timeline",
|
|
zap.String("repo", repoID),
|
|
zap.String("id", otl.ID().String()))
|
|
|
|
// persist newly closed repo
|
|
if err := app.cfg.syncOpenRepos(); err != nil {
|
|
app.log.Error("unable to persist config", zap.Error(err))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (app App) AddEntity(repoID string, entity timeline.Entity) error {
|
|
tl, err := getOpenTimeline(repoID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.StoreEntity(context.TODO(), entity)
|
|
}
|
|
|
|
func (app App) GetEntity(repoID string, entityID uint64) (timeline.Entity, error) {
|
|
tl, err := getOpenTimeline(repoID)
|
|
if err != nil {
|
|
return timeline.Entity{}, err
|
|
}
|
|
ent, err := tl.LoadEntity(entityID)
|
|
if err != nil {
|
|
return timeline.Entity{}, err
|
|
}
|
|
if options, obfuscate := app.ObfuscationMode(tl.Timeline); obfuscate {
|
|
ent.Anonymize(options)
|
|
}
|
|
return ent, nil
|
|
}
|
|
|
|
// func (a App) AddAccount(repoID string, dataSourceID string, auth bool, dsOpt json.RawMessage) (timeline.Account, error) {
|
|
// tl, err := getOpenTimeline(repoID)
|
|
// if err != nil {
|
|
// return timeline.Account{}, err
|
|
// }
|
|
|
|
// ds, err := timeline.GetDataSource(dataSourceID)
|
|
// if err != nil {
|
|
// return timeline.Account{}, err
|
|
// }
|
|
|
|
// // for the 'files' data source, the default user ID can and probably should be user@hostname.
|
|
// if payload.DataSource == files.DataSourceID && payload.Owner.UserID == "" {
|
|
// var username, hostname string
|
|
|
|
// u, err := user.Current()
|
|
// if err == nil {
|
|
// username = u.Username
|
|
// } else {
|
|
// s.log.Error("looking up current user", zap.Error(err))
|
|
// }
|
|
|
|
// hostname, err = os.Hostname()
|
|
// if err != nil {
|
|
// s.log.Error("looking up hostname", zap.Error(err))
|
|
// }
|
|
|
|
// // set some sane, slightly recognizable defaults, I guess
|
|
// if username == "" {
|
|
// if payload.Owner.Name != "" {
|
|
// username = payload.Owner.Name
|
|
// } else {
|
|
// username = "me"
|
|
// }
|
|
// }
|
|
// if hostname == "" {
|
|
// hostname = "localhost"
|
|
// }
|
|
|
|
// payload.Owner.UserID = username + "@" + hostname
|
|
// }
|
|
|
|
// acct, err := tl.AddAccount(a.ctx, dataSourceID, dsOpt)
|
|
// if err != nil {
|
|
// return timeline.Account{}, err
|
|
// }
|
|
|
|
// if auth {
|
|
// err = ds.NewAPIImporter().Authenticate(a.ctx, acct, dsOpt)
|
|
// if err != nil {
|
|
// return timeline.Account{}, err
|
|
// }
|
|
// }
|
|
|
|
// return acct, nil
|
|
// }
|
|
|
|
func (app App) RepositoryIsEmpty(repo string) (bool, error) {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
return tl.Empty(), nil
|
|
}
|
|
|
|
func (app App) AuthAccount(repo string, accountID int64, dsOpt json.RawMessage) error {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
account, err := tl.LoadAccount(app.ctx, accountID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if account.DataSource.NewAPIImporter == nil {
|
|
return fmt.Errorf("data source does not support authentication: %s", account.DataSource.Name)
|
|
}
|
|
|
|
dataSourceOpts, err := account.DataSource.UnmarshalOptions(dsOpt)
|
|
if err != nil {
|
|
return fmt.Errorf("unmarshaling data source options: %w", err)
|
|
}
|
|
|
|
apiImporter := account.DataSource.NewAPIImporter()
|
|
|
|
err = apiImporter.Authenticate(app.ctx, account, dataSourceOpts)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// PlannerOptions configures how an import plan is created.
type PlannerOptions struct {
	Path string `json:"path"` // file system path (with OS separators)

	// If true, the planner traverses into directories even when nothing
	// inside them was recognized (see the walk logic in PlanImport).
	Recursive bool `json:"recursive"`

	// If true, archives are traversed as if they were directories.
	TraverseArchives bool `json:"traverse_archives"`

	// Skip files and folders starting with a dot (.) regardless of platform
	SkipDotFiles bool `json:"skip_dot_files"`

	// Skip files and folders considered hidden by OS convention
	SkipHiddenFiles bool `json:"skip_hidden_files"`

	// Parameters passed through to data source recognition.
	timeline.RecognizeParams
}
|
|
|
|
// PlanImport produces an import plan with the given settings. It walks
// the file tree rooted at options.Path (optionally descending into
// archives), asks the registered data sources to recognize each entry,
// consolidates per-file matches into whole-directory matches where a
// data source's threshold is met, and finally orders the proposed files
// so that fast, high-volume data sources import first.
func (app *App) PlanImport(ctx context.Context, options PlannerOptions) (timeline.ProposedImportPlan, error) {
	var plan timeline.ProposedImportPlan

	logger := app.log.Named("import_planner").With(zap.String("root", options.Path))

	// choose the file system abstraction: DeepFS can descend into archives
	var fsys fs.FS
	if options.TraverseArchives {
		fsys = &archives.DeepFS{Root: options.Path, Context: ctx}
	} else {
		var err error
		fsys, err = archives.FileSystem(ctx, options.Path, nil)
		if err != nil {
			return plan, err
		}
	}

	var (
		tree []string // for tracking the dir tree during the walk
		currentDir string // our current directory (the last element of tree)
		pairings = make(map[string][]timeline.ProposedFileImport) // the matches accumulated through the walk
		dirSizes = make(map[string]int) // number of (non-hidden) entries discovered in each directory
	)

	// finalizeDirectory is called during a walk as we move into
	// a new directory, or when a walk is finished. It counts the
	// number of matches by data source and checks if any of them
	// can "claim" the directory as a whole after having matched
	// enough individual entries within it. If so, the individual
	// matches are replaced with a single match for the whole
	// directory. (More than 1 data source may match the dir.)
	finalizeDirectory := func(dir string) {
		currentPairings := pairings[dir]

		// no need to sort/filter/consolidate if there's only 1 file
		if len(currentPairings) <= 1 {
			return
		}

		// start by iterating the pairings of matches from this
		// directory only, and counting the number of entries that
		// each data source matched; then sort by most matches
		var counts dataSourceCounts
		for _, p := range currentPairings {
			for _, match := range p.DataSources {
				if match.DirThreshold > 0 {
					counts.count(match)
				}
			}
		}
		sort.Slice(counts, func(i, j int) bool {
			return counts[i].count > counts[j].count
		})

		// now find any data sources that met their threshold for folding all
		// the matches in the directory into the directory itself; if none
		// reached the threshold, then there's nothing to do (just keep the
		// individual recognition matches as-is); otherwise, we will replace
		// those with one for the whole dir
		var consolidatedMatches []timeline.DataSourceRecognition
		for _, c := range counts {
			// it's possible for a data source to support matching a directory
			// both explicitly (usually by inspecting its contents for a specific
			// structure or name) and implicitly (by specifying a match threshold
			// for the files within it); in that case, the threshold may be greater
			// than 0 even if the recognizer matched the dir explicitly, which does
			// not traverse into it, which results in zero match counts inside the
			// dir... this ends up being a division by 0, which we need to avoid
			// (we can fix this by making the increment of dirSizes[dir] below to
			// not be conditional on not a directory, but it means that percentage
			// can never reach 100% because the actual dir would be counted as part
			// of the size, but not as an explicit match... this seems like the
			// best fix I can think of)
			if dirSizes[dir] == 0 {
				continue
			}

			percentage := float64(c.count) / float64(dirSizes[dir])
			if percentage > c.dirThreshold {
				// this data source matched enough entries in the directory to
				// meet its self-specified threshold, so consider the entire
				// directory a match instead of each individual item
				consolidatedMatches = append(consolidatedMatches, timeline.DataSourceRecognition{
					DataSource: c.ds,
					Recognition: timeline.Recognition{
						Confidence: percentage,
					},
				})
			}
		}

		if len(consolidatedMatches) > 0 {
			// this entire directory is being consolidated, since at least one
			// data source reached the threshold for individual matches within
			// the directory; delete the individual pairings from the walk and
			// replace them all with our single new pairing representing the
			// whole directory
			filename := filepath.Join(filepath.Dir(options.Path), filepath.FromSlash(dir))
			ftype := fileTypeDir
			if archives.PathContainsArchive(filename) {
				ftype = fileTypeArchive
			}

			// we need to be careful not to wipe out matches from other data sources
			// within this directory (imagine, for example, a folder of jpgs, with a
			// single vcard file, where the jpgs are contact pictures; the media data
			// source should not wipe out the vcard match!); these loops look scary,
			// but all they do is remove the matches for individual files within the
			// folder being consolidated, *only for the data sources that are collapsing
			// the folder* - we leave the matches from data sources that don't
			// support/qualify collapsing the folder, since they could still be useful
			// (consider the vcard example).
			// TODO: Since this logic allows data sources to overlap paths (e.g. media could claim a folder, and vcard could match a file inside it), maybe we should enable some UI interaction/notice to ensure this is desired, OR make an import planner option the user can set to control this
			for d, p := range pairings {
				if strings.HasPrefix(d, dir) {
					for i := 0; i < len(p); i++ {
						for j := 0; j < len(p[i].DataSources); j++ {
							if hasDataSource(consolidatedMatches, p[i].DataSources[j].DataSource) {
								p[i].DataSources = append(p[i].DataSources[:j], p[i].DataSources[j+1:]...)
								j--
							}
						}
						if len(p[i].DataSources) == 0 {
							p = append(p[:i], p[i+1:]...)
							pairings[d] = p
							i--
						}
					}
					if len(pairings[d]) == 0 {
						delete(pairings, d)
					}
				}
			}

			// now that we've removed individual file matches from data sources within this
			// folder that are being consolidated to the folder level, add the match that
			// actually represents those data sources at the folder level
			pairings[dir] = append(pairings[dir], timeline.ProposedFileImport{
				Filename: filename,
				FileType: ftype,
				DataSources: consolidatedMatches,
			})
		}
	}

	// Prepare for walk. it's a little inconvenient, actually, that fs.WalkDir() is the conventional
	// way to walk a file system, since it linearizes it, i.e., abstracts the recursion away to a
	// single function. The recursion would be useful for knowing exactly when we're bubbling up
	// out of a directory, or traversing deeper in, without having to keep track of the filenames
	// as we go, and doing prefix comparisons, etc. But I bet the std lib handles edge cases
	// that I don't want to think about, so I'm going to stick to using fs.WalkDir().

	startingDir := filepath.Base(options.Path)
	tree = append(tree, startingDir)
	currentDir = startingDir

	err := fs.WalkDir(fsys, ".", func(fpath string, d fs.DirEntry, err error) error {
		// honor cancellation promptly during potentially long walks
		if err := ctx.Err(); err != nil {
			return err
		}
		if err != nil {
			// sometimes, archives may contain filenames with invalid encoding,
			// or directories may have an archive extension (but are not actually
			// archives); ignore such errors and just keep walking.
			// Most common errors I've seen: fs.ErrInvalid, zip.Err*
			logger.Warn("encountered error during walk; skipping",
				zap.String("path", fpath),
				zap.Error(err))
			return nil
		}

		// skip files and folders as configured, except for the file the user explicitly selected (".")
		if fpath != "." {
			if options.SkipDotFiles && strings.HasPrefix(path.Base(d.Name()), ".") {
				if d.IsDir() {
					return fs.SkipDir
				}
				return nil
			}
			if options.SkipHiddenFiles {
				hidden, err := dirEntryHidden(d)
				if err != nil {
					logger.Warn("could not determine if file is hidden", zap.String("path", fpath), zap.Error(err))
				}
				if hidden {
					if d.IsDir() {
						return fs.SkipDir
					}
					return nil
				}
			}
		}

		// check if we've entered a new directory, and if so,
		// check if we need to fold all the individual matches
		// into a single directory-wide match
		dir := path.Join(filepath.Base(options.Path), path.Dir(fpath)) // account for fpath being "." by just always prepending the root dir name instead
		if dir != currentDir {
			// TODO: This isn't really a helpful log, because it gets sampled, so if we're stuck on a dir for a long time, it might not be the dir that was last logged
			logger.Info("traversing directory", zap.String("dir", dir))

			// compare prefixes by appending "/" to prevent false positives with a scenario like "a/b" and "a/bb"; they are different subfolders!
			if strings.HasPrefix(dir, currentDir+"/") {
				// we have recursed into a subdirectory

				// I've found that when we enter a subdir, we may have left a subdir tree that we were in,
				// i.e. this new dir might not be a subdir of the folder we were last in; so we need to
				// check our tree and keep it in sync, popping off dirs until we get back to the closest
				// common denominator with this new one.
				for len(tree) > 0 && !strings.HasPrefix(dir, tree[len(tree)-1]+"/") {
					finalizeDirectory(tree[len(tree)-1])
					tree = tree[:len(tree)-1]
				}

				tree = append(tree, dir)
			} else {
				// we have finished a directory and are going up
				tree = tree[:len(tree)-1]
				finalizeDirectory(currentDir)
			}

			// update state for new dir
			currentDir = dir
		}

		// don't let directories count against the counts when it comes to consolidating results; doesn't seem right
		if !d.IsDir() {
			dirSizes[currentDir]++
		}

		// we make this DirEntry slightly different than we do when importing, due
		// to the nature of the recognition process (we're doing the walk for the
		// data source so they don't have to)
		walkedFile := timeline.DirEntry{
			DirEntry: d,
			FS: fsys,
			FSRoot: options.Path,
			Filename: fpath,
		}

		results, err := timeline.DataSourcesRecognize(ctx, walkedFile, options.RecognizeParams)
		if err != nil {
			return fmt.Errorf("recognizing %s: %w", fpath, err)
		}

		// fast-path if no results
		if len(results) == 0 {
			if options.Recursive {
				return nil // traverse into directory (if it is one)
			}
			if d.IsDir() {
				return fs.SkipDir // skip directory
			}
			return nil
		}

		ftype := fileTypeFile
		if archives.PathContainsArchive(path.Join(filepath.ToSlash(options.Path), fpath)) {
			ftype = fileTypeArchive
		} else if d.IsDir() {
			// remember that the underlying FS, if it is a DeepFS, can report
			// an archive as a directory, so do this check if it's not an archive
			ftype = fileTypeDir
		}

		// map the filename to its results, keeping all results within this directory together
		// (we need them grouped in case we consolidate all the results to the directory itself,
		// we end up deleting all the individual entry results; we linearize the pairings later)
		pairings[currentDir] = append(pairings[currentDir], timeline.ProposedFileImport{
			Filename: filepath.Join(options.Path, filepath.FromSlash(fpath)),
			FileType: ftype,
			DataSources: results,
		})

		// skip directory; since this filename was recognized, if it was a directory,
		// we don't need to traverse into it even if recursion is enabled, as a data
		// source will be handling it
		if d.IsDir() {
			return fs.SkipDir
		}
		return nil
	})
	if err != nil {
		return plan, fmt.Errorf("walking tree rooted at %s: %w (options=%+v fs=%#v)", options.Path, err, options, fsys)
	}

	// make sure to finalize/process/reduce the final directory we walked
	finalizeDirectory(currentDir)

	// make sure to check our tree for any base cases (end of dir; going up a dir) that
	// may have happened implicitly during the recursive walk without an opportunity
	// to close out those directories (see similar logic above in the walk fn)
	for len(tree) > 0 {
		finalizeDirectory(tree[len(tree)-1])
		tree = tree[:len(tree)-1]
	}

	// linearize the map of results
	for _, p := range pairings {
		plan.Files = append(plan.Files, p...)
	}

	// We want to get as much content into the DB as soon as possible, both to help imports go faster (DBs are
	// fastest when they are small), and to improve the UX (user can start browsing more content right away).
	// Except for contact lists, sort data sources so that those which tend to add lots of content quickly go
	// first. Then put I/O-heavy data sources at the end. Imagine if we imported their photo library first...
	// they'd have to wait potentially hours and hours before they can browse, since thumbnails don't get
	// generated until after the whole import is complete (unless they enable it during the import, but then
	// it's super slow!). This way, the user can browse potentially hundreds of thousands of items while
	// waiting for the slower data sources to finish and have thumbnails generated.
	dsPriorities := []string{
		// then we prioritize data sources with large amounts of small items; when the DB is
		// small, imports are fastest, so putting data sources with the most small items up
		// first makes imports faster
		"google_location",
		"gpx",
		"geojson",
		"kml",
		"nmea0183",
		"strava",

		// these next ones are a blend of lots of items and I/O heavy
		"sms_backup_restore",
		"whatsapp",
		"telegram",
		"facebook",
		"email",
		"imessage",
		"twitter",
		"instagram",
		"iphone",
		"google_voice", // at the end of this group since every conversation is a different file, so it's actually really slow

		// the remaining ones are mostly I/O heavy, but can still have lots of items
		"media",
		"icloud",
		"apple_photos",
		"google_photos",

		// contact lists can be slow because of downloading profile pictures
		"vcard",
		"contact_list",
		"apple_contacts",
	}
	// stable sort: unknown data sources (not in dsPriorities) sink below known
	// ones; ties and unknowns keep their existing relative order
	slices.SortStableFunc(plan.Files, func(a, b timeline.ProposedFileImport) int {
		if len(a.DataSources) == 0 || len(b.DataSources) == 0 {
			return 0
		}
		aDS, bDS := a.DataSources[0].DataSource.Name, b.DataSources[0].DataSource.Name
		aIdx, bIdx := slices.Index(dsPriorities, aDS), slices.Index(dsPriorities, bDS)
		if aIdx < 0 && bIdx >= 0 {
			return 1
		}
		if aIdx >= 0 && bIdx < 0 {
			return -1
		}
		if aIdx < bIdx {
			return -1
		} else if aIdx > bIdx {
			return 1
		}
		return 0
	})

	return plan, nil
}
|
|
|
|
func hasDataSource(matches []timeline.DataSourceRecognition, target timeline.DataSource) bool {
|
|
for _, m := range matches {
|
|
if m.DataSource.Name == target.Name {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Labels used in proposed import plans to describe what kind of
// file system entry a proposed file is.
const (
	fileTypeFile = "file"
	fileTypeDir = "dir"
	fileTypeArchive = "archive"
)
|
|
|
|
// dataSourceCount tallies how many entries within a directory a single
// data source recognized during import planning.
type dataSourceCount struct {
	ds timeline.DataSource // the data source being tallied
	count int // number of entries it recognized
	dirThreshold float64 // fraction of the dir's entries needed to claim the whole dir
}
|
|
|
|
type dataSourceCounts []dataSourceCount
|
|
|
|
func (dsCounts *dataSourceCounts) count(match timeline.DataSourceRecognition) {
|
|
idx := -1
|
|
for i, c := range *dsCounts {
|
|
if c.ds.Name == match.DataSource.Name {
|
|
idx = i
|
|
break
|
|
}
|
|
}
|
|
if idx < 0 {
|
|
*dsCounts = append(*dsCounts, dataSourceCount{
|
|
ds: match.DataSource,
|
|
count: 1,
|
|
dirThreshold: match.DirThreshold,
|
|
})
|
|
return
|
|
}
|
|
(*dsCounts)[idx].count++
|
|
(*dsCounts)[idx].dirThreshold = match.DirThreshold
|
|
}
|
|
|
|
// ImportParameters describe an import operation to start.
type ImportParameters struct {
	Repo string `json:"repo"` // ID of the open repository to import into
	Job *timeline.ImportJob `json:"job"` // the import job to create and schedule

	// For external data sources: (TODO: ... figure this out)
	// DataSource timeline.DataSource // required: Name, Title, Icon, Description
}
|
|
|
|
func (app App) Import(params ImportParameters) (uint64, error) {
|
|
tl, err := getOpenTimeline(params.Repo)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
// queue job for a brief period to allow UI to render job page first and to help
|
|
// user get their bearings, unless it's interactive: then just start right away
|
|
// since the user will be waiting for the first item
|
|
const queueDuration = 5 * time.Second
|
|
scheduled := time.Now().Add(queueDuration)
|
|
if params.Job.ProcessingOptions.Interactive != nil {
|
|
scheduled = time.Time{}
|
|
}
|
|
return tl.CreateJob(params.Job, scheduled, 0, 0, 0)
|
|
}
|
|
|
|
func (App) NextGraph(repoID string, jobID uint64) (*timeline.Graph, error) {
|
|
tl, err := getOpenTimeline(repoID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return tl.Timeline.NextGraphFromImport(jobID)
|
|
}
|
|
|
|
func (App) SubmitGraph(repoID string, jobID uint64, g *timeline.Graph, skip bool) error {
|
|
tl, err := getOpenTimeline(repoID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.Timeline.SubmitGraph(jobID, g, skip)
|
|
}
|
|
|
|
func (app *App) SearchItems(params timeline.ItemSearchParams) (timeline.SearchResults, error) {
|
|
tl, err := getOpenTimeline(params.Repo)
|
|
if err != nil {
|
|
return timeline.SearchResults{}, err
|
|
}
|
|
results, err := tl.Search(app.ctx, params)
|
|
if err != nil {
|
|
return timeline.SearchResults{}, err
|
|
}
|
|
if options, ok := app.ObfuscationMode(tl.Timeline); ok {
|
|
results.Anonymize(options)
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
// TODO: all of these methods should be cancelable by the browser... somehow
|
|
|
|
func (app *App) SearchEntities(params timeline.EntitySearchParams) ([]timeline.Entity, error) {
|
|
tl, err := getOpenTimeline(params.Repo)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
results, err := tl.SearchEntities(app.ctx, params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if options, ok := app.ObfuscationMode(tl.Timeline); ok {
|
|
for i := range results {
|
|
results[i].Anonymize(options)
|
|
}
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
func (app *App) DataSources(ctx context.Context, targetDSName string) ([]timeline.DataSourceRow, error) {
|
|
openTimelinesMu.RLock()
|
|
defer openTimelinesMu.RUnlock()
|
|
|
|
// use a map for deduplication first
|
|
allMap := make(map[string]timeline.DataSourceRow)
|
|
|
|
for _, tl := range openTimelines {
|
|
tlDSes, err := tl.DataSources(ctx, targetDSName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, tlDS := range tlDSes {
|
|
allMap[tlDS.Name] = tlDS
|
|
}
|
|
if len(tlDSes) > 0 && targetDSName != "" {
|
|
break // found what we're looking for
|
|
}
|
|
}
|
|
|
|
// then turn the map which has no duplicates into a slice
|
|
all := make([]timeline.DataSourceRow, 0, len(allMap))
|
|
for _, ds := range allMap {
|
|
all = append(all, ds)
|
|
}
|
|
|
|
sort.Slice(all, func(i, j int) bool {
|
|
return all[i].Title < all[j].Title
|
|
})
|
|
|
|
return all, nil
|
|
}
|
|
|
|
func (app *App) ItemClassifications(repo string) ([]timeline.Classification, error) {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return tl.ItemClassifications()
|
|
}
|
|
|
|
// TODO: Very WIP / experimental
|
|
func (*App) ChartStats(ctx context.Context, chartName, repoID string, params url.Values) (any, error) {
|
|
tl, err := getOpenTimeline(repoID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return tl.Chart(ctx, chartName, params)
|
|
}
|
|
|
|
// Settings bundles the application configuration together with the
// per-timeline properties of every open timeline, for the frontend.
type Settings struct {
	Application *Config `json:"application,omitempty"`
	Timelines map[string]map[string]any `json:"timelines,omitempty"` // map of repo ID to map of property key to value
}
|
|
|
|
func (app *App) GetSettings(ctx context.Context) (Settings, error) {
|
|
openTimelinesMu.RLock()
|
|
defer openTimelinesMu.RUnlock()
|
|
|
|
timelineSettings := make(map[string]map[string]any)
|
|
for _, tl := range openTimelines {
|
|
tlID := tl.ID().String()
|
|
props, err := tl.Timeline.GetProperties(ctx)
|
|
if err != nil {
|
|
return Settings{}, fmt.Errorf("getting properties of timeline %s: %w", tlID, err)
|
|
}
|
|
timelineSettings[tlID] = props
|
|
}
|
|
|
|
return Settings{
|
|
Application: app.cfg,
|
|
Timelines: timelineSettings,
|
|
}, nil
|
|
}
|
|
|
|
// ChangeSettings applies the given settings payload: per-timeline
// properties are written to each (open) timeline, and application-level
// keys are written to the config. If a changed application setting
// requires it (currently only the website directory), the app is
// restarted in a background goroutine.
func (app *App) ChangeSettings(ctx context.Context, newSettings *changeSettingsPayload) error {
	if len(newSettings.Timelines) > 0 {
		for repoID, properties := range newSettings.Timelines {
			openTimelinesMu.RLock()
			tl, ok := openTimelines[repoID]
			openTimelinesMu.RUnlock()
			if ok {
				if err := tl.SetProperties(ctx, properties); err != nil {
					return fmt.Errorf("setting properties for timeline %s: %w", repoID, err)
				}
				// toggle the Python sidecar server to match semantic features
				if semantic, ok := properties["semantic_features"].(bool); ok && semantic {
					if err := app.startPythonServer(timeline.PyHost, timeline.PyPort); err != nil {
						app.log.Error("could not start Python server", zap.Error(err))
					}
				} else {
					// NOTE(review): this branch also runs when the
					// "semantic_features" key is simply absent from the update,
					// stopping the Python server on unrelated property changes —
					// confirm that is intended
					if err := app.stopPythonServer(); err != nil {
						app.log.Error("could not stop Python server", zap.Error(err))
					}
				}
			} else {
				return fmt.Errorf("timeline %s is not open", repoID)
			}
		}
	}

	if len(newSettings.Application) > 0 {
		app.cfg.Lock()
		defer app.cfg.Unlock()

		// some settings, when changed, may necessitate a restart of the server/app to take effect
		var restart bool

		// unknown keys are silently ignored; only recognized keys are applied
		for key, val := range newSettings.Application {
			var err error
			switch key {
			case "app.mapbox_api_key":
				err = json.Unmarshal(val, &app.cfg.MapboxAPIKey)
			case "app.website_dir":
				var newVal string
				err = json.Unmarshal(val, &newVal)
				restart = restart || newVal != app.cfg.WebsiteDir
				app.cfg.WebsiteDir = newVal
			case "app.obfuscation.enabled":
				err = json.Unmarshal(val, &app.cfg.Obfuscation.Enabled)
			case "app.obfuscation.locations":
				err = json.Unmarshal(val, &app.cfg.Obfuscation.Locations)
			case "app.obfuscation.data_files":
				err = json.Unmarshal(val, &app.cfg.Obfuscation.DataFiles)
			}
			if err != nil {
				return fmt.Errorf("saving setting %s: %w (value=%s)", key, err, string(val))
			}
		}

		// the config lock is already held, so use the unsynchronized save
		if err := app.cfg.unsyncedSave(); err != nil {
			return fmt.Errorf("saving config: %w", err)
		}

		if restart {
			// tear down the current app and bring up a fresh one with the
			// saved config; done in a goroutine so this request can return
			go func(oldApp *App) {
				oldApp.cancel()

				newApp, err := New(context.Background(), oldApp.cfg, oldApp.embeddedWebsite)
				if err != nil {
					oldApp.log.Error("initializing new app", zap.Error(err))
					return
				}

				started, err := newApp.Serve()
				if err != nil {
					oldApp.log.Fatal("could not start server", zap.Error(err))
				}
				if !started {
					oldApp.log.Error("server not started; maybe the old listener is still bound (please report this as a bug)")
				}
			}(app)
		}
	}

	return nil
}
|
|
|
|
// TODO: very experimental
|
|
func (app *App) LoadRecentConversations(ctx context.Context, params timeline.ItemSearchParams) ([]*timeline.Conversation, error) {
|
|
tl, err := getOpenTimeline(params.Repo)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
convos, err := tl.RecentConversations(ctx, params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if options, ok := app.ObfuscationMode(tl.Timeline); ok {
|
|
for _, convo := range convos {
|
|
for i := range convo.Entities {
|
|
convo.Entities[i].Anonymize(options)
|
|
}
|
|
for i := range convo.RecentMessages {
|
|
convo.RecentMessages[i].Anonymize(options)
|
|
}
|
|
}
|
|
}
|
|
return convos, nil
|
|
}
|
|
|
|
func (app App) LoadConversation(ctx context.Context, params timeline.ItemSearchParams) (timeline.SearchResults, error) {
|
|
tl, err := getOpenTimeline(params.Repo)
|
|
if err != nil {
|
|
return timeline.SearchResults{}, err
|
|
}
|
|
convo, err := tl.LoadConversation(ctx, params)
|
|
if err != nil {
|
|
return timeline.SearchResults{}, err
|
|
}
|
|
if options, ok := app.ObfuscationMode(tl.Timeline); ok {
|
|
convo.Anonymize(options)
|
|
}
|
|
return convo, nil
|
|
}
|
|
|
|
func (app App) MergeEntities(repo string, base uint64, others []uint64) error {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.MergeEntities(app.ctx, base, others)
|
|
}
|
|
|
|
func (app App) DeleteItems(repo string, itemRowIDs []uint64, options timeline.DeleteOptions) error {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.DeleteItems(app.ctx, itemRowIDs, options)
|
|
}
|
|
|
|
func (app App) Jobs(repo string, jobIDs []uint64, mostRecent int) ([]timeline.Job, error) {
|
|
if repo != "" {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return tl.GetJobs(app.ctx, jobIDs, mostRecent)
|
|
}
|
|
return nil, errors.New("TODO: Getting jobs other than by specific IDs not yet implemented")
|
|
}
|
|
|
|
func (app App) CancelJob(ctx context.Context, repo string, jobID uint64) error {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.CancelJob(ctx, jobID)
|
|
}
|
|
|
|
func (app App) PauseJob(ctx context.Context, repo string, jobID uint64) error {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.PauseJob(ctx, jobID)
|
|
}
|
|
|
|
func (app App) UnpauseJob(ctx context.Context, repo string, jobID uint64) error {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.UnpauseJob(ctx, jobID)
|
|
}
|
|
|
|
func (app App) StartJob(ctx context.Context, repo string, jobID uint64, startOver bool) error {
|
|
tl, err := getOpenTimeline(repo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return tl.StartJob(ctx, jobID, startOver)
|
|
}
|
|
|
|
// BuildInfo describes the platform this binary was compiled for.
type BuildInfo struct {
	GoOS string `json:"go_os"`     // target operating system (runtime.GOOS)
	GoArch string `json:"go_arch"` // target CPU architecture (runtime.GOARCH)
}
|
|
|
|
func (app *App) BuildInfo() BuildInfo {
|
|
return BuildInfo{
|
|
GoOS: runtime.GOOS,
|
|
GoArch: runtime.GOARCH,
|
|
}
|
|
}
|
|
|
|
func (app App) ObfuscationMode(repo *timeline.Timeline) (timeline.ObfuscationOptions, bool) {
|
|
app.cfg.RLock()
|
|
defer app.cfg.RUnlock()
|
|
return app.cfg.Obfuscation, app.cfg.Obfuscation.AppliesTo(repo)
|
|
}
|