The crashes on ExFAT are caused by a bug in the macOS exFAT driver; it is unclear whether other OSes are affected (https://github.com/mattn/go-sqlite3/issues/1355). We now utilize SQLite's concurrency features by creating a write pool (size 1) and a read pool, which lets us eliminate our own RWMutex that prevented reads at the same time as writes. SQLite's WAL mode allows reads concurrent with writes, and the code is much cleaner. Still to do: apply the same treatment to the thumbnail DB, and look into prepared statements for further efficiency gains.
382 lines
12 KiB
Go
382 lines
12 KiB
Go
/*
|
|
Timelinize
|
|
Copyright (c) 2013 Matthew Holt
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published
|
|
by the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
package timeline
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net/url"
|
|
"slices"
|
|
"strconv"
|
|
"time"
|
|
)
|
|
|
|
func (tl *Timeline) Chart(ctx context.Context, chartName string, params url.Values) (any, error) {
|
|
switch chartName {
|
|
case "periodical":
|
|
return tl.chartRecentItems(ctx, params)
|
|
case "classifications":
|
|
return tl.chartItemTypes(ctx)
|
|
case "datasources":
|
|
return tl.chartDataSourceUsage(ctx)
|
|
case "attributes_stacked_area":
|
|
return tl.AttributeStats(ctx, params)
|
|
case "recent_data_sources":
|
|
return tl.chartRecentDaysItemCount(ctx, params)
|
|
default:
|
|
return nil, fmt.Errorf("unknown chart name: %s", chartName)
|
|
}
|
|
}
|
|
|
|
func (tl *Timeline) chartRecentItems(ctx context.Context, params url.Values) (any, error) {
|
|
period := params.Get("period")
|
|
|
|
var dateAdjust, startOf, periodColumn string
|
|
switch period {
|
|
default:
|
|
fallthrough
|
|
case "monthly":
|
|
dateAdjust = "-11 months"
|
|
startOf = "month"
|
|
periodColumn = "strftime('%m', date(timestamp/1000, 'unixepoch'))"
|
|
case "daily":
|
|
dateAdjust = "-28 days"
|
|
startOf = "day"
|
|
periodColumn = "date(timestamp/1000, 'unixepoch')"
|
|
case "yearly":
|
|
dateAdjust = "-10 years"
|
|
startOf = "year"
|
|
periodColumn = "strftime('%Y', date(timestamp/1000, 'unixepoch'))"
|
|
}
|
|
|
|
// this query uses a CTE (common table expression) to find the most recent item timestamp in the database
|
|
// and then subtract the time period we want to visualize... so like, if we want to view the last 12 months
|
|
// including this one, we subtract 11 months and go to the start of that month (so if it's October, we
|
|
// go back 11 months to the start of the last November, since we don't want to include partial October
|
|
// that happened 12 months ago) -- with that value, we can then select the count of items in each period
|
|
// that are newer than the cutoff date we calculated in the CTE.
|
|
// (The `date(min(timestamp/1000, unixepoch()), 'unixepoch')` bit is because the highest item timestamp
|
|
// in the DB might be in the future (calendar events, for example), and may often be wrong (bad data),
|
|
// so we only go from the last item OR today's timestamp, whichever is earlier.)
|
|
//nolint:gosec // all our values are hard-coded
|
|
query := fmt.Sprintf(`
|
|
WITH bounds AS (
|
|
SELECT unixepoch(
|
|
(SELECT date(min(timestamp/1000, unixepoch()), 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1),
|
|
'%s',
|
|
'start of %s'
|
|
)*1000 AS earliest_timestamp
|
|
) SELECT
|
|
%s AS period,
|
|
count()
|
|
FROM items, bounds
|
|
WHERE timestamp > earliest_timestamp
|
|
GROUP BY period
|
|
ORDER BY timestamp`, dateAdjust, startOf, periodColumn)
|
|
|
|
rows, err := tl.db.ReadPool.QueryContext(ctx, query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
type periodicStats struct {
|
|
Period string `json:"period"`
|
|
Count int `json:"count"`
|
|
}
|
|
|
|
var results []periodicStats
|
|
for rows.Next() {
|
|
var group string
|
|
var count int
|
|
err := rows.Scan(&group, &count)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if period == "monthly" {
|
|
monthInt, err := strconv.Atoi(group)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("bad month group: %s: %w", group, err)
|
|
}
|
|
group = time.Month(monthInt).String()
|
|
}
|
|
results = append(results, periodicStats{group, count})
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func (tl *Timeline) chartItemTypes(ctx context.Context) (any, error) {
|
|
rows, err := tl.db.ReadPool.QueryContext(ctx, "SELECT classification_name, count() FROM extended_items GROUP BY classification_id")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
type classificationStat struct {
|
|
ClassificationName string `json:"name"`
|
|
Count int `json:"value"`
|
|
}
|
|
|
|
var results []classificationStat
|
|
for rows.Next() {
|
|
var className *string
|
|
var count int
|
|
err := rows.Scan(&className, &count)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if className == nil {
|
|
classNameStr := "unknown"
|
|
className = &classNameStr
|
|
}
|
|
results = append(results, classificationStat{*className, count})
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
func (tl *Timeline) chartRecentDaysItemCount(ctx context.Context, params url.Values) (any, error) {
|
|
days, err := strconv.Atoi(params.Get("days"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
const msPerDay = int(24 * time.Hour / time.Millisecond)
|
|
|
|
minTime := msPerDay * days
|
|
|
|
// the LIMIT is arbitrary, just to prevent an accidentally huge resultset
|
|
// (the `date(min(timestamp/1000, unixepoch()), 'unixepoch'))` bit is the same as similar in another chart query,
|
|
// to prevent future-timestampped items, which are usually wrong or from calendars, from crowding out all
|
|
// the other data on the chart; so the latest timestamp we accept is today's date)
|
|
rows, err := tl.db.ReadPool.QueryContext(ctx, `
|
|
SELECT
|
|
strftime('%Y-%m-%d', date(min(timestamp/1000, unixepoch()), 'unixepoch')) AS date,
|
|
data_source_name,
|
|
count()
|
|
FROM extended_items
|
|
WHERE timestamp > unixepoch()*1000 - ?
|
|
GROUP BY date, data_source_name
|
|
LIMIT 2000`, minTime)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
type recentDaysCount struct {
|
|
Date string `json:"date"`
|
|
DataSourceName string `json:"data_source_name"`
|
|
Count int `json:"count"`
|
|
}
|
|
|
|
var results []recentDaysCount
|
|
for rows.Next() {
|
|
var date, dsName string
|
|
var count int
|
|
err := rows.Scan(&date, &dsName, &count)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
results = append(results, recentDaysCount{date, dsName, count})
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
/*
Scratch/prototype query — not used by any chart handler; kept for reference
while experimenting with sampling strategies:
|
|
SELECT
|
|
date(timestamp/1000, 'unixepoch') as date,
|
|
strftime('%H', time(timestamp/1000, 'unixepoch')) as hour,
|
|
data_source_name,
|
|
count()
|
|
FROM extended_items
|
|
-- WHERE timestamp > unixepoch((SELECT date(timestamp/1000, 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-30 days')*1000
|
|
WHERE id % 1 = 0
|
|
-- AND timestamp IS NOT NULL
|
|
AND timestamp > unixepoch((SELECT date(timestamp/1000, 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-365 days')*1000
|
|
GROUP BY date, hour
|
|
ORDER BY timestamp
|
|
LIMIT 1000;
|
|
*/
|
|
|
|
// chartDataSourceUsage returns the counts by data source.
|
|
func (tl *Timeline) chartDataSourceUsage(ctx context.Context) (any, error) {
|
|
// This query is a bit of a mouthful, but basically, its purpose is to retrieve a sampling of items
|
|
// that are likely representative of the whole repo, and group them by data source and hour so that
|
|
// they can be rendered onto a chart where the X axis is date, Y is time/hour of day, and Z (the size
|
|
// of the bubble) is the number of items in that group. This particular query accounts for timezone
|
|
// offsets where possible/non-NULL, so that items that were experienced at, say, 10AM, don't show up
|
|
// at ridiculous unholy hours of the morning; `timediff(datetime('now', 'localtime'), datetime())`
|
|
// calculates the local timezone offset, which is used as a second-best guess if the item row does
|
|
// not include an offset. This is a reasonable assumption as long as the person's data generally
|
|
// stays in the same timezone.
|
|
//
|
|
// We currently group by week number; this is arbitrary but strikes a decent balance between group
|
|
// size and granularity. Too granular (i.e. daily) and there's too many data points. Groups too large
|
|
// will lead to a sparse and uninteresting/unhelpful chart. We also currently select timestamps that
|
|
// happened up to a year before the last item in the table, but this is also arbitrary and helps form
|
|
// the readability of the chart. Finally, we limit to 1000 results to avoid burdening the frontend.
|
|
// Another idea is to not constrain by timestamp and instead to sample every Nth row, for example
|
|
// (but still would want to ensure timestamp is not null).
|
|
//
|
|
// Oh, and we make sure to set the end date to the last item, or today's date if the last item is in
|
|
// the future, since future items are outliers and throw off the whole chart.
|
|
rows, err := tl.db.ReadPool.QueryContext(ctx, `
|
|
SELECT
|
|
data_source_name,
|
|
date(timestamp/1000, 'unixepoch') AS date,
|
|
CASE
|
|
WHEN time_offset THEN cast(strftime('%k', time(timestamp/1000, 'unixepoch', time_offset||' seconds')) AS INTEGER)
|
|
ELSE cast(strftime('%k', time(timestamp/1000, 'unixepoch', timediff(datetime('now', 'localtime'), datetime()))) AS INTEGER)
|
|
END AS hour,
|
|
count()
|
|
FROM extended_items
|
|
WHERE timestamp > unixepoch((SELECT date(min(timestamp/1000, unixepoch()), 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-730 days')*1000
|
|
AND timestamp <= unixepoch()*1000
|
|
GROUP BY strftime('%Y %W', date(timestamp/1000, 'unixepoch')), hour
|
|
ORDER BY data_source_name, timestamp
|
|
LIMIT 2000`)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
type dsUsageSeries struct {
|
|
Name string `json:"name"`
|
|
Data [][]any `json:"data"` // inner dimension is [x, y, z] or, specifically, [date, hour, count]
|
|
}
|
|
|
|
var all []dsUsageSeries
|
|
|
|
for rows.Next() {
|
|
var date, dsName string
|
|
var hour, count int
|
|
err := rows.Scan(&dsName, &date, &hour, &count)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(all) == 0 || all[len(all)-1].Name != dsName {
|
|
all = append(all, dsUsageSeries{Name: dsName})
|
|
}
|
|
all[len(all)-1].Data = append(all[len(all)-1].Data, []any{date, hour, count})
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return all, nil
|
|
}
|
|
|
|
// AttributeStats returns the counts of items by month for attributes of an entity.
|
|
func (tl *Timeline) AttributeStats(ctx context.Context, params url.Values) (any, error) {
|
|
entityID, err := strconv.Atoi(params.Get("entity_id"))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid entity ID; must be integer: %w", err)
|
|
}
|
|
|
|
rows, err := tl.db.ReadPool.QueryContext(ctx, `
|
|
SELECT
|
|
strftime('%Y', date(items.timestamp/1000, 'unixepoch')) AS year,
|
|
strftime('%m', date(items.timestamp/1000, 'unixepoch')) AS month,
|
|
attributes.name,
|
|
attributes.value,
|
|
count(items.id) AS num
|
|
FROM entity_attributes
|
|
JOIN attributes ON attributes.id = entity_attributes.attribute_id
|
|
JOIN items ON items.attribute_id = attributes.id
|
|
WHERE entity_attributes.entity_id = ?
|
|
AND items.timestamp IS NOT NULL
|
|
AND year IS NOT NULL
|
|
AND month IS NOT NULL
|
|
GROUP BY year, month, entity_attributes.attribute_id
|
|
ORDER BY year, month, attributes.value`, entityID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
type attributeStatsSeries struct {
|
|
Name string `json:"name"` // attribute value
|
|
Data [][]int64 `json:"data"`
|
|
|
|
AttributeName string `json:"attribute_name"` // attribute name is not used by the chart lib, but by our script
|
|
}
|
|
|
|
seriesMap := make(map[string]attributeStatsSeries)
|
|
|
|
var earliest, latest time.Time
|
|
|
|
for rows.Next() {
|
|
var year, month int
|
|
var count int64
|
|
var attrName, attrValue string
|
|
err := rows.Scan(&year, &month, &attrName, &attrValue, &count)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if _, ok := seriesMap[attrValue]; !ok {
|
|
seriesMap[attrValue] = attributeStatsSeries{Name: attrValue, AttributeName: attrName}
|
|
}
|
|
monthYear := time.Date(year, time.Month(month), 1, 0, 0, 0, 0, time.UTC)
|
|
if monthYear.Before(earliest) || earliest.IsZero() {
|
|
earliest = monthYear
|
|
}
|
|
if monthYear.After(latest) || latest.IsZero() {
|
|
latest = monthYear
|
|
}
|
|
series := seriesMap[attrValue]
|
|
series.Data = append(series.Data, []int64{monthYear.UnixMilli(), count})
|
|
seriesMap[attrValue] = series
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
///////
|
|
for k, series := range seriesMap {
|
|
var seriesIdx int
|
|
for ts := earliest; !ts.After(latest); ts = ts.AddDate(0, 1, 0) {
|
|
if seriesIdx >= len(series.Data) {
|
|
series.Data = append(series.Data, []int64{ts.UnixMilli(), 0})
|
|
}
|
|
if ts.UnixMilli() < series.Data[seriesIdx][0] {
|
|
series.Data = slices.Insert(series.Data, seriesIdx, []int64{ts.UnixMilli(), 0})
|
|
}
|
|
seriesIdx++
|
|
}
|
|
seriesMap[k] = series
|
|
}
|
|
///////
|
|
|
|
results := make([]attributeStatsSeries, 0, len(seriesMap))
|
|
for _, series := range seriesMap {
|
|
results = append(results, series)
|
|
}
|
|
|
|
return results, nil
|
|
}
|