timelinize/timeline/charts.go
Commit e9a7c03c53 by Matthew Holt
Fix ExFAT crashes; refactor sql.DB handling
The crashes on ExFAT are caused by a bug in the macOS ExFAT driver. It is unclear whether other OSes are affected too.

https://github.com/mattn/go-sqlite3/issues/1355

We now use SQLite's concurrency features by creating a write pool (size 1) and a read pool, which lets us eliminate our own RWMutex that prevented reads from running at the same time as writes. SQLite's WAL mode allows reads concurrent with writes, and our code is much cleaner.

Still need to do the same for the thumbnail DB.

We could also look into using prepared statements for further efficiency gains.
2025-09-30 12:31:41 -06:00
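
A minimal sketch of the two-pool setup described above, using mattn/go-sqlite3 (the `_journal_mode` and `_busy_timeout` DSN parameters are real go-sqlite3 options, but the file name and variable names here are illustrative, not the exact code from this commit):

	// the write pool: exactly one connection, since SQLite allows only one writer at a time
	writeDB, err := sql.Open("sqlite3", "file:timeline.db?_journal_mode=WAL&_busy_timeout=5000")
	if err != nil {
		return err
	}
	writeDB.SetMaxOpenConns(1)

	// the read pool: WAL mode lets any number of readers run concurrently with the writer
	readDB, err := sql.Open("sqlite3", "file:timeline.db?mode=ro&_journal_mode=WAL")
	if err != nil {
		return err
	}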


/*
Timelinize
Copyright (c) 2013 Matthew Holt
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package timeline
import (
"context"
"fmt"
"net/url"
"slices"
"strconv"
"time"
)
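// Chart returns the data for the chart named chartName, computed using the given query parameters.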
func (tl *Timeline) Chart(ctx context.Context, chartName string, params url.Values) (any, error) {
switch chartName {
case "periodical":
return tl.chartRecentItems(ctx, params)
case "classifications":
return tl.chartItemTypes(ctx)
case "datasources":
return tl.chartDataSourceUsage(ctx)
case "attributes_stacked_area":
return tl.AttributeStats(ctx, params)
case "recent_data_sources":
return tl.chartRecentDaysItemCount(ctx, params)
default:
return nil, fmt.Errorf("unknown chart name: %s", chartName)
}
}
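// chartRecentItems counts items per period (day, month, or year) over a recent window of the timeline.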
func (tl *Timeline) chartRecentItems(ctx context.Context, params url.Values) (any, error) {
period := params.Get("period")
var dateAdjust, startOf, periodColumn string
switch period {
default:
// normalize unrecognized values so the month-name conversion below also applies to the default case
period = "monthly"
fallthrough
case "monthly":
dateAdjust = "-11 months"
startOf = "month"
periodColumn = "strftime('%m', date(timestamp/1000, 'unixepoch'))"
case "daily":
dateAdjust = "-28 days"
startOf = "day"
periodColumn = "date(timestamp/1000, 'unixepoch')"
case "yearly":
dateAdjust = "-10 years"
startOf = "year"
periodColumn = "strftime('%Y', date(timestamp/1000, 'unixepoch'))"
}
// this query uses a CTE (common table expression) to find the most recent item timestamp in the database
// and then subtract the time period we want to visualize... so like, if we want to view the last 12 months
// including this one, we subtract 11 months and go to the start of that month (so if it's October, we
// go back 11 months to the start of the last November, since we don't want to include partial October
// that happened 12 months ago) -- with that value, we can then select the count of items in each period
// that are newer than the cutoff date we calculated in the CTE.
// (The `date(min(timestamp/1000, unixepoch()), 'unixepoch')` bit is because the highest item timestamp
// in the DB might be in the future (calendar events, for example), and may often be wrong (bad data),
// so we only go from the last item OR today's timestamp, whichever is earlier.)
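// For example (dates illustrative): if the newest item is from 2025-10-15 and period=monthly, the
// cutoff is unixepoch('2025-10-15', '-11 months', 'start of month')*1000, i.e. 2024-11-01, so the
// chart covers Nov 2024 through Oct 2025.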
//nolint:gosec // all our values are hard-coded
query := fmt.Sprintf(`
WITH bounds AS (
SELECT unixepoch(
(SELECT date(min(timestamp/1000, unixepoch()), 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1),
'%s',
'start of %s'
)*1000 AS earliest_timestamp
) SELECT
%s AS period,
count()
FROM items, bounds
WHERE timestamp > earliest_timestamp
GROUP BY period
ORDER BY timestamp`, dateAdjust, startOf, periodColumn)
rows, err := tl.db.ReadPool.QueryContext(ctx, query)
if err != nil {
return nil, err
}
defer rows.Close()
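// periodicStats is one point on the chart, e.g. {"period": "January", "count": 42} (values illustrative)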
type periodicStats struct {
Period string `json:"period"`
Count int `json:"count"`
}
var results []periodicStats
for rows.Next() {
var group string
var count int
err := rows.Scan(&group, &count)
if err != nil {
return nil, err
}
if period == "monthly" {
monthInt, err := strconv.Atoi(group)
if err != nil {
return nil, fmt.Errorf("bad month group: %s: %w", group, err)
}
group = time.Month(monthInt).String()
}
results = append(results, periodicStats{group, count})
}
if err := rows.Err(); err != nil {
return nil, err
}
return results, nil
}
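// chartItemTypes returns item counts grouped by classification.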
func (tl *Timeline) chartItemTypes(ctx context.Context) (any, error) {
rows, err := tl.db.ReadPool.QueryContext(ctx, "SELECT classification_name, count() FROM extended_items GROUP BY classification_id")
if err != nil {
return nil, err
}
defer rows.Close()
type classificationStat struct {
ClassificationName string `json:"name"`
Count int `json:"value"`
}
var results []classificationStat
for rows.Next() {
var className *string
var count int
err := rows.Scan(&className, &count)
if err != nil {
return nil, err
}
if className == nil {
classNameStr := "unknown"
className = &classNameStr
}
results = append(results, classificationStat{*className, count})
}
if err := rows.Err(); err != nil {
return nil, err
}
return results, nil
}
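// chartRecentDaysItemCount returns per-day item counts by data source for the number of days given by the "days" parameter.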
func (tl *Timeline) chartRecentDaysItemCount(ctx context.Context, params url.Values) (any, error) {
days, err := strconv.Atoi(params.Get("days"))
if err != nil {
return nil, err
}
const msPerDay = int(24 * time.Hour / time.Millisecond)
windowMs := msPerDay * days // size of the look-back window, in milliseconds
// the LIMIT is arbitrary, just to prevent an accidentally huge resultset.
// (The `date(min(timestamp/1000, unixepoch()), 'unixepoch')` bit serves the same purpose as in the other
// chart queries: it keeps future-timestamped items, which are usually wrong or from calendars, from
// crowding out all the other data on the chart; the latest timestamp we accept is today's date.)
rows, err := tl.db.ReadPool.QueryContext(ctx, `
SELECT
strftime('%Y-%m-%d', date(min(timestamp/1000, unixepoch()), 'unixepoch')) AS date,
data_source_name,
count()
FROM extended_items
WHERE timestamp > unixepoch()*1000 - ?
GROUP BY date, data_source_name
LIMIT 2000`, windowMs)
if err != nil {
return nil, err
}
defer rows.Close()
type recentDaysCount struct {
Date string `json:"date"`
DataSourceName string `json:"data_source_name"`
Count int `json:"count"`
}
var results []recentDaysCount
for rows.Next() {
var date, dsName string
var count int
err := rows.Scan(&date, &dsName, &count)
if err != nil {
return nil, err
}
results = append(results, recentDaysCount{date, dsName, count})
}
if err := rows.Err(); err != nil {
return nil, err
}
return results, nil
}
/*
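NOTE: unused scratch query below; it appears to be an earlier variant of the data-source
usage query in chartDataSourceUsage.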
SELECT
date(timestamp/1000, 'unixepoch') as date,
strftime('%H', time(timestamp/1000, 'unixepoch')) as hour,
data_source_name,
count()
FROM extended_items
-- WHERE timestamp > unixepoch((SELECT date(timestamp/1000, 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-30 days')*1000
WHERE id % 1 = 0
-- AND timestamp IS NOT NULL
AND timestamp > unixepoch((SELECT date(timestamp/1000, 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-365 days')*1000
GROUP BY date, hour
ORDER BY timestamp
LIMIT 1000;
*/
// chartDataSourceUsage returns item counts by data source, grouped by date and hour of day.
func (tl *Timeline) chartDataSourceUsage(ctx context.Context) (any, error) {
// This query is a bit of a mouthful, but basically, its purpose is to retrieve a sampling of items
// that are likely representative of the whole repo, and group them by data source and hour so that
// they can be rendered onto a chart where the X axis is date, Y is time/hour of day, and Z (the size
// of the bubble) is the number of items in that group. This particular query accounts for timezone
// offsets where possible/non-NULL, so that items that were experienced at, say, 10AM, don't show up
// at ridiculous unholy hours of the morning; `timediff(datetime('now', 'localtime'), datetime())`
// calculates the local timezone offset, which is used as a second-best guess if the item row does
// not include an offset. This is a reasonable assumption as long as the person's data generally
// stays in the same timezone.
//
// We currently group by week number; this is arbitrary but strikes a decent balance between group
// size and granularity. Too granular (i.e. daily) and there are too many data points; groups that are
// too large lead to a sparse and uninteresting/unhelpful chart. We also currently select timestamps
// from up to two years before the last item in the table, but this too is arbitrary and helps keep
// the chart readable. Finally, we limit to 2000 results to avoid burdening the frontend. Another idea
// is to not constrain by timestamp at all and instead sample every Nth row, for example (but we would
// still want to ensure timestamp is not null).
//
// Oh, and we cap timestamps at the current time, since future-timestamped items (calendar events,
// bad data) are outliers that throw off the whole chart.
rows, err := tl.db.ReadPool.QueryContext(ctx, `
SELECT
data_source_name,
date(timestamp/1000, 'unixepoch') AS date,
CASE
WHEN time_offset THEN cast(strftime('%k', time(timestamp/1000, 'unixepoch', time_offset||' seconds')) AS INTEGER)
ELSE cast(strftime('%k', time(timestamp/1000, 'unixepoch', timediff(datetime('now', 'localtime'), datetime()))) AS INTEGER)
END AS hour,
count()
FROM extended_items
WHERE timestamp > unixepoch((SELECT date(min(timestamp/1000, unixepoch()), 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-730 days')*1000
AND timestamp <= unixepoch()*1000
GROUP BY strftime('%Y %W', date(timestamp/1000, 'unixepoch')), hour
ORDER BY data_source_name, timestamp
LIMIT 2000`)
if err != nil {
return nil, err
}
defer rows.Close()
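// dsUsageSeries is one chart series per data source; e.g. {"name": "My Photos", "data": [["2024-05-01", 9, 12]]}
// would mean 12 items from "My Photos" around hour 9 of the day (values illustrative).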
type dsUsageSeries struct {
Name string `json:"name"`
Data [][]any `json:"data"` // inner dimension is [x, y, z] or, specifically, [date, hour, count]
}
var all []dsUsageSeries
for rows.Next() {
var date, dsName string
var hour, count int
err := rows.Scan(&dsName, &date, &hour, &count)
if err != nil {
return nil, err
}
if len(all) == 0 || all[len(all)-1].Name != dsName {
all = append(all, dsUsageSeries{Name: dsName})
}
all[len(all)-1].Data = append(all[len(all)-1].Data, []any{date, hour, count})
}
if err := rows.Err(); err != nil {
return nil, err
}
return all, nil
}
// AttributeStats returns the counts of items by month for attributes of an entity.
func (tl *Timeline) AttributeStats(ctx context.Context, params url.Values) (any, error) {
entityID, err := strconv.Atoi(params.Get("entity_id"))
if err != nil {
return nil, fmt.Errorf("invalid entity ID; must be integer: %w", err)
}
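// The query yields one row per (year, month, attribute) with the number of items attached to that
// attribute; months with no items produce no row at all, so zero counts are backfilled after scanning.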
rows, err := tl.db.ReadPool.QueryContext(ctx, `
SELECT
strftime('%Y', date(items.timestamp/1000, 'unixepoch')) AS year,
strftime('%m', date(items.timestamp/1000, 'unixepoch')) AS month,
attributes.name,
attributes.value,
count(items.id) AS num
FROM entity_attributes
JOIN attributes ON attributes.id = entity_attributes.attribute_id
JOIN items ON items.attribute_id = attributes.id
WHERE entity_attributes.entity_id = ?
AND items.timestamp IS NOT NULL
AND year IS NOT NULL
AND month IS NOT NULL
GROUP BY year, month, entity_attributes.attribute_id
ORDER BY year, month, attributes.value`, entityID)
if err != nil {
return nil, err
}
defer rows.Close()
type attributeStatsSeries struct {
Name string `json:"name"` // attribute value
Data [][]int64 `json:"data"`
AttributeName string `json:"attribute_name"` // attribute name is not used by the chart lib, but by our script
}
seriesMap := make(map[string]attributeStatsSeries)
var earliest, latest time.Time
for rows.Next() {
var year, month int
var count int64
var attrName, attrValue string
err := rows.Scan(&year, &month, &attrName, &attrValue, &count)
if err != nil {
return nil, err
}
if _, ok := seriesMap[attrValue]; !ok {
seriesMap[attrValue] = attributeStatsSeries{Name: attrValue, AttributeName: attrName}
}
monthYear := time.Date(year, time.Month(month), 1, 0, 0, 0, 0, time.UTC)
if monthYear.Before(earliest) || earliest.IsZero() {
earliest = monthYear
}
if monthYear.After(latest) || latest.IsZero() {
latest = monthYear
}
series := seriesMap[attrValue]
series.Data = append(series.Data, []int64{monthYear.UnixMilli(), count})
seriesMap[attrValue] = series
}
if err := rows.Err(); err != nil {
return nil, err
}
// Backfill: the query emits no rows for empty months, so walk month-by-month from earliest to latest
// and give every series a zero-count data point for each month it is missing, padding the tail as needed.
for k, series := range seriesMap {
var seriesIdx int
for ts := earliest; !ts.After(latest); ts = ts.AddDate(0, 1, 0) {
if seriesIdx >= len(series.Data) {
series.Data = append(series.Data, []int64{ts.UnixMilli(), 0})
}
if ts.UnixMilli() < series.Data[seriesIdx][0] {
series.Data = slices.Insert(series.Data, seriesIdx, []int64{ts.UnixMilli(), 0})
}
seriesIdx++
}
seriesMap[k] = series
}
results := make([]attributeStatsSeries, 0, len(seriesMap))
for _, series := range seriesMap {
results = append(results, series)
}
return results, nil
}