/* Timelinize Copyright (c) 2013 Matthew Holt This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package timeline import ( "context" "fmt" "net/url" "slices" "strconv" "time" ) func (tl *Timeline) Chart(ctx context.Context, chartName string, params url.Values) (any, error) { switch chartName { case "periodical": return tl.chartRecentItems(ctx, params) case "classifications": return tl.chartItemTypes(ctx) case "datasources": return tl.chartDataSourceUsage(ctx) case "attributes_stacked_area": return tl.AttributeStats(ctx, params) case "recent_data_sources": return tl.chartRecentDaysItemCount(ctx, params) default: return nil, fmt.Errorf("unknown chart name: %s", chartName) } } func (tl *Timeline) chartRecentItems(ctx context.Context, params url.Values) (any, error) { period := params.Get("period") var dateAdjust, startOf, periodColumn string switch period { default: fallthrough case "monthly": dateAdjust = "-11 months" startOf = "month" periodColumn = "strftime('%m', date(timestamp/1000, 'unixepoch'))" case "daily": dateAdjust = "-28 days" startOf = "day" periodColumn = "date(timestamp/1000, 'unixepoch')" case "yearly": dateAdjust = "-10 years" startOf = "year" periodColumn = "strftime('%Y', date(timestamp/1000, 'unixepoch'))" } // this query uses a CTE (common table expression) to find the most recent item timestamp in the database // and then subtract the time period we want to visualize... so like, if we want to view the last 12 months // including this one, we subtract 11 months and go to the start of that month (so if it's October, we // go back 11 months to the start of the last November, since we don't want to include partial October // that happened 12 months ago) -- with that value, we can then select the count of items in each period // that are newer than the cutoff date we calculated in the CTE. // (The `date(min(timestamp/1000, unixepoch()), 'unixepoch')` bit is because the highest item timestamp // in the DB might be in the future (calendar events, for example), and may often be wrong (bad data), // so we only go from the last item OR today's timestamp, whichever is earlier.) //nolint:gosec // all our values are hard-coded query := fmt.Sprintf(` WITH bounds AS ( SELECT unixepoch( (SELECT date(min(timestamp/1000, unixepoch()), 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '%s', 'start of %s' )*1000 AS earliest_timestamp ) SELECT %s AS period, count() FROM items, bounds WHERE timestamp > earliest_timestamp GROUP BY period ORDER BY timestamp`, dateAdjust, startOf, periodColumn) rows, err := tl.db.ReadPool.QueryContext(ctx, query) if err != nil { return nil, err } defer rows.Close() type periodicStats struct { Period string `json:"period"` Count int `json:"count"` } var results []periodicStats for rows.Next() { var group string var count int err := rows.Scan(&group, &count) if err != nil { return nil, err } if period == "monthly" { monthInt, err := strconv.Atoi(group) if err != nil { return nil, fmt.Errorf("bad month group: %s: %w", group, err) } group = time.Month(monthInt).String() } results = append(results, periodicStats{group, count}) } if err := rows.Err(); err != nil { return nil, err } return results, nil } func (tl *Timeline) chartItemTypes(ctx context.Context) (any, error) { rows, err := tl.db.ReadPool.QueryContext(ctx, "SELECT classification_name, count() FROM extended_items GROUP BY classification_id") if err != nil { return nil, err } defer rows.Close() type classificationStat struct { ClassificationName string `json:"name"` Count int `json:"value"` } var results []classificationStat for rows.Next() { var className *string var count int err := rows.Scan(&className, &count) if err != nil { return nil, err } if className == nil { classNameStr := "unknown" className = &classNameStr } results = append(results, classificationStat{*className, count}) } if err := rows.Err(); err != nil { return nil, err } return results, nil } func (tl *Timeline) chartRecentDaysItemCount(ctx context.Context, params url.Values) (any, error) { days, err := strconv.Atoi(params.Get("days")) if err != nil { return nil, err } const msPerDay = int(24 * time.Hour / time.Millisecond) minTime := msPerDay * days // the LIMIT is arbitrary, just to prevent an accidentally huge resultset // (the `date(min(timestamp/1000, unixepoch()), 'unixepoch'))` bit is the same as similar in another chart query, // to prevent future-timestampped items, which are usually wrong or from calendars, from crowding out all // the other data on the chart; so the latest timestamp we accept is today's date) rows, err := tl.db.ReadPool.QueryContext(ctx, ` SELECT strftime('%Y-%m-%d', date(min(timestamp/1000, unixepoch()), 'unixepoch')) AS date, data_source_name, count() FROM extended_items WHERE timestamp > unixepoch()*1000 - ? GROUP BY date, data_source_name LIMIT 2000`, minTime) if err != nil { return nil, err } defer rows.Close() type recentDaysCount struct { Date string `json:"date"` DataSourceName string `json:"data_source_name"` Count int `json:"count"` } var results []recentDaysCount for rows.Next() { var date, dsName string var count int err := rows.Scan(&date, &dsName, &count) if err != nil { return nil, err } results = append(results, recentDaysCount{date, dsName, count}) } if err := rows.Err(); err != nil { return nil, err } return results, nil } /* SELECT date(timestamp/1000, 'unixepoch') as date, strftime('%H', time(timestamp/1000, 'unixepoch')) as hour, data_source_name, count() FROM extended_items -- WHERE timestamp > unixepoch((SELECT date(timestamp/1000, 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-30 days')*1000 WHERE id % 1 = 0 -- AND timestamp IS NOT NULL AND timestamp > unixepoch((SELECT date(timestamp/1000, 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-365 days')*1000 GROUP BY date, hour ORDER BY timestamp LIMIT 1000; */ // chartDataSourceUsage returns the counts by data source. func (tl *Timeline) chartDataSourceUsage(ctx context.Context) (any, error) { // This query is a bit of a mouthful, but basically, its purpose is to retrieve a sampling of items // that are likely representative of the whole repo, and group them by data source and hour so that // they can be rendered onto a chart where the X axis is date, Y is time/hour of day, and Z (the size // of the bubble) is the number of items in that group. This particular query accounts for timezone // offsets where possible/non-NULL, so that items that were experienced at, say, 10AM, don't show up // at ridiculous unholy hours of the morning; `timediff(datetime('now', 'localtime'), datetime())` // calculates the local timezone offset, which is used as a second-best guess if the item row does // not include an offset. This is a reasonable assumption as long as the person's data generally // stays in the same timezone. // // We currently group by week number; this is arbitrary but strikes a decent balance between group // size and granularity. Too granular (i.e. daily) and there's too many data points. Groups too large // will lead to a sparse and uninteresting/unhelpful chart. We also currently select timestamps that // happened up to a year before the last item in the table, but this is also arbitrary and helps form // the readability of the chart. Finally, we limit to 1000 results to avoid burdening the frontend. // Another idea is to not constrain by timestamp and instead to sample every Nth row, for example // (but still would want to ensure timestamp is not null). // // Oh, and we make sure to set the end date to the last item, or today's date if the last item is in // the future, since future items are outliers and throw off the whole chart. rows, err := tl.db.ReadPool.QueryContext(ctx, ` SELECT data_source_name, date(timestamp/1000, 'unixepoch') AS date, CASE WHEN time_offset THEN cast(strftime('%k', time(timestamp/1000, 'unixepoch', time_offset||' seconds')) AS INTEGER) ELSE cast(strftime('%k', time(timestamp/1000, 'unixepoch', timediff(datetime('now', 'localtime'), datetime()))) AS INTEGER) END AS hour, count() FROM extended_items WHERE timestamp > unixepoch((SELECT date(min(timestamp/1000, unixepoch()), 'unixepoch') FROM items ORDER BY timestamp DESC LIMIT 1), '-730 days')*1000 AND timestamp <= unixepoch()*1000 GROUP BY strftime('%Y %W', date(timestamp/1000, 'unixepoch')), hour ORDER BY data_source_name, timestamp LIMIT 2000`) if err != nil { return nil, err } defer rows.Close() type dsUsageSeries struct { Name string `json:"name"` Data [][]any `json:"data"` // inner dimension is [x, y, z] or, specifically, [date, hour, count] } var all []dsUsageSeries for rows.Next() { var date, dsName string var hour, count int err := rows.Scan(&dsName, &date, &hour, &count) if err != nil { return nil, err } if len(all) == 0 || all[len(all)-1].Name != dsName { all = append(all, dsUsageSeries{Name: dsName}) } all[len(all)-1].Data = append(all[len(all)-1].Data, []any{date, hour, count}) } if err := rows.Err(); err != nil { return nil, err } return all, nil } // AttributeStats returns the counts of items by month for attributes of an entity. func (tl *Timeline) AttributeStats(ctx context.Context, params url.Values) (any, error) { entityID, err := strconv.Atoi(params.Get("entity_id")) if err != nil { return nil, fmt.Errorf("invalid entity ID; must be integer: %w", err) } rows, err := tl.db.ReadPool.QueryContext(ctx, ` SELECT strftime('%Y', date(items.timestamp/1000, 'unixepoch')) AS year, strftime('%m', date(items.timestamp/1000, 'unixepoch')) AS month, attributes.name, attributes.value, count(items.id) AS num FROM entity_attributes JOIN attributes ON attributes.id = entity_attributes.attribute_id JOIN items ON items.attribute_id = attributes.id WHERE entity_attributes.entity_id = ? AND items.timestamp IS NOT NULL AND year IS NOT NULL AND month IS NOT NULL GROUP BY year, month, entity_attributes.attribute_id ORDER BY year, month, attributes.value`, entityID) if err != nil { return nil, err } defer rows.Close() type attributeStatsSeries struct { Name string `json:"name"` // attribute value Data [][]int64 `json:"data"` AttributeName string `json:"attribute_name"` // attribute name is not used by the chart lib, but by our script } seriesMap := make(map[string]attributeStatsSeries) var earliest, latest time.Time for rows.Next() { var year, month int var count int64 var attrName, attrValue string err := rows.Scan(&year, &month, &attrName, &attrValue, &count) if err != nil { return nil, err } if _, ok := seriesMap[attrValue]; !ok { seriesMap[attrValue] = attributeStatsSeries{Name: attrValue, AttributeName: attrName} } monthYear := time.Date(year, time.Month(month), 1, 0, 0, 0, 0, time.UTC) if monthYear.Before(earliest) || earliest.IsZero() { earliest = monthYear } if monthYear.After(latest) || latest.IsZero() { latest = monthYear } series := seriesMap[attrValue] series.Data = append(series.Data, []int64{monthYear.UnixMilli(), count}) seriesMap[attrValue] = series } if err := rows.Err(); err != nil { return nil, err } /////// for k, series := range seriesMap { var seriesIdx int for ts := earliest; !ts.After(latest); ts = ts.AddDate(0, 1, 0) { if seriesIdx >= len(series.Data) { series.Data = append(series.Data, []int64{ts.UnixMilli(), 0}) } if ts.UnixMilli() < series.Data[seriesIdx][0] { series.Data = slices.Insert(series.Data, seriesIdx, []int64{ts.UnixMilli(), 0}) } seriesIdx++ } seriesMap[k] = series } /////// results := make([]attributeStatsSeries, 0, len(seriesMap)) for _, series := range seriesMap { results = append(results, series) } return results, nil }