/*
Timelinize
Copyright (c) 2013 Matthew Holt
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-- Generic key/value store for the repo itself (version, ID, etc), and settings.
CREATE TABLE IF NOT EXISTS "repo" (
"key" TEXT PRIMARY KEY,
"value",
"type" TEXT -- hint to the application since all values come back as strings - TODO: can we just infer this? That would be much nicer, but could also be problematic
) WITHOUT ROWID;
-- A data source is where data comes from, like a content provider, like a cloud photo service,
-- social media site, or exported archive format. This table primarily exists to provide data
-- consistency for rows that refer to a data source enforced by the DB itself, using more compact
-- IDs than the program's string names.
CREATE TABLE IF NOT EXISTS "data_sources" (
"id" INTEGER PRIMARY KEY, -- row ID, used only for DB consistency, applies only to this DB
"name" TEXT NOT NULL UNIQUE, -- programming ID, e.g. "google_photos" as used in the application code and across different timeline DBs
"title" TEXT NOT NULL, -- human-readable name/label
"description" TEXT,
"media" BLOB, -- icon/image, square dimensions work best
"media_type" TEXT, -- MIME type of the icon
"standard" BOOLEAN NOT NULL DEFAULT false -- true for data sources that come hard-coded with the app
);
CREATE TABLE IF NOT EXISTS "jobs" (
"id" INTEGER PRIMARY KEY,
"type" TEXT NOT NULL, -- import, thumbnails, etc...
"name" TEXT, -- an optional user-assigned name
"configuration" TEXT, -- encoded as JSON
"hash" BLOB, -- for preventing duplicate jobs; opaque to everything except the code creating the job
"state" TEXT NOT NULL DEFAULT 'queued', -- queued, started, paused, aborted, succeeded, failed
"hostname" TEXT, -- hostname of the machine the job was created and configured on
"created" INTEGER NOT NULL DEFAULT (cast(round(unixepoch('subsec')*1000) AS INTEGER)), -- timestamp job was stored/enqueued in unix milliseconds UTC
"updated" INTEGER, -- timestamp of last DB sync (in unix milliseconds UTC)
"start" INTEGER, -- timestamp job was actually started in unix milliseconds UTC *could be future, so not called "started")
"ended" INTEGER, -- timestamp in unix milliseconds UTC when job ended (not paused)
"message" TEXT, -- brief message describing current status to be shown to the user, changes less frequently than log emissions
"total" INTEGER, -- total number of units to complete
"progress" INTEGER, -- number of units completed towards the total count
"checkpoint" TEXT, -- required state for resuming an incomplete job, as a JSON serialization
-- if job is scheduled to run automatically at a certain interval, the following fields track that state
"repeat" INTEGER, -- when this job is started, next job should be scheduled (inserted for future start) this many seconds from start time (not to be started if previous still running)
"parent_job_id" INTEGER, -- the job before this one that scheduled or created this one, forming a chain or linked list
FOREIGN KEY ("parent_job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL
) STRICT;
-- Entity type names are hard-coded (but their IDs are not).
CREATE TABLE IF NOT EXISTS "entity_types" (
"id" INTEGER PRIMARY KEY,
"name" TEXT NOT NULL UNIQUE
) STRICT;
-- An entity represents a person, place, or thing (i.e. nouns). The vast majority will be
-- persons, but some may be pets/animals, companies and organizations, political
-- entities, or landmarks/places. It is not uncommon for duplicate entities to appear in
-- this table through various data imports, because each data source has a different way
-- of identifying or describing users/entities. Manual curation or post-processing may be
-- required to reconcile duplicates.
CREATE TABLE IF NOT EXISTS "entities" (
"id" INTEGER PRIMARY KEY,
"type_id" INTEGER NOT NULL,
"job_id" INTEGER, -- import that originally created this entity, if via a data import
"stored" INTEGER NOT NULL DEFAULT (unixepoch()),
"modified" INTEGER, -- timestamp when entity was locally/manually modified (may include linked attributes)
"name" TEXT COLLATE NOCASE,
"picture_file" TEXT,
"metadata" TEXT, -- optional extra information, encoded as JSON (should almost never be used! use attributes instead)
"hidden" INTEGER, -- if owner would like to forget about this person, don't show in search results, etc.
"deleted" INTEGER, -- timestamp when item was moved to trash and can be purged after some amount of time after that
FOREIGN KEY ("type_id") REFERENCES "entity_types"("id") ON UPDATE CASCADE,
FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL
) STRICT;
-- Attributes describe entities. An attribute has a name and a value. The name for a particular
-- kind of attribute should be consistent throughout because some attributes need special handling
-- or can be normalized in the code (e.g. phone numbers). A single attribute may describe one or
-- more entities; attributes that don't link to any entities (via the entity_attributes table)
-- should not exist (see the relevant trigger for this). An attribute may identify an entity,
-- for example, a phone number or email address is kind of a globally-unique identifier; or an
-- attribute named "twitter_id" for example would identify an entity on Twitter. An attribute
-- that is an identity for one entity may not be for another, so that is encoded in the
-- entity_attributes table instead. The special attribute named "_entity" acts as a pass-thru
-- attribute directly to the entity by ID; this is useful if there's no particular attribute
-- relevant or known, and instead we want to specify an entity directly instead.
-- TODO: For ancestors, their identifying attribute could be a FamilySearch record ID.
CREATE TABLE IF NOT EXISTS "attributes" (
"id" INTEGER PRIMARY KEY,
"name" TEXT NOT NULL,
"value" ANY COLLATE NOCASE,
"alt_value" TEXT, -- optional alternate value intended for display or as a description
-- the coordinate values below are useful if the attribute value is, in fact, a location
"longitude" REAL,
"latitude" REAL,
"altitude" REAL,
"metadata" TEXT, -- optional extra info encoded as JSON
UNIQUE ("name", "value")
) STRICT;
-- This table links entities and attributes. An entity may have multiple attributes, and an attribute
-- may apply to multiple entities. If the attribute represents an entity's identity on a particular
-- data source, the data_source_id field will be filled out.
CREATE TABLE IF NOT EXISTS "entity_attributes" (
"id" INTEGER PRIMARY KEY,
"entity_id" INTEGER NOT NULL,
"attribute_id" INTEGER NOT NULL,
"data_source_id" INTEGER, -- if set, the attribute defines the entity's identity on this data source (a row of a certain entity_id and attribute_id can be duplicated if they are identities on different data sources)
"job_id" INTEGER, -- the ID of the import that originated this row
"start" INTEGER, -- when the attribute started applying to the entity
"end" INTEGER, -- when the attribute stopped applying to the entity
FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("data_source_id") REFERENCES "data_sources"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL,
UNIQUE("entity_id", "attribute_id", "data_source_id")
) STRICT;
-- These indexes make loading entities by attributes faster, especially during import jobs, when it's in the hot path.
-- (If the query plan still shows "SCAN entities" then it might be due to the analyzer
-- determining that's faster than using the index; can test the index by adding "INDEXED BY...")
CREATE INDEX IF NOT EXISTS "idx_entities_type_name" ON "entities"("type_id", "name");
CREATE INDEX IF NOT EXISTS "idx_attributes_name_value" ON "attributes"("name", "value");
CREATE INDEX IF NOT EXISTS "idx_ea_entity_attr" ON "entity_attributes"("attribute_id", "entity_id");
-- A classification describes what kind of thing an item is, for example a text message, tweet, email, or location.
CREATE TABLE IF NOT EXISTS "classifications" (
"id" INTEGER PRIMARY KEY,
"standard" INTEGER NOT NULL DEFAULT 0, -- 1: hard-coded classification, 0: user-created
"name" TEXT NOT NULL UNIQUE, -- programming name, e.g. "message"
"labels" TEXT NOT NULL, -- comma-separated human-friendly names, e.g. "Text message,SMS,MMS,iMessage,Texting"
"description" TEXT NOT NULL
) STRICT;
-- TODO: Not used currently (but is supported, and accessed in queries).
CREATE TABLE IF NOT EXISTS "item_data" (
"id" INTEGER PRIMARY KEY,
"content" BLOB NOT NULL UNIQUE
) STRICT;
-- An item is something imported from a specific data source. They represent things that are
-- originated, created, or recorded; as opposed to entities, which just exist and don't have
-- specific temporal significance. Items typically exist as a result of entities existing.
CREATE TABLE IF NOT EXISTS "items" (
"id" INTEGER PRIMARY KEY,
"data_source_id" INTEGER,
"job_id" INTEGER, -- the import job that originally inserted this item
"modified_job_id" INTEGER, -- the import job that most recently modified this existing item
"attribute_id" INTEGER, -- owner, creator, or originator attributed to this item
"classification_id" INTEGER,
"original_id" TEXT, -- ID provided by the data source
"original_location" TEXT, -- path or location of the file/data on the original data source; should include filename if applicable
"intermediate_location" TEXT, -- path or location of the file/data from the import dataset (e.g. after exporting from the data source); should include filename if application
"filename" TEXT, -- name of the original file as named by the owner, if known
"timestamp" INTEGER, -- unix epoch millisecond timestamp when item content was originally created (NOT when the database row was created)
"timespan" INTEGER, -- ending unix epoch ms timestamp if this item spans time (instead of being a single point in time); can be used in conjunction with timeframe to suggest duration
"timeframe" INTEGER, -- ending unix epoch ms timestamp if this item takes place somewhere between timestamp and timeframe, but it's not certain exactly when
"time_offset" INTEGER, -- offset of original timestamp in seconds east of UTC/GMT (time zone); null means "local time" or "wall time"
"time_offset_origin" TEXT, -- if the offset (time zone) was explicitly part of the timestamp, this will be null; otherwise a byte indicating how we sourced/inferred the offset
"time_uncertainty" INTEGER, -- if nonzero, time columns may be inaccurate on the order of this number of milliseconds, essentially sliding the times in a fuzzy interval
"stored" INTEGER NOT NULL DEFAULT (unixepoch()), -- unix epoch second timestamp when row was created or last retrieved from source
"modified" INTEGER, -- unix epoch second timestamp when item was manually modified (not via an import); if not null, then item is "not clean"
"data_id" INTEGER, -- TODO: not used currently, but may be used to store binary data in the database directly
"data_type" TEXT, -- the MIME type (aka "media type") of the data
"data_text" TEXT COLLATE NOCASE, -- item content, if text-encoded and (by default) not very long
"data_file" TEXT COLLATE NOCASE, -- item filename, if non-text or not suitable for storage in DB (usually media), relative to repo root
"data_hash" BLOB, -- BLAKE3 checksum of contents of the data file
"metadata" TEXT, -- optional extra information, encoded as JSON for flexibility
"longitude" REAL, -- or equivalent X-coord for the coordinate system
"latitude" REAL, -- or equivalent Y-coord for the coordinate system
"altitude" REAL, -- or equivalent Z-coord for the coordinate system
"coordinate_system" TEXT, -- reserved for future use to define which coordinate system is being used or on which world coordinates refer to
"coordinate_uncertainty" REAL, -- if nonzero, lat/lon values may be inaccurate by this amount (same unit as coordinates)
"note" TEXT, -- optional user-added information
"starred" INTEGER, -- like a bookmark; TODO: different numbers indicate different kinds of stars or something?
"thumb_hash" BLOB, -- bytes of the ThumbHash (https://evanw.github.io/thumbhash/ and https://github.com/evanw/thumbhash) - prefixed with aspect ratio; stored here with item for fast, easy retrieval
"original_id_hash" BLOB, -- a hash of the data source and original ID of the item, also used for duplicate detection, optionally stored when item is deleted
"initial_content_hash" BLOB, -- a hash computed during initial import, used for duplicate detection (remains same even if item is modified by user)
"retrieval_key" BLOB UNIQUE, -- an optional opaque value that indicates this item may not be fully populated in a single import; not an ID but still a unique identifier
"hidden" INTEGER, -- if owner would like to forget about this item, don't show it in search results, etc. TODO: keep?
"deleted" INTEGER, -- 1 = if the columns will be erased, they have been erased; >1 = a unix epoch timestamp after which the columns can be erased
FOREIGN KEY ("data_source_id") REFERENCES "data_sources"("id") ON UPDATE CASCADE,
FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE, --TODO: maybe add ON DELETE CASCADE someday, which would rely on a regular sweeping of the files (garbage collection)! or we could do SET NULL
FOREIGN KEY ("modified_job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL, -- deleting that import won't undo the changes, however
FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE,
FOREIGN KEY ("classification_id") REFERENCES "classifications"("id") ON UPDATE CASCADE,
FOREIGN KEY ("data_id") REFERENCES "item_data"("id") ON UPDATE CASCADE ON DELETE SET NULL,
UNIQUE ("data_source_id", "original_id")
) STRICT;
CREATE INDEX IF NOT EXISTS "idx_items_timestamp" ON "items"("timestamp"); -- the need for this is obvious
CREATE INDEX IF NOT EXISTS "idx_items_data_file" ON "items"("data_file"); -- used during imports especially for maintenance
-- These next two partial indexes greatly speed up processing when importing items, or any queries that
-- check for existing rows that may have been deleted or modified from their original content. Because
-- they are partial indexes, I have found that they do not greatly impact insert performance since most
-- new items are not deleted or modified going in.
CREATE INDEX IF NOT EXISTS "idx_items_deleted_original_id_hash" ON "items"("deleted", "original_id_hash") WHERE deleted IS NOT NULL OR original_id_hash IS NOT NULL;
CREATE INDEX IF NOT EXISTS "idx_items_initial_content_hash" ON "items"("initial_content_hash") WHERE modified IS NOT NULL OR deleted IS NOT NULL;
-- This partial index speeds up processing for items that have incoming data and we need to check for
-- existing item by its data (i.e. Collection items, very common with photo libraries; one per media item!)
CREATE INDEX IF NOT EXISTS "idx_items_data_text_hash" ON "items"("data_text" COLLATE NOCASE, "data_hash") WHERE data_text IS NOT NULL OR data_hash IS NOT NULL;
-- This partial index speeds up processing for items that have IDs assigned by their data source.
CREATE INDEX IF NOT EXISTS "idx_items_original_id" ON "items"("original_id") WHERE original_ID IS NOT NULL;
-- Relationships may exist between and across items and entities. A row
-- in this table is an actual connection between items and/or entities.
CREATE TABLE IF NOT EXISTS "relationships" (
"id" INTEGER PRIMARY KEY,
"relation_id" INTEGER NOT NULL,
"value" ANY, -- optional; some relationships make sense if they contain a value (like a reaction: what was the reaction? or a collection element, what is its position?)
-- exactly one of the from_ fields and one of the to_ fields should be non-NULL
"from_item_id" INTEGER,
"from_attribute_id" INTEGER,
"to_item_id" INTEGER,
"to_attribute_id" INTEGER,
-- the next two fields optionally describe when the relationship started and/or ended as unix epoch timestamp in seconds
"start" INTEGER,
"end" INTEGER,
"metadata" TEXT, -- optional extra info encoded as JSON
FOREIGN KEY ("relation_id") REFERENCES "relations"("id") ON UPDATE CASCADE,
FOREIGN KEY ("from_item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("to_item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("from_attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("to_attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE
) STRICT;
-- This speeds up inserts because we check for duplicate relationships in the app (it's complex with potential timeframes),
-- and this way only 1 index is necessary, not multiple on the various fields we check for uniqueness
CREATE INDEX IF NOT EXISTS "idx_relationships_from_item_id" ON "relationships"("from_item_id");
-- Relations define the way relationships connect. They are described by natural
-- language phrases such as "in reply to", "picture of", or "attached to"; or could
-- be words like "siblings", "coworkers", or "related". Relations will either be
-- directed (one way; to/from matter) or bidirectional (both ways), which depends
-- on the phrase and whatever makes sense if you were to put it into a sentence.
CREATE TABLE IF NOT EXISTS "relations" (
"id" INTEGER PRIMARY KEY,
"label" TEXT NOT NULL UNIQUE, -- natural phrase
"directed" BOOLEAN NOT NULL DEFAULT true, -- false = bidirectional
"subordinating" BOOLEAN NOT NULL DEFAULT false -- if true, item on the end of directed relation does not make sense on its own, e.g. attachment or motion picture
); -- not STRICT due to boolean type
-- A story is a user-created document with rich text formatting (basically HTML;
-- including hrefs to other items/entities). It can also embed timeline data, which
-- embeddings are stored in the story_elements table.
CREATE TABLE IF NOT EXISTS "stories" (
"id" INTEGER PRIMARY KEY,
"name" TEXT,
"description" TEXT,
"content" TEXT COLLATE NOCASE,
"content_type" TEXT NOT NULL, -- MIME type; "text/html", "text/markdown", etc -- whatever form the content is in
"modified" INTEGER NOT NULL DEFAULT (unixepoch())
) STRICT;
-- Elements of stories. An element can be rich text (HTML; including hrefs to other items/entities);
-- an embedded item, entity, or collection; or a query that loads a dynamic set of items, entities, or
-- collections. Rich text content can be provided together with embedded data as an annotation.
CREATE TABLE IF NOT EXISTS "story_elements" (
"id" INTEGER PRIMARY KEY,
"story_id" INTEGER NOT NULL,
-- only one of the following 5 fields should be populated
"item_id" INTEGER, -- embedded item
"entity_id" INTEGER, -- embedded entity
"query" TEXT, -- embedded dynamic set; omits SELECT, e.g. "FROM items|entities WHERE ... ORDER BY ... LIMIT ..."
"external" TEXT, -- embedded data from other timeline/repo DB, of this format: ":|item|entity:rowid"
"note" TEXT, -- optional annotation of embedded content
FOREIGN KEY ("story_id") REFERENCES "stories"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE SET NULL,
FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE SET NULL
) STRICT;
-- A tag is a simple descriptor on an item, put there by the user for organizational purposes.
CREATE TABLE IF NOT EXISTS "tags" (
"id" INTEGER PRIMARY KEY,
"label" TEXT NOT NULL UNIQUE,
"color" TEXT
) STRICT;
-- The tags associated with items, entities, stories, or relationships.
-- The same tag may not be applied multiple times to the same entity.
CREATE TABLE IF NOT EXISTS "tagged" (
"id" INTEGER PRIMARY KEY,
"tag_id" INTEGER NOT NULL,
-- only one of the following ID fields should be populated per row
"item_id" INTEGER,
"entity_id" INTEGER,
"attribute_id" INTEGER,
"story_id" INTEGER,
"relationship_id" INTEGER,
"data" TEXT, -- could be used for rich relationships, e.g. the coordinates of a entity tagged in a photo, etc.
-- TODO: a way to indicate if this tag came as part of an import from a data source?
FOREIGN KEY ("tag_id") REFERENCES "tags"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("story_id") REFERENCES "stories"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("relationship_id") REFERENCES "relationships"("id") ON UPDATE CASCADE ON DELETE CASCADE
) STRICT;
-- TODO: I might get rid of this. This table would become user-created commentary on items/entities... but those could be items themselves...
CREATE TABLE IF NOT EXISTS "notes" (
"id" INTEGER PRIMARY KEY,
-- only one of the following ID fields should be populated per row
"item_id" INTEGER,
"entity_id" INTEGER,
"content" TEXT, -- a user-friendly description that doesn't fit the DB structure
"metadata" TEXT, -- optional metadata structured as JSON
FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE
) STRICT;
-- A log of changes, notices, warnings, or errors pertaining to elements of the timeline.
CREATE TABLE IF NOT EXISTS "logs" (
"id" INTEGER PRIMARY KEY,
"level" INTEGER, -- 0=update/modification 1=notice 2=error
"message" TEXT,
"metadata" TEXT, -- optional metadata structured as JSON
"timestamp" INTEGER, -- unix timestamp milliseconds
-- only one of the following ID fields should be populated per row
"job_id" INTEGER,
"item_id" INTEGER,
"entity_id" INTEGER,
"attribute_id" INTEGER,
"relationship_id" INTEGER,
"entity_attribute_id" INTEGER,
FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("relationship_id") REFERENCES "relationships"("id") ON UPDATE CASCADE ON DELETE CASCADE,
FOREIGN KEY ("entity_attribute_id") REFERENCES "entity_attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE
) STRICT;
-- Embeddings enable "intelligent" search using ML models to derive semantics and meaning.
-- By finding other embeddings that are close (dot product or euclidean distance),
-- similarity searches are possible. This requires the sqlite-vec module.
CREATE TABLE IF NOT EXISTS "embeddings" (
"id" INTEGER PRIMARY KEY,
"item_id" INTEGER,
"generated" INTEGER NOT NULL DEFAULT (unixepoch()), -- when the embedding was generated (timestamp in unix seconds UTC)
"embedding" BLOB, -- TODO: could define as float[768] (unless STRICT) and then use `check(typeof(contents_embedding) == 'blob' AND vec_length(contents_embedding) == 768)`
FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE
) STRICT;
CREATE INDEX IF NOT EXISTS "idx_embeddings_item_id" ON "embeddings"("item_id");
-- TODO: this is convenient -- will probably keep this, because the db-based enums like data sources and classifications
-- don't get translated earlier; maybe we could, but I still need to think on that... if we do keep this,
-- I wonder if it'd be useful to loop in the attribute name and value as well? for item de-duplication in loadItemRow()....
CREATE VIEW IF NOT EXISTS "extended_items" AS
SELECT
items.*,
data_sources.name AS data_source_name,
data_sources.title AS data_source_title,
classifications.name AS classification_name,
-- TODO: verify these are useful fields and make the dashboard queries more efficient
cast(strftime('%j', date(round(timestamp/1000), 'unixepoch')) AS INTEGER) AS day_of_year,
cast(strftime('%W', date(round(timestamp/1000), 'unixepoch')) AS INTEGER) AS week_of_year,
cast(strftime('%m', date(round(timestamp/1000), 'unixepoch')) AS INTEGER) AS month_of_year
FROM items
LEFT JOIN data_sources ON data_sources.id = items.data_source_id
LEFT JOIN classifications ON classifications.id = items.classification_id;
-- Don't allow stray attributes; if for any reason no entity_attributes rows
-- point to a given attribute, always delete the attribute to clean up. The
-- only reason we have them separated into two tables is to allow multiple
-- entities to share an attribute, but it's pointless for an attribute to not
-- be associated with any entities.
CREATE TRIGGER IF NOT EXISTS prevent_stray_attributes
AFTER DELETE ON entity_attributes
FOR EACH ROW
WHEN
(SELECT count(1) FROM
(SELECT id FROM entity_attributes WHERE attribute_id=OLD.attribute_id LIMIT 1)) = 0
BEGIN
DELETE FROM attributes WHERE id=OLD.attribute_id;
END;