/* Timelinize Copyright (c) 2013 Matthew Holt This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ -- Generic key/value store for the repo itself (version, ID, etc), and settings. CREATE TABLE IF NOT EXISTS "repo" ( "key" TEXT PRIMARY KEY, "value", "type" TEXT -- hint to the application since all values come back as strings - TODO: can we just infer this? That would be much nicer, but could also be problematic ) WITHOUT ROWID; -- A data source is where data comes from, like a content provider, like a cloud photo service, -- social media site, or exported archive format. This table primarily exists to provide data -- consistency for rows that refer to a data source enforced by the DB itself, using more compact -- IDs than the program's string names. CREATE TABLE IF NOT EXISTS "data_sources" ( "id" INTEGER PRIMARY KEY, -- row ID, used only for DB consistency, applies only to this DB "name" TEXT NOT NULL UNIQUE, -- programming ID, e.g. "google_photos" as used in the application code and across different timeline DBs "title" TEXT NOT NULL, -- human-readable name/label "description" TEXT, "media" BLOB, -- icon/image, square dimensions work best "media_type" TEXT, -- MIME type of the icon "standard" BOOLEAN NOT NULL DEFAULT false -- true for data sources that come hard-coded with the app ); CREATE TABLE IF NOT EXISTS "jobs" ( "id" INTEGER PRIMARY KEY, "type" TEXT NOT NULL, -- import, thumbnails, etc... "name" TEXT, -- an optional user-assigned name "configuration" TEXT, -- encoded as JSON "hash" BLOB, -- for preventing duplicate jobs; opaque to everything except the code creating the job "state" TEXT NOT NULL DEFAULT 'queued', -- queued, started, paused, aborted, succeeded, failed "hostname" TEXT, -- hostname of the machine the job was created and configured on "created" INTEGER NOT NULL DEFAULT (cast(round(unixepoch('subsec')*1000) AS INTEGER)), -- timestamp job was stored/enqueued in unix milliseconds UTC "updated" INTEGER, -- timestamp of last DB sync (in unix milliseconds UTC) "start" INTEGER, -- timestamp job was actually started in unix milliseconds UTC *could be future, so not called "started") "ended" INTEGER, -- timestamp in unix milliseconds UTC when job ended (not paused) "message" TEXT, -- brief message describing current status to be shown to the user, changes less frequently than log emissions "total" INTEGER, -- total number of units to complete "progress" INTEGER, -- number of units completed towards the total count "checkpoint" TEXT, -- required state for resuming an incomplete job, as a JSON serialization -- if job is scheduled to run automatically at a certain interval, the following fields track that state "repeat" INTEGER, -- when this job is started, next job should be scheduled (inserted for future start) this many seconds from start time (not to be started if previous still running) "parent_job_id" INTEGER, -- the job before this one that scheduled or created this one, forming a chain or linked list FOREIGN KEY ("parent_job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL ) STRICT; -- Entity type names are hard-coded (but their IDs are not). CREATE TABLE IF NOT EXISTS "entity_types" ( "id" INTEGER PRIMARY KEY, "name" TEXT NOT NULL UNIQUE ) STRICT; -- An entity represents a person, place, or thing (i.e. nouns). The vast majority will be -- persons, but some may be pets/animals, companies and organizations, political -- entities, or landmarks/places. It is not uncommon for duplicate entities to appear in -- this table through various data imports, because each data source has a different way -- of identifying or describing users/entities. Manual curation or post-processing may be -- required to reconcile duplicates. CREATE TABLE IF NOT EXISTS "entities" ( "id" INTEGER PRIMARY KEY, "type_id" INTEGER NOT NULL, "job_id" INTEGER, -- import that originally created this entity, if via a data import "stored" INTEGER NOT NULL DEFAULT (unixepoch()), "modified" INTEGER, -- timestamp when entity was locally/manually modified (may include linked attributes) "name" TEXT COLLATE NOCASE, "picture_file" TEXT, "metadata" TEXT, -- optional extra information, encoded as JSON (should almost never be used! use attributes instead) "hidden" INTEGER, -- if owner would like to forget about this person, don't show in search results, etc. "deleted" INTEGER, -- timestamp when item was moved to trash and can be purged after some amount of time after that FOREIGN KEY ("type_id") REFERENCES "entity_types"("id") ON UPDATE CASCADE, FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL ) STRICT; -- Attributes describe entities. An attribute has a name and a value. The name for a particular -- kind of attribute should be consistent throughout because some attributes need special handling -- or can be normalized in the code (e.g. phone numbers). A single attribute may describe one or -- more entities; attributes that don't link to any entities (via the entity_attributes table) -- should not exist (see the relevant trigger for this). An attribute may identify an entity, -- for example, a phone number or email address is kind of a globally-unique identifier; or an -- attribute named "twitter_id" for example would identify an entity on Twitter. An attribute -- that is an identity for one entity may not be for another, so that is encoded in the -- entity_attributes table instead. The special attribute named "_entity" acts as a pass-thru -- attribute directly to the entity by ID; this is useful if there's no particular attribute -- relevant or known, and instead we want to specify an entity directly instead. -- TODO: For ancestors, their identifying attribute could be a FamilySearch record ID. CREATE TABLE IF NOT EXISTS "attributes" ( "id" INTEGER PRIMARY KEY, "name" TEXT NOT NULL, "value" ANY COLLATE NOCASE, "alt_value" TEXT, -- optional alternate value intended for display or as a description -- the coordinate values below are useful if the attribute value is, in fact, a location "longitude" REAL, "latitude" REAL, "altitude" REAL, "metadata" TEXT, -- optional extra info encoded as JSON UNIQUE ("name", "value") ) STRICT; -- This table links entities and attributes. An entity may have multiple attributes, and an attribute -- may apply to multiple entities. If the attribute represents an entity's identity on a particular -- data source, the data_source_id field will be filled out. CREATE TABLE IF NOT EXISTS "entity_attributes" ( "id" INTEGER PRIMARY KEY, "entity_id" INTEGER NOT NULL, "attribute_id" INTEGER NOT NULL, "data_source_id" INTEGER, -- if set, the attribute defines the entity's identity on this data source (a row of a certain entity_id and attribute_id can be duplicated if they are identities on different data sources) "job_id" INTEGER, -- the ID of the import that originated this row "start" INTEGER, -- when the attribute started applying to the entity "end" INTEGER, -- when the attribute stopped applying to the entity FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("data_source_id") REFERENCES "data_sources"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL, UNIQUE("entity_id", "attribute_id", "data_source_id") ) STRICT; -- These indexes make loading entities by attributes faster, especially during import jobs, when it's in the hot path. -- (If the query plan still shows "SCAN entities" then it might be due to the analyzer -- determining that's faster than using the index; can test the index by adding "INDEXED BY...") CREATE INDEX IF NOT EXISTS "idx_entities_type_name" ON "entities"("type_id", "name"); CREATE INDEX IF NOT EXISTS "idx_attributes_name_value" ON "attributes"("name", "value"); CREATE INDEX IF NOT EXISTS "idx_ea_entity_attr" ON "entity_attributes"("attribute_id", "entity_id"); -- A classification describes what kind of thing an item is, for example a text message, tweet, email, or location. CREATE TABLE IF NOT EXISTS "classifications" ( "id" INTEGER PRIMARY KEY, "standard" INTEGER NOT NULL DEFAULT 0, -- 1: hard-coded classification, 0: user-created "name" TEXT NOT NULL UNIQUE, -- programming name, e.g. "message" "labels" TEXT NOT NULL, -- comma-separated human-friendly names, e.g. "Text message,SMS,MMS,iMessage,Texting" "description" TEXT NOT NULL ) STRICT; -- TODO: Not used currently (but is supported, and accessed in queries). CREATE TABLE IF NOT EXISTS "item_data" ( "id" INTEGER PRIMARY KEY, "content" BLOB NOT NULL UNIQUE ) STRICT; -- An item is something imported from a specific data source. They represent things that are -- originated, created, or recorded; as opposed to entities, which just exist and don't have -- specific temporal significance. Items typically exist as a result of entities existing. CREATE TABLE IF NOT EXISTS "items" ( "id" INTEGER PRIMARY KEY, "data_source_id" INTEGER, "job_id" INTEGER, -- the import job that originally inserted this item "modified_job_id" INTEGER, -- the import job that most recently modified this existing item "attribute_id" INTEGER, -- owner, creator, or originator attributed to this item "classification_id" INTEGER, "original_id" TEXT, -- ID provided by the data source "original_location" TEXT, -- path or location of the file/data on the original data source; should include filename if applicable "intermediate_location" TEXT, -- path or location of the file/data from the import dataset (e.g. after exporting from the data source); should include filename if application "filename" TEXT, -- name of the original file as named by the owner, if known "timestamp" INTEGER, -- unix epoch millisecond timestamp when item content was originally created (NOT when the database row was created) "timespan" INTEGER, -- ending unix epoch ms timestamp if this item spans time (instead of being a single point in time); can be used in conjunction with timeframe to suggest duration "timeframe" INTEGER, -- ending unix epoch ms timestamp if this item takes place somewhere between timestamp and timeframe, but it's not certain exactly when "time_offset" INTEGER, -- offset of original timestamp in seconds east of UTC/GMT (time zone); null means "local time" or "wall time" "time_offset_origin" TEXT, -- if the offset (time zone) was explicitly part of the timestamp, this will be null; otherwise a byte indicating how we sourced/inferred the offset "time_uncertainty" INTEGER, -- if nonzero, time columns may be inaccurate on the order of this number of milliseconds, essentially sliding the times in a fuzzy interval "stored" INTEGER NOT NULL DEFAULT (unixepoch()), -- unix epoch second timestamp when row was created or last retrieved from source "modified" INTEGER, -- unix epoch second timestamp when item was manually modified (not via an import); if not null, then item is "not clean" "data_id" INTEGER, -- TODO: not used currently, but may be used to store binary data in the database directly "data_type" TEXT, -- the MIME type (aka "media type") of the data "data_text" TEXT COLLATE NOCASE, -- item content, if text-encoded and (by default) not very long "data_file" TEXT COLLATE NOCASE, -- item filename, if non-text or not suitable for storage in DB (usually media), relative to repo root "data_hash" BLOB, -- BLAKE3 checksum of contents of the data file "metadata" TEXT, -- optional extra information, encoded as JSON for flexibility "longitude" REAL, -- or equivalent X-coord for the coordinate system "latitude" REAL, -- or equivalent Y-coord for the coordinate system "altitude" REAL, -- or equivalent Z-coord for the coordinate system "coordinate_system" TEXT, -- reserved for future use to define which coordinate system is being used or on which world coordinates refer to "coordinate_uncertainty" REAL, -- if nonzero, lat/lon values may be inaccurate by this amount (same unit as coordinates) "note" TEXT, -- optional user-added information "starred" INTEGER, -- like a bookmark; TODO: different numbers indicate different kinds of stars or something? "thumb_hash" BLOB, -- bytes of the ThumbHash (https://evanw.github.io/thumbhash/ and https://github.com/evanw/thumbhash) - prefixed with aspect ratio; stored here with item for fast, easy retrieval "original_id_hash" BLOB, -- a hash of the data source and original ID of the item, also used for duplicate detection, optionally stored when item is deleted "initial_content_hash" BLOB, -- a hash computed during initial import, used for duplicate detection (remains same even if item is modified by user) "retrieval_key" BLOB UNIQUE, -- an optional opaque value that indicates this item may not be fully populated in a single import; not an ID but still a unique identifier "hidden" INTEGER, -- if owner would like to forget about this item, don't show it in search results, etc. TODO: keep? "deleted" INTEGER, -- 1 = if the columns will be erased, they have been erased; >1 = a unix epoch timestamp after which the columns can be erased FOREIGN KEY ("data_source_id") REFERENCES "data_sources"("id") ON UPDATE CASCADE, FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE, --TODO: maybe add ON DELETE CASCADE someday, which would rely on a regular sweeping of the files (garbage collection)! or we could do SET NULL FOREIGN KEY ("modified_job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE SET NULL, -- deleting that import won't undo the changes, however FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE, FOREIGN KEY ("classification_id") REFERENCES "classifications"("id") ON UPDATE CASCADE, FOREIGN KEY ("data_id") REFERENCES "item_data"("id") ON UPDATE CASCADE ON DELETE SET NULL, UNIQUE ("data_source_id", "original_id") ) STRICT; CREATE INDEX IF NOT EXISTS "idx_items_timestamp" ON "items"("timestamp"); -- the need for this is obvious CREATE INDEX IF NOT EXISTS "idx_items_data_file" ON "items"("data_file"); -- used during imports especially for maintenance -- These next two partial indexes greatly speed up processing when importing items, or any queries that -- check for existing rows that may have been deleted or modified from their original content. Because -- they are partial indexes, I have found that they do not greatly impact insert performance since most -- new items are not deleted or modified going in. CREATE INDEX IF NOT EXISTS "idx_items_deleted_original_id_hash" ON "items"("deleted", "original_id_hash") WHERE deleted IS NOT NULL OR original_id_hash IS NOT NULL; CREATE INDEX IF NOT EXISTS "idx_items_initial_content_hash" ON "items"("initial_content_hash") WHERE modified IS NOT NULL OR deleted IS NOT NULL; -- This partial index speeds up processing for items that have incoming data and we need to check for -- existing item by its data (i.e. Collection items, very common with photo libraries; one per media item!) CREATE INDEX IF NOT EXISTS "idx_items_data_text_hash" ON "items"("data_text" COLLATE NOCASE, "data_hash") WHERE data_text IS NOT NULL OR data_hash IS NOT NULL; -- This partial index speeds up processing for items that have IDs assigned by their data source. CREATE INDEX IF NOT EXISTS "idx_items_original_id" ON "items"("original_id") WHERE original_ID IS NOT NULL; -- Relationships may exist between and across items and entities. A row -- in this table is an actual connection between items and/or entities. CREATE TABLE IF NOT EXISTS "relationships" ( "id" INTEGER PRIMARY KEY, "relation_id" INTEGER NOT NULL, "value" ANY, -- optional; some relationships make sense if they contain a value (like a reaction: what was the reaction? or a collection element, what is its position?) -- exactly one of the from_ fields and one of the to_ fields should be non-NULL "from_item_id" INTEGER, "from_attribute_id" INTEGER, "to_item_id" INTEGER, "to_attribute_id" INTEGER, -- the next two fields optionally describe when the relationship started and/or ended as unix epoch timestamp in seconds "start" INTEGER, "end" INTEGER, "metadata" TEXT, -- optional extra info encoded as JSON FOREIGN KEY ("relation_id") REFERENCES "relations"("id") ON UPDATE CASCADE, FOREIGN KEY ("from_item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("to_item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("from_attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("to_attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE ) STRICT; -- This speeds up inserts because we check for duplicate relationships in the app (it's complex with potential timeframes), -- and this way only 1 index is necessary, not multiple on the various fields we check for uniqueness CREATE INDEX IF NOT EXISTS "idx_relationships_from_item_id" ON "relationships"("from_item_id"); -- Relations define the way relationships connect. They are described by natural -- language phrases such as "in reply to", "picture of", or "attached to"; or could -- be words like "siblings", "coworkers", or "related". Relations will either be -- directed (one way; to/from matter) or bidirectional (both ways), which depends -- on the phrase and whatever makes sense if you were to put it into a sentence. CREATE TABLE IF NOT EXISTS "relations" ( "id" INTEGER PRIMARY KEY, "label" TEXT NOT NULL UNIQUE, -- natural phrase "directed" BOOLEAN NOT NULL DEFAULT true, -- false = bidirectional "subordinating" BOOLEAN NOT NULL DEFAULT false -- if true, item on the end of directed relation does not make sense on its own, e.g. attachment or motion picture ); -- not STRICT due to boolean type -- A story is a user-created document with rich text formatting (basically HTML; -- including hrefs to other items/entities). It can also embed timeline data, which -- embeddings are stored in the story_elements table. CREATE TABLE IF NOT EXISTS "stories" ( "id" INTEGER PRIMARY KEY, "name" TEXT, "description" TEXT, "content" TEXT COLLATE NOCASE, "content_type" TEXT NOT NULL, -- MIME type; "text/html", "text/markdown", etc -- whatever form the content is in "modified" INTEGER NOT NULL DEFAULT (unixepoch()) ) STRICT; -- Elements of stories. An element can be rich text (HTML; including hrefs to other items/entities); -- an embedded item, entity, or collection; or a query that loads a dynamic set of items, entities, or -- collections. Rich text content can be provided together with embedded data as an annotation. CREATE TABLE IF NOT EXISTS "story_elements" ( "id" INTEGER PRIMARY KEY, "story_id" INTEGER NOT NULL, -- only one of the following 5 fields should be populated "item_id" INTEGER, -- embedded item "entity_id" INTEGER, -- embedded entity "query" TEXT, -- embedded dynamic set; omits SELECT, e.g. "FROM items|entities WHERE ... ORDER BY ... LIMIT ..." "external" TEXT, -- embedded data from other timeline/repo DB, of this format: ":|item|entity:rowid" "note" TEXT, -- optional annotation of embedded content FOREIGN KEY ("story_id") REFERENCES "stories"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE SET NULL, FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE SET NULL ) STRICT; -- A tag is a simple descriptor on an item, put there by the user for organizational purposes. CREATE TABLE IF NOT EXISTS "tags" ( "id" INTEGER PRIMARY KEY, "label" TEXT NOT NULL UNIQUE, "color" TEXT ) STRICT; -- The tags associated with items, entities, stories, or relationships. -- The same tag may not be applied multiple times to the same entity. CREATE TABLE IF NOT EXISTS "tagged" ( "id" INTEGER PRIMARY KEY, "tag_id" INTEGER NOT NULL, -- only one of the following ID fields should be populated per row "item_id" INTEGER, "entity_id" INTEGER, "attribute_id" INTEGER, "story_id" INTEGER, "relationship_id" INTEGER, "data" TEXT, -- could be used for rich relationships, e.g. the coordinates of a entity tagged in a photo, etc. -- TODO: a way to indicate if this tag came as part of an import from a data source? FOREIGN KEY ("tag_id") REFERENCES "tags"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("story_id") REFERENCES "stories"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("relationship_id") REFERENCES "relationships"("id") ON UPDATE CASCADE ON DELETE CASCADE ) STRICT; -- TODO: I might get rid of this. This table would become user-created commentary on items/entities... but those could be items themselves... CREATE TABLE IF NOT EXISTS "notes" ( "id" INTEGER PRIMARY KEY, -- only one of the following ID fields should be populated per row "item_id" INTEGER, "entity_id" INTEGER, "content" TEXT, -- a user-friendly description that doesn't fit the DB structure "metadata" TEXT, -- optional metadata structured as JSON FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE ) STRICT; -- A log of changes, notices, warnings, or errors pertaining to elements of the timeline. CREATE TABLE IF NOT EXISTS "logs" ( "id" INTEGER PRIMARY KEY, "level" INTEGER, -- 0=update/modification 1=notice 2=error "message" TEXT, "metadata" TEXT, -- optional metadata structured as JSON "timestamp" INTEGER, -- unix timestamp milliseconds -- only one of the following ID fields should be populated per row "job_id" INTEGER, "item_id" INTEGER, "entity_id" INTEGER, "attribute_id" INTEGER, "relationship_id" INTEGER, "entity_attribute_id" INTEGER, FOREIGN KEY ("job_id") REFERENCES "jobs"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("entity_id") REFERENCES "entities"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("attribute_id") REFERENCES "attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("relationship_id") REFERENCES "relationships"("id") ON UPDATE CASCADE ON DELETE CASCADE, FOREIGN KEY ("entity_attribute_id") REFERENCES "entity_attributes"("id") ON UPDATE CASCADE ON DELETE CASCADE ) STRICT; -- Embeddings enable "intelligent" search using ML models to derive semantics and meaning. -- By finding other embeddings that are close (dot product or euclidean distance), -- similarity searches are possible. This requires the sqlite-vec module. CREATE TABLE IF NOT EXISTS "embeddings" ( "id" INTEGER PRIMARY KEY, "item_id" INTEGER, "generated" INTEGER NOT NULL DEFAULT (unixepoch()), -- when the embedding was generated (timestamp in unix seconds UTC) "embedding" BLOB, -- TODO: could define as float[768] (unless STRICT) and then use `check(typeof(contents_embedding) == 'blob' AND vec_length(contents_embedding) == 768)` FOREIGN KEY ("item_id") REFERENCES "items"("id") ON UPDATE CASCADE ON DELETE CASCADE ) STRICT; CREATE INDEX IF NOT EXISTS "idx_embeddings_item_id" ON "embeddings"("item_id"); -- TODO: this is convenient -- will probably keep this, because the db-based enums like data sources and classifications -- don't get translated earlier; maybe we could, but I still need to think on that... if we do keep this, -- I wonder if it'd be useful to loop in the attribute name and value as well? for item de-duplication in loadItemRow().... CREATE VIEW IF NOT EXISTS "extended_items" AS SELECT items.*, data_sources.name AS data_source_name, data_sources.title AS data_source_title, classifications.name AS classification_name, -- TODO: verify these are useful fields and make the dashboard queries more efficient cast(strftime('%j', date(round(timestamp/1000), 'unixepoch')) AS INTEGER) AS day_of_year, cast(strftime('%W', date(round(timestamp/1000), 'unixepoch')) AS INTEGER) AS week_of_year, cast(strftime('%m', date(round(timestamp/1000), 'unixepoch')) AS INTEGER) AS month_of_year FROM items LEFT JOIN data_sources ON data_sources.id = items.data_source_id LEFT JOIN classifications ON classifications.id = items.classification_id; -- Don't allow stray attributes; if for any reason no entity_attributes rows -- point to a given attribute, always delete the attribute to clean up. The -- only reason we have them separated into two tables is to allow multiple -- entities to share an attribute, but it's pointless for an attribute to not -- be associated with any entities. CREATE TRIGGER IF NOT EXISTS prevent_stray_attributes AFTER DELETE ON entity_attributes FOR EACH ROW WHEN (SELECT count(1) FROM (SELECT id FROM entity_attributes WHERE attribute_id=OLD.attribute_id LIMIT 1)) = 0 BEGIN DELETE FROM attributes WHERE id=OLD.attribute_id; END;