/* Timelinize Copyright (c) 2013 Matthew Holt This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ // Package imessage implements a data source for Apple Messages (iMessage), either // directly from the iMessage folder on a Macbook, or an iPhone backup. package imessage import ( "context" "database/sql" "encoding/hex" "fmt" "io" "io/fs" "log" "path" "path/filepath" "slices" "strings" "time" "github.com/timelinize/timelinize/timeline" "github.com/zeebo/blake3" "go.uber.org/zap" ) func init() { err := timeline.RegisterDataSource(timeline.DataSource{ Name: "imessage", Title: "iMessage", Icon: "imessage.svg", NewOptions: func() any { return new(Options) }, NewFileImporter: func() timeline.FileImporter { return new(FileImporter) }, }) if err != nil { timeline.Log.Fatal("registering data source", zap.Error(err)) } } // FileImporter can import from the iMessage database. type FileImporter struct{} // Recognize returns whether this file or folder is supported. func (FileImporter) Recognize(_ context.Context, dirEntry timeline.DirEntry, _ timeline.RecognizeParams) (timeline.Recognition, error) { if chatDBPath(dirEntry) != "" { return timeline.Recognition{Confidence: .85}, nil } return timeline.Recognition{}, nil } // FileImport imports data from the given file or folder. func (fimp *FileImporter) FileImport(ctx context.Context, dirEntry timeline.DirEntry, params timeline.ImportParams) error { // open messages DB as read-only and prepare importer // note that the SQL APIs don't support io/fs APIs, so we cheat and just access the disk directly dbPath := filepath.Join(dirEntry.FSRoot, filepath.FromSlash(chatDBPath(dirEntry))) db, err := sql.Open("sqlite3", dbPath+"?mode=ro") if err != nil { return fmt.Errorf("opening chat.db: %w", err) } defer db.Close() im := Importer{ DB: db, FillOutAttachmentItem: func(_ context.Context, filename string, attachment *timeline.Item) { chatDBFolderPath := path.Dir(chatDBPath(dirEntry)) relativeFilename := strings.TrimPrefix(filename, "~/Library/Messages/") attachment.OriginalLocation = relativeFilename attachment.Content.Data = func(context.Context) (io.ReadCloser, error) { return dirEntry.FS.Open(path.Join(path.Clean(chatDBFolderPath), path.Clean(relativeFilename))) } }, } if err := im.ImportMessages(ctx, params); err != nil { return err } return nil } // Importer is a type that can import messages from an Apple Messages chat DB. type Importer struct { // An open chat.db sqlite file. DB *sql.DB // A callback that uses a filename as provided by // the database to fill out the OriginalLocation, // IntermediateLocation, and Content.Data fields // on an attachment before it is added to the graph. // // Unique to this particular data source, the item's // Content.Data function must be able to be reused. // Generally, it is not assumed that the data will // be read more than once, but to prevent duplicates // (since, I have seen a message attachment be byte- // for-byte duplicated, even though only one appears // in the app), we may read the content once before // sending it to the processor, which reads it again. FillOutAttachmentItem func(ctx context.Context, filename string, attachmentItem *timeline.Item) // Typically phone number; only used if // message row doesn't contain this info. DeviceID string } // ImportMessages imports messages from the chat DB. func (im Importer) ImportMessages(ctx context.Context, opt timeline.ImportParams) error { var whereClause string var args []any // build WHERE clause: honor configured timeframe to greatly speed up such imports if !opt.Timeframe.IsEmpty() { if opt.Timeframe.Since != nil { appleTsStart := TimeToCocoaNano(*opt.Timeframe.Since) whereClause = "WHERE m.date >= ?" args = append(args, appleTsStart) } if opt.Timeframe.Until != nil { if whereClause == "" { whereClause = "WHERE " } else { whereClause = " AND " } appleTsEnd := TimeToCocoaNano(*opt.Timeframe.Until) whereClause += "m.date < ?" args = append(args, appleTsEnd) } } // newer versions of iMessage / Mac add columns to the message table which we support, // but we should also support older versions (for example, associated_message_emoji, // which are message reactions, is a newer feature as are threads) supportedMessageTableColumns := []string{ "ROWID", "guid", "text", "attributedBody", "service", "date", "date_read", "date_delivered", "is_from_me", "is_spam", "associated_message_guid", "associated_message_type", "associated_message_emoji", // newer column "destination_caller_id", "reply_to_guid", "thread_originator_guid", // newer column } messageTableColumns, err := GetColumnNames(ctx, im.DB, "message") if err != nil { return fmt.Errorf("getting column names: %w", err) } var sb strings.Builder sb.WriteString("SELECT\n\t") var selectedCols []string // select only from columns that exist for _, col := range supportedMessageTableColumns { if !slices.Contains(messageTableColumns, col) { opt.Log.Warn("message database does not have column in message table", zap.String("column", col)) continue } if len(selectedCols) > 0 { sb.WriteString(", ") } sb.WriteString("m.") sb.WriteString(col) selectedCols = append(selectedCols, col) } // remainder of query sb.WriteString(`, h.id, h.country, h.service, chat_h.id, chat_h.country, chat_h.service, a.guid, a.created_date, a.filename, a.mime_type, a.transfer_name FROM message AS m JOIN chat_message_join AS cmj ON cmj.message_id = m.ROWID JOIN chat_handle_join AS chj ON chj.chat_id = cmj.chat_id JOIN handle AS chat_h ON chat_h.ROWID = chj.handle_id -- handle from chat join LEFT JOIN handle AS h ON h.ROWID = m.handle_id -- handle from message row LEFT JOIN message_attachment_join AS maj ON maj.message_id = m.ROWID LEFT JOIN attachment AS a ON a.ROWID = maj.attachment_id ` + whereClause + ` ORDER BY m.ROWID ASC`) rows, err := im.DB.QueryContext(ctx, sb.String(), args...) if err != nil { return fmt.Errorf("querying for message rows: %w", err) } defer rows.Close() // a single message's info may be spread across multiple rows thanks to our LEFT JOINs var currentMessage message finalizeMessage := func() { msgItem := &timeline.Item{ ID: currentMessage.guid, Classification: timeline.ClassMessage, Timestamp: currentMessage.timestamp(), Owner: currentMessage.sender(), Content: currentMessage.content(), Metadata: currentMessage.metadata(), } attachments := currentMessage.attachments(ctx, im, msgItem.Owner) // don't fold the first attachment into an empty message item; that works // for some data sources like SMS Backup & Restore where there's no message // IDs and a bunch of references to them, but for this data source, that // breaks correctness (unless we do lots of complex bookkeeping and rewrite // the ID references and such) ig := &timeline.Graph{Item: msgItem} for _, attach := range attachments { ig.ToItem(timeline.RelAttachment, attach) } // skip empty messages, as far as we can tell; there's probably something to it, // but I've looked into it and I can't figure out what the purpose of the // message is from the database tables alone, at least -- I probably missed // something though if msgItem.Content.Data == nil && len(ig.Edges) == 0 { opt.Log.Debug("skipping empty message", zap.Int64("row_id", currentMessage.rowid), zap.String("guid", currentMessage.guid)) return } // now that we've checked for actual content, go ahead and connect entities for _, recip := range currentMessage.sentTo() { ig.ToEntity(timeline.RelSent, recip) } // if this is a reply, make root of item graph the item this is replying to if currentMessage.replyToGUID != nil && currentMessage.threadOriginatorGUID != nil { ig.ToItem(timeline.RelReply, &timeline.Item{ID: *currentMessage.replyToGUID}) } opt.Pipeline <- ig } for rows.Next() { var msg message var joinedH handle var attach attachment // these go at the end... hardCodedTargets := []any{ &msg.handle.id, &msg.handle.country, &msg.handle.service, &joinedH.id, &joinedH.country, &joinedH.service, &attach.guid, &attach.createdDate, &attach.filename, &attach.mimeType, &attach.transferName, } // because the select clause is dynamically generated, we also need to // dynamically generate the list of targets... targets := make([]any, 0, len(selectedCols)+len(hardCodedTargets)) for _, col := range selectedCols { switch col { case "ROWID": targets = append(targets, &msg.rowid) case "guid": targets = append(targets, &msg.guid) case "text": targets = append(targets, &msg.text) case "attributedBody": targets = append(targets, &msg.attributedBody) case "service": targets = append(targets, &msg.service) case "date": targets = append(targets, &msg.date) case "date_read": targets = append(targets, &msg.dateRead) case "date_delivered": targets = append(targets, &msg.dateDelivered) case "is_from_me": targets = append(targets, &msg.isFromMe) case "is_spam": targets = append(targets, &msg.isSpam) case "associated_message_guid": targets = append(targets, &msg.associatedMessageGUID) case "associated_message_type": targets = append(targets, &msg.associatedMessageType) case "associated_message_emoji": targets = append(targets, &msg.associatedMessageEmoji) case "destination_caller_id": targets = append(targets, &msg.destinationCallerID) case "reply_to_guid": targets = append(targets, &msg.replyToGUID) case "thread_originator_guid": targets = append(targets, &msg.threadOriginatorGUID) } } targets = append(targets, hardCodedTargets...) err := rows.Scan(targets...) if err != nil { return fmt.Errorf("scanning row: %w", err) } if msg.rowid == 0 { continue // I've never seen this, but might as well be careful } // fill in the normalized ID (phone number, usually) of the device; prefer the entry in the DB, I guess, // but fall back to the device's telephony/commcenter information if needed if msg.destinationCallerID != nil { msg.normalizedCallerID = NormalizePhoneNumber(ctx, *msg.destinationCallerID, "") } else { msg.normalizedCallerID = im.DeviceID } // if message is a reaction handle it separately if msg.associatedMessageGUID != nil && msg.associatedMessageType != nil { reaction := messageReactions[*msg.associatedMessageType] // TODO: a message type of 3006 means removing the reaction placed with a message of type 2006. if *msg.associatedMessageType == 2006 && msg.associatedMessageEmoji != nil { reaction = *msg.associatedMessageEmoji } reactor := msg.sender() // oftentimes, the associated_message_guid values are prefixed by a string like "p:0/" or // "bp:" but I don't know what they're for, and apparently we can ignore them for our purposes _, associatedGUID, cut := strings.Cut(*msg.associatedMessageGUID, "/") if !cut { _, associatedGUID, cut = strings.Cut(*msg.associatedMessageGUID, ":") } if !cut { associatedGUID = *msg.associatedMessageGUID } // I found that not all of these associated message GUIDs are found in the DB. Shortly after I got my iPhone, // there was a brief period where I couldn't receive messages. My Macbook did, but none of the messages I sent // from my iPhone during that time ever appeared on my Macbook. Thus, reactions from my contact(s) to my // messages(s) have broken GUIDs. So here we do a quick check to see if the referenced row exists first. var count int err := im.DB.QueryRowContext(ctx, "SELECT count() FROM message WHERE guid=? LIMIT 1", associatedGUID).Scan(&count) if err == nil && count > 0 { reactionGraph := &timeline.Graph{ Item: &timeline.Item{ID: associatedGUID}, } reactionGraph.FromEntityWithValue(&reactor, timeline.RelReacted, reaction) opt.Pipeline <- reactionGraph continue } } // start of new message if msg.rowid != currentMessage.rowid { // finish previous one and process it if currentMessage.rowid > 0 { finalizeMessage() } // advance the row ID we're working on currentMessage = msg } // continuation (or still first row) of same message; add all joined data if attach.guid != nil { currentMessage.attached = append(currentMessage.attached, attach) } if joinedH.id != nil { currentMessage.participants = append(currentMessage.participants, joinedH) } } if err = rows.Err(); err != nil { return fmt.Errorf("scanning rows: %w", err) } // don't forget to process the last one too! if currentMessage.rowid > 0 { finalizeMessage() } return nil } func chatDBPath(input timeline.DirEntry) string { info, err := fs.Stat(input.FS, input.Filename) if err != nil { return "" } // To be 100% confident we should open chat.db and see if we can query it... if !info.IsDir() && input.Name() == "chat.db" && timeline.FileExistsFS(input.FS, path.Join(path.Dir(input.Filename), "Attachments")) { return input.Filename } else if info.IsDir() && input.FileExists("Attachments") && input.FileExists("chat.db") { return path.Join(input.Filename, "chat.db") } return "" } type message struct { rowid int64 guid string text *string attributedBody []byte // a rich text representation of the message; seems to be the newer way, as of iOS 16-ish service *string date *int64 dateRead *int64 dateDelivered *int64 isFromMe *int isSpam *int // seems to be the device's phone number! destinationCallerID *string normalizedCallerID string // message reactions associatedMessageGUID *string associatedMessageType *int associatedMessageEmoji *string replyToGUID *string // always seems to be set, even if not an explicit reply via user gesture threadOriginatorGUID *string // I think this is only set when the user explicitly makes a thread (by replying) handle handle participants []handle attached []attachment } func (m message) timestamp() time.Time { if m.date == nil { return time.Time{} } return CocoaNanoToTime(*m.date) } // fromMe returns true if the message was sent by the owner of the device. func (m message) fromMe() bool { if m.isFromMe == nil { return false } return *m.isFromMe == 1 } // senderID returns the normalized sender ID (phone number, or sometimes email). func (m message) senderID() string { if m.fromMe() { return m.normalizedCallerID } return m.handle.normalizedID() } func (m message) sender() timeline.Entity { return entityWithID(m.senderID()) } func (m message) sentTo() []*timeline.Entity { senderID := m.senderID() var ents []*timeline.Entity // if device is not the sender, it is at least a recipient! if !m.fromMe() { e := entityWithID(m.normalizedCallerID) ents = append(ents, &e) } // add participants, skipping the sender to avoid redundancy for _, p := range m.participants { pID := p.normalizedID() if pID == senderID { continue } e := p.entity() ents = append(ents, &e) } return ents } func (m message) attachments(ctx context.Context, im Importer, sender timeline.Entity) []*timeline.Item { var items []*timeline.Item hashes := make(map[string]struct{}) // to prevent duplicate attachments for _, a := range m.attached { if a.filename == nil { continue } var guid string if a.guid != nil { guid = *a.guid } var ts time.Time if a.createdDate != nil { ts = CocoaSecondsToTime(*a.createdDate) } it := &timeline.Item{ ID: guid, Classification: timeline.ClassMessage, Timestamp: ts, Owner: sender, Content: timeline.ItemData{ Filename: path.Base(*a.filename), }, } if a.mimeType != nil { it.Content.MediaType = *a.mimeType } if im.FillOutAttachmentItem != nil { im.FillOutAttachmentItem(ctx, *a.filename, it) } // if there is only one attachment, we can simply append // it as an item; if there are more, we may need to // deduplicate them. in rare situations, I have seen // byte-for-byte duplicate photos in the db with different // GUIDs and I can't explain why; this solution is hacky // for sure, but we have the luxury of being able to reuse // the Data function to read the file more than once, so // we hash each attachment to remember it and avoid dupes // (the processor will not be able to dedupe the items b/c // they have different IDs... but it would only store one // copy of the actual data file, but this is not good // enough for us in this case) if len(m.attached) == 1 { items = append(items, it) } else { h := blake3.New() rdr, err := it.Content.Data(ctx) if err == nil { _, _ = io.Copy(h, rdr) rdr.Close() sum := hex.EncodeToString(h.Sum(nil)) // only remember hash and keep the item if attachment data is unique if _, seen := hashes[sum]; !seen { hashes[sum] = struct{}{} items = append(items, it) } } } } return items } func (m message) content() timeline.ItemData { var text string if m.text != nil { text = *m.text } else if len(m.attributedBody) > 0 { nsStringContent, err := parseStreamTypedNSString(m.attributedBody) if err != nil { // TODO: log this properly or something? log.Printf("[ERROR] failed parsing NSString streamtyped from attributedBody: %v (body=%v)", err, m.attributedBody) return timeline.ItemData{} } text = nsStringContent } // iMessage uses the Object Replacement Character (efbfbc) as the textual message for attachments text = strings.ReplaceAll(text, "\uFFFC", "") if strings.TrimSpace(text) == "" { return timeline.ItemData{} } return timeline.ItemData{Data: timeline.StringData(text)} } func (m message) metadata() timeline.Metadata { meta := make(timeline.Metadata) if m.service != nil { meta["Service"] = *m.service } if m.dateRead != nil { meta["Date read"] = CocoaNanoToTime(*m.dateRead) } if m.dateDelivered != nil { meta["Date delivered"] = CocoaNanoToTime(*m.dateDelivered) } return meta } type handle struct { id *string country *string service *string } func (h handle) ID() string { if h.id == nil { return "" } return *h.id } func (h handle) normalizedID() string { var country string if h.country != nil { country = strings.ToUpper(*h.country) } return NormalizePhoneNumber(context.TODO(), h.ID(), country) } func (h handle) entity() timeline.Entity { if h.id == nil { return timeline.Entity{} } return timeline.Entity{ Attributes: []timeline.Attribute{ { Name: timeline.AttributePhoneNumber, Value: *h.id, Identity: true, }, }, } } type attachment struct { guid *string createdDate *int64 filename *string mimeType *string transferName *string // filename that was transmitted } // NormalizePhoneNumber tries to NormalizePhoneNumber the ID (phone number), and simply returns the input if it fails. func NormalizePhoneNumber(ctx context.Context, id, country string) string { norm, err := timeline.NormalizePhoneNumber(ctx, id, country) if err != nil { return id // oh well } return norm } // GetColumnNames returns the names of the columns from the given table name in the given sqlite database. func GetColumnNames(ctx context.Context, db *sql.DB, tableName string) ([]string, error) { rows, err := db.QueryContext(ctx, "SELECT name FROM pragma_table_info(?)", tableName) if err != nil { return nil, fmt.Errorf("querying table info: %w", err) } defer rows.Close() var cols []string for rows.Next() { var col string if err := rows.Scan(&col); err != nil { return nil, fmt.Errorf("scanning row: %w", err) } cols = append(cols, col) } if err := rows.Err(); err != nil { return nil, fmt.Errorf("next row: %w", err) } return cols, nil } func entityWithID(id string) timeline.Entity { attribute := timeline.AttributePhoneNumber if strings.Contains(id, "@") { attribute = timeline.AttributeEmail } return timeline.Entity{ Attributes: []timeline.Attribute{ { Name: attribute, Value: id, Identity: true, }, }, } } // TODO: These but in the 3000s are for removing the reaction. var messageReactions = map[int]string{ 2000: "\u2764\uFE0F", // red heart, but it appears black without the modifier: ❤️ 2001: "👍", 2002: "👎", 2003: "😂", 2004: "\u203C\uFE0F", // red double-exclamation, but it appears plain black without the modifier: ‼️ 2005: "❓", } // Options configures the data source. type Options struct{}