1
0
Fork 0
timelinize/datasources/instagram/instagram.go
2025-10-15 08:37:17 -06:00

292 lines
8.2 KiB
Go

/*
Timelinize
Copyright (c) 2013 Matthew Holt
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Package instagram implements a data source for importing data from Instagram archive files.
package instagram
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"path"
"time"
"github.com/timelinize/timelinize/datasources/facebook"
"github.com/timelinize/timelinize/timeline"
"go.uber.org/zap"
)
// init registers the Instagram data source with the timeline package.
// A registration error is reported through the package logger at fatal level.
func init() {
	instagramSource := timeline.DataSource{
		Name:            "instagram",
		Title:           "Instagram",
		Icon:            "instagram.svg",
		NewFileImporter: func() timeline.FileImporter { return new(Client) },
	}
	if err := timeline.RegisterDataSource(instagramSource); err != nil {
		timeline.Log.Fatal("registering data source", zap.Error(err))
	}
}
// Client implements the timeline.FileImporter interface for Instagram
// archives (it is what the registered NewFileImporter returns). It holds no
// state of its own.
type Client struct{}
// Recognize reports how confident we are that dirEntry is an Instagram archive.
//
// An entry containing both the profile-information JSON and the
// "your_instagram_activity" folder is recognized with full confidence. Failing
// that, an entry with a "media" folder plus a personal information file at any
// of the known locations is recognized with slightly lower confidence.
// Anything else yields the zero Recognition. The returned error is always nil.
func (Client) Recognize(_ context.Context, dirEntry timeline.DirEntry, _ timeline.RecognizeParams) (timeline.Recognition, error) {
	hasProfileInfo := dirEntry.FileExists("personal_information/personal_information/instagram_profile_information.json")
	if hasProfileInfo && dirEntry.FileExists("your_instagram_activity") {
		return timeline.Recognition{Confidence: 1.0}, nil
	}

	if dirEntry.FileExists("media") {
		// candidates are probed in the same order as before, stopping at the first hit
		candidates := [...]string{
			personalInformationPath2025,
			personalInformationPathPre2025,
			personalInformation2021,
		}
		for _, candidate := range candidates {
			if dirEntry.FileExists(candidate) {
				return timeline.Recognition{Confidence: .95}, nil
			}
		}
	}

	return timeline.Recognition{}, nil
}
// FileImport imports data from the Instagram archive rooted at dirEntry.
//
// It first loads the account's personal information to build the owner
// entity, then sends one graph per post (extra media are attached to the
// root item), then one graph per story, and finally hands message import
// over to the facebook package.
//
// Every send to params.Pipeline also watches ctx; if the context is done
// before the graph is accepted, ctx.Err() is returned. Posts that have
// neither text nor media produce no graph and are skipped.
func (c *Client) FileImport(ctx context.Context, dirEntry timeline.DirEntry, params timeline.ImportParams) error {
	// first, load the profile information
	pi, err := c.getPersonalInfo(dirEntry.FS)
	if err != nil {
		return fmt.Errorf("loading profile: %w", err)
	}
	if len(pi.ProfileUser) == 0 {
		return errors.New("no profile information found: missing profile user")
	}
	personalInfo := pi.ProfileUser[0].StringMapData

	owner := timeline.Entity{
		Name: personalInfo.Name.Value,
		Attributes: []timeline.Attribute{
			{
				Name:     "instagram_username",
				Value:    personalInfo.Username.Value,
				Identity: true,
			},
			{
				Name:  timeline.AttributeGender,
				Value: personalInfo.Gender.Value,
			},
			{
				Name:        timeline.AttributePhoneNumber,
				Value:       personalInfo.PhoneNumber.Value,
				Identifying: true,
			},
			{
				Name:        timeline.AttributeEmail,
				Value:       personalInfo.Email.Value,
				Identifying: true,
			},
			{
				Name:  "instagram_bio",
				Value: personalInfo.Bio.Value,
			},
			{
				Name:  "website",
				Value: personalInfo.Website.Value,
			},
		},
	}

	if picFilename := pi.ProfileUser[0].MediaMapData.ProfilePhoto.URI; picFilename != "" {
		owner.NewPicture = func(_ context.Context) (io.ReadCloser, error) {
			return dirEntry.FS.Open(picFilename)
		}
	}

	if personalInfo.DateOfBirth.Value != "" && personalInfo.DateOfBirth.Value != "1919-01-01" { // for some weird reason their default is 1919??
		// best-effort: a birth date that does not parse is simply left out
		if bd, err := time.Parse("2006-01-02", personalInfo.DateOfBirth.Value); err == nil {
			owner.Attributes = append(owner.Attributes, timeline.Attribute{
				Name:  "birth_date",
				Value: bd,
			})
		}
	}

	// then, load the posts index
	postIdx, err := c.getPostsIndex(dirEntry.FS)
	if err != nil {
		return fmt.Errorf("loading index: %w", err)
	}

	for _, post := range postIdx {
		// a post may have multiple media items, we'll treat them as attachments
		var ig *timeline.Graph
		var firstMedia int

		// if there is text, use that as the "main" item
		if postText := post.allText(); postText != "" {
			ig = &timeline.Graph{
				Item: &timeline.Item{
					Classification: timeline.ClassSocial,
					Timestamp:      post.timestamp(),
					Owner:          owner,
					Content: timeline.ItemData{
						Data: timeline.StringData(postText),
					},
					IntermediateLocation: post.filename,
				},
			}
		} else if len(post.Media) > 0 {
			item := post.Media[0].timelineItem(dirEntry.FS, owner)
			ig = &timeline.Graph{Item: item}
			firstMedia = 1 // the 0th media was used as the root of the graph
		}

		// a post with neither text nor media has nothing to import; skip it
		// instead of pushing a nil graph into the pipeline
		if ig == nil {
			continue
		}

		// add remaining media to graph
		for i := firstMedia; i < len(post.Media); i++ {
			ig.ToItem(timeline.RelAttachment, post.Media[i].timelineItem(dirEntry.FS, owner))
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case params.Pipeline <- ig:
		}
	}

	// stories
	// TODO: Maybe stories should go into a collection
	storyIdx, err := c.getStoryIndex(dirEntry.FS, params.Log)
	if err != nil {
		return err
	}
	for _, story := range storyIdx.IgStories {
		// copy the URI so the lazily-invoked opener below is bound to this
		// story regardless of the toolchain's loop-variable semantics
		storyURI := story.URI

		storyGraph := &timeline.Graph{
			Item: &timeline.Item{
				Timestamp:            time.Unix(story.CreationTimestamp, 0).UTC(),
				Owner:                owner,
				IntermediateLocation: storyURI,
				Content: timeline.ItemData{
					Filename: path.Base(storyURI),
					Data: func(_ context.Context) (io.ReadCloser, error) {
						return dirEntry.FS.Open(storyURI)
					},
				},
				Metadata: timeline.Metadata{
					"Caption": facebook.FixString(story.Title),
				},
			},
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case params.Pipeline <- storyGraph:
		}
	}

	// messages
	err = facebook.GetMessages("instagram", dirEntry, params)
	if err != nil {
		return err
	}

	return nil
}
// getPersonalInfo opens and decodes the archive's personal information JSON.
//
// The known locations are tried in a fixed order; the search moves on to the
// next location only when the previous one does not exist. Any other open
// error, or a not-exist error for the last location, is returned as-is.
func (c *Client) getPersonalInfo(fsys fs.FS) (instaPersonalInformation, error) {
	var info instaPersonalInformation

	candidates := [...]string{
		personalInformationPathPre2025,
		personalInformationPath2025,
		personalInformation2021,
	}

	var (
		f       fs.File
		openErr error
	)
	for _, candidate := range candidates {
		f, openErr = fsys.Open(candidate)
		if !errors.Is(openErr, fs.ErrNotExist) {
			// either opened successfully or failed for some other reason
			break
		}
	}
	if openErr != nil {
		return info, openErr
	}
	defer f.Close()

	if decodeErr := json.NewDecoder(f).Decode(&info); decodeErr != nil {
		return info, fmt.Errorf("decoding personal information file: %w", decodeErr)
	}
	return info, nil
}
// getPostsIndex reads every numbered posts index file (posts_1.json,
// posts_2.json, ...) and returns their entries concatenated in file order.
//
// For each number the newer path prefix is tried first and the older one
// second; reading stops at the first number for which neither file exists.
// Each returned entry records the index file it came from.
func (c *Client) getPostsIndex(fsys fs.FS) (instaPostsIndex, error) {
	var combined instaPostsIndex

	for n := 1; n < 10000; n++ {
		// try different paths until we get the one that exists (the archive layout changed over the years)
		indexName := fmt.Sprintf("%s%d.json", instaPostsIndexPrefix2025, n)
		f, err := fsys.Open(indexName)
		if errors.Is(err, fs.ErrNotExist) {
			indexName = fmt.Sprintf("%s%d.json", instaPostsIndexPrefixPre2025, n)
			f, err = fsys.Open(indexName)
		}
		if errors.Is(err, fs.ErrNotExist) {
			// neither layout has this file number; we've read them all
			break
		}
		if err != nil {
			return nil, err
		}

		var page instaPostsIndex
		decodeErr := json.NewDecoder(f).Decode(&page)
		f.Close()
		if decodeErr != nil {
			return nil, fmt.Errorf("decoding posts index file %s: %w", indexName, decodeErr)
		}

		// remember which index file each post came from
		for j := range page {
			page[j].filename = indexName
		}

		combined = append(combined, page...)
	}

	return combined, nil
}
// getStoryIndex opens and decodes the archive's stories index.
//
// The newer location is tried first, then the older one. If neither exists,
// a warning is logged and an empty index is returned with a nil error, so a
// missing stories file does not fail the import.
func (c *Client) getStoryIndex(fsys fs.FS, logger *zap.Logger) (instaStories, error) {
	f, openErr := fsys.Open(instaStoryIndex2025)
	if errors.Is(openErr, fs.ErrNotExist) {
		f, openErr = fsys.Open(instaStoryIndexPre2025)
	}

	switch {
	case errors.Is(openErr, fs.ErrNotExist):
		logger.Warn("no Instagram stories found")
		return instaStories{}, nil
	case openErr != nil:
		return instaStories{}, openErr
	}
	defer f.Close()

	var stories instaStories
	if decodeErr := json.NewDecoder(f).Decode(&stories); decodeErr != nil {
		return stories, fmt.Errorf("decoding stories index file: %w", decodeErr)
	}
	return stories, nil
}
// Locations of index files inside an Instagram archive. The archive layout
// has changed over time, so each kind of file has more than one known path;
// the suffix on each name indicates which layout it belongs to.
const (
	// personal information JSON
	personalInformationPath2025    = "personal_information/personal_information/personal_information.json"
	personalInformationPathPre2025 = "personal_information/personal_information.json"
	personalInformation2021        = "account_information/personal_information.json"

	// posts index filename prefixes; a file number and ".json" are appended
	instaPostsIndexPrefix2025    = "your_instagram_activity/media/posts_"
	instaPostsIndexPrefixPre2025 = "content/posts_"

	// stories index
	instaStoryIndex2025    = "your_instagram_activity/media/stories.json"
	instaStoryIndexPre2025 = "content/stories.json"
)