292 lines
8.2 KiB
Go
292 lines
8.2 KiB
Go
/*
	Timelinize
	Copyright (c) 2013 Matthew Holt

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as published
	by the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU Affero General Public License for more details.

	You should have received a copy of the GNU Affero General Public License
	along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
|
|
|
|
// Package instagram implements a data source for importing data from Instagram archive files.
|
|
package instagram
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/fs"
|
|
"path"
|
|
"time"
|
|
|
|
"github.com/timelinize/timelinize/datasources/facebook"
|
|
"github.com/timelinize/timelinize/timeline"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
func init() {
|
|
err := timeline.RegisterDataSource(timeline.DataSource{
|
|
Name: "instagram",
|
|
Title: "Instagram",
|
|
Icon: "instagram.svg",
|
|
NewFileImporter: func() timeline.FileImporter { return new(Client) },
|
|
})
|
|
if err != nil {
|
|
timeline.Log.Fatal("registering data source", zap.Error(err))
|
|
}
|
|
}
|
|
|
|
// Client is the Instagram file importer. It is the type handed to the
// timeline package as a timeline.FileImporter when the data source is
// registered. It holds no state, so its zero value is ready to use.
type Client struct{}
|
|
|
|
// Recognize returns whether the file or folder is recognized.
|
|
func (Client) Recognize(_ context.Context, dirEntry timeline.DirEntry, _ timeline.RecognizeParams) (timeline.Recognition, error) {
|
|
if dirEntry.FileExists("personal_information/personal_information/instagram_profile_information.json") &&
|
|
dirEntry.FileExists("your_instagram_activity") {
|
|
return timeline.Recognition{Confidence: 1.0}, nil
|
|
}
|
|
if dirEntry.FileExists("media") &&
|
|
(dirEntry.FileExists(personalInformationPath2025) ||
|
|
dirEntry.FileExists(personalInformationPathPre2025) ||
|
|
dirEntry.FileExists(personalInformation2021)) {
|
|
return timeline.Recognition{Confidence: .95}, nil
|
|
}
|
|
return timeline.Recognition{}, nil
|
|
}
|
|
|
|
// FileImport imports data from the file or folder.
|
|
func (c *Client) FileImport(_ context.Context, dirEntry timeline.DirEntry, params timeline.ImportParams) error {
|
|
// first, load the profile information
|
|
pi, err := c.getPersonalInfo(dirEntry.FS)
|
|
if err != nil {
|
|
return fmt.Errorf("loading profile: %w", err)
|
|
}
|
|
if len(pi.ProfileUser) == 0 {
|
|
return errors.New("no profile information found: missing profile user")
|
|
}
|
|
personalInfo := pi.ProfileUser[0].StringMapData
|
|
|
|
owner := timeline.Entity{
|
|
Name: personalInfo.Name.Value,
|
|
Attributes: []timeline.Attribute{
|
|
{
|
|
Name: "instagram_username",
|
|
Value: personalInfo.Username.Value,
|
|
Identity: true,
|
|
},
|
|
{
|
|
Name: timeline.AttributeGender,
|
|
Value: personalInfo.Gender.Value,
|
|
},
|
|
{
|
|
Name: timeline.AttributePhoneNumber,
|
|
Value: personalInfo.PhoneNumber.Value,
|
|
Identifying: true,
|
|
},
|
|
{
|
|
Name: timeline.AttributeEmail,
|
|
Value: personalInfo.Email.Value,
|
|
Identifying: true,
|
|
},
|
|
{
|
|
Name: "instagram_bio",
|
|
Value: personalInfo.Bio.Value,
|
|
},
|
|
{
|
|
Name: "website",
|
|
Value: personalInfo.Website.Value,
|
|
},
|
|
},
|
|
}
|
|
if picFilename := pi.ProfileUser[0].MediaMapData.ProfilePhoto.URI; picFilename != "" {
|
|
owner.NewPicture = func(_ context.Context) (io.ReadCloser, error) {
|
|
return dirEntry.FS.Open(picFilename)
|
|
}
|
|
}
|
|
if personalInfo.DateOfBirth.Value != "" && personalInfo.DateOfBirth.Value != "1919-01-01" { // for some weird reason their default is 1919??
|
|
bd, err := time.Parse("2006-01-02", personalInfo.DateOfBirth.Value)
|
|
if err == nil {
|
|
owner.Attributes = append(owner.Attributes, timeline.Attribute{
|
|
Name: "birth_date",
|
|
Value: bd,
|
|
})
|
|
}
|
|
}
|
|
|
|
// then, load the posts index
|
|
postIdx, err := c.getPostsIndex(dirEntry.FS)
|
|
if err != nil {
|
|
return fmt.Errorf("loading index: %w", err)
|
|
}
|
|
for _, post := range postIdx {
|
|
// a post may have multiple media items, we'll treat them as attachments
|
|
|
|
var ig *timeline.Graph
|
|
var firstMedia int
|
|
|
|
// if there is text, use that as the "main" item
|
|
if postText := post.allText(); postText != "" {
|
|
ig = &timeline.Graph{
|
|
Item: &timeline.Item{
|
|
Classification: timeline.ClassSocial,
|
|
Timestamp: post.timestamp(),
|
|
Owner: owner,
|
|
Content: timeline.ItemData{
|
|
Data: timeline.StringData(postText),
|
|
},
|
|
IntermediateLocation: post.filename,
|
|
},
|
|
}
|
|
} else if len(post.Media) > 0 {
|
|
item := post.Media[0].timelineItem(dirEntry.FS, owner)
|
|
ig = &timeline.Graph{Item: item}
|
|
firstMedia = 1 // the 0th media was used as the root of the graph
|
|
}
|
|
|
|
// add remaining media to graph
|
|
for i := firstMedia; i < len(post.Media); i++ {
|
|
ig.ToItem(timeline.RelAttachment, post.Media[i].timelineItem(dirEntry.FS, owner))
|
|
}
|
|
|
|
params.Pipeline <- ig
|
|
}
|
|
|
|
// stories
|
|
// TODO: Maybe stories should go into a collection
|
|
storyIdx, err := c.getStoryIndex(dirEntry.FS, params.Log)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, story := range storyIdx.IgStories {
|
|
params.Pipeline <- &timeline.Graph{
|
|
Item: &timeline.Item{
|
|
Timestamp: time.Unix(story.CreationTimestamp, 0).UTC(),
|
|
Owner: owner,
|
|
IntermediateLocation: story.URI,
|
|
Content: timeline.ItemData{
|
|
Filename: path.Base(story.URI),
|
|
Data: func(_ context.Context) (io.ReadCloser, error) {
|
|
return dirEntry.FS.Open(story.URI)
|
|
},
|
|
},
|
|
Metadata: timeline.Metadata{
|
|
"Caption": facebook.FixString(story.Title),
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// messages
|
|
err = facebook.GetMessages("instagram", dirEntry, params)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *Client) getPersonalInfo(fsys fs.FS) (instaPersonalInformation, error) {
|
|
var pi instaPersonalInformation
|
|
|
|
file, err := fsys.Open(personalInformationPathPre2025)
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
file, err = fsys.Open(personalInformationPath2025)
|
|
}
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
file, err = fsys.Open(personalInformation2021)
|
|
}
|
|
if err != nil {
|
|
return pi, err
|
|
}
|
|
defer file.Close()
|
|
|
|
err = json.NewDecoder(file).Decode(&pi)
|
|
if err != nil {
|
|
return pi, fmt.Errorf("decoding personal information file: %w", err)
|
|
}
|
|
return pi, nil
|
|
}
|
|
|
|
func (c *Client) getPostsIndex(fsys fs.FS) (instaPostsIndex, error) {
|
|
var all instaPostsIndex
|
|
|
|
makePostsFilename := func(prefix string, i int) string {
|
|
return fmt.Sprintf("%s%d.json", prefix, i)
|
|
}
|
|
|
|
for i := 1; i < 10000; i++ {
|
|
// try different paths until we get the one that exists (the archive layout changed over the years)
|
|
postsFilename := makePostsFilename(instaPostsIndexPrefix2025, i)
|
|
file, err := fsys.Open(postsFilename)
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
postsFilename = makePostsFilename(instaPostsIndexPrefixPre2025, i)
|
|
file, err = fsys.Open(postsFilename)
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
break
|
|
}
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var idx instaPostsIndex
|
|
err = json.NewDecoder(file).Decode(&idx)
|
|
file.Close()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("decoding posts index file %s: %w", postsFilename, err)
|
|
}
|
|
|
|
for i := range idx {
|
|
idx[i].filename = postsFilename
|
|
}
|
|
|
|
all = append(all, idx...)
|
|
}
|
|
|
|
return all, nil
|
|
}
|
|
|
|
func (c *Client) getStoryIndex(fsys fs.FS, logger *zap.Logger) (instaStories, error) {
|
|
file, err := fsys.Open(instaStoryIndex2025)
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
file, err = fsys.Open(instaStoryIndexPre2025)
|
|
}
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
logger.Warn("no Instagram stories found")
|
|
return instaStories{}, nil
|
|
}
|
|
if err != nil {
|
|
return instaStories{}, err
|
|
}
|
|
defer file.Close()
|
|
|
|
var idx instaStories
|
|
err = json.NewDecoder(file).Decode(&idx)
|
|
if err != nil {
|
|
return idx, fmt.Errorf("decoding stories index file: %w", err)
|
|
}
|
|
|
|
return idx, nil
|
|
}
|
|
|
|
// Paths inside an Instagram archive. The layout has changed over the years,
// so several locations are listed for each kind of file, newest first.
const (
	// personal information (profile) JSON file
	personalInformationPath2025    = "personal_information/personal_information/personal_information.json"
	personalInformationPathPre2025 = "personal_information/personal_information.json"
	personalInformation2021        = "account_information/personal_information.json"

	// prefix of the numbered posts index files; the full name is
	// the prefix followed by the file number and ".json"
	instaPostsIndexPrefix2025    = "your_instagram_activity/media/posts_"
	instaPostsIndexPrefixPre2025 = "content/posts_"

	// stories index JSON file
	instaStoryIndex2025    = "your_instagram_activity/media/stories.json"
	instaStoryIndexPre2025 = "content/stories.json"
)
|