1
0
Fork 0
timelinize/datasources/media/metadata.go
Matthew Holt 2b5fd57259
Proper support for mixed timestamps and time zones
This will be a long-time WIP, but we now support full timestamps with local time offsets, absolute ones with UTC times only, and wall times only.

Several other fixes/enhancements. Making an effort to display time zone in time displays throughout the app.

Can now try to infer time zones during import, which is the default setting.

This will take a while to fully implement but it's a good start. Just have to be really careful about date crafting/manipulation/parsing.
2025-09-12 11:17:49 -06:00

588 lines
19 KiB
Go

/*
Timelinize
Copyright (c) 2013 Matthew Holt
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package media
import (
"bytes"
"errors"
"fmt"
"io"
"io/fs"
"math"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/abema/go-mp4"
"github.com/cozy/goexif2/exif"
"github.com/cozy/goexif2/mknote"
"github.com/cozy/goexif2/tiff"
"github.com/dhowden/tag"
"github.com/mholt/go-xmp/xmp"
"github.com/timelinize/timelinize/timeline"
"go.uber.org/zap"
)
// init registers every parser listed in mknote.All with the exif package when
// this package is initialized, i.e. before any exif.Decode call made by this file.
// NOTE(review): presumably these are the camera-vendor "maker note" parsers; confirm
// against the mknote package documentation.
func init() {
exif.RegisterParsers(mknote.All...)
}
// exifWalkerFunc adapts an ordinary function to the single-method Walk interface
// accepted by (*exif.Exif).Walk, so a closure can be passed where a walker is
// expected (the same idea as http.HandlerFunc).
type exifWalkerFunc func(exif.FieldName, *tiff.Tag) error

// Walk invokes w with the given field name and tag and returns whatever w returns.
func (w exifWalkerFunc) Walk(name exif.FieldName, tag *tiff.Tag) error {
return w(name, tag)
}
// ExtractAllMetadata opens path within fsys and runs each of this package's metadata
// extractors over the file, in this order: EXIF, XMP, MP4, then audio tags. Whatever
// each extractor returns is merged into the item's metadata according to policy, and
// the extractors may additionally set the item's Timestamp and Location directly.
//
// EXIF and XMP failures are only logged as warnings; a failure while reading MP4 or
// audio tag metadata, or while rewinding the file between extractors, aborts the call
// and is returned. If the audio tags carry an embedded picture it is returned,
// otherwise the returned picture is nil.
func ExtractAllMetadata(logger *zap.Logger, fsys fs.FS, path string, item *timeline.Item, policy timeline.MetadataMergePolicy) (*tag.Picture, error) {
	logger = logger.With(zap.String("filepath", path))

	f, err := fsys.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Every extractor starts reading from the top of the file, so we need to be able
	// to seek. If the file itself cannot, buffer (at most 50 MiB of) it in memory and
	// seek within that instead. That is obviously lossy for larger files, but the
	// metadata is usually near the beginning (though sometimes it is at the end).
	rs, seekable := f.(io.ReadSeeker)
	if !seekable {
		const maxBufferSize = 1024 * 1024 * 50

		pooled := bufPool.Get().(*bytes.Buffer)
		pooled.Reset()
		defer bufPool.Put(pooled)

		if _, err := io.Copy(pooled, io.LimitReader(f, maxBufferSize)); err != nil {
			return nil, err
		}
		rs = bytes.NewReader(pooled.Bytes())
	}

	// EXIF (best-effort: problems are logged, not returned)
	exifMeta, err := extractEXIFMetadata(logger, rs, item)
	if err != nil {
		logger.Warn("processing EXIF metadata", zap.Error(err))
	}
	item.AddMetadata(exifMeta, policy)
	if _, err = rs.Seek(0, io.SeekStart); err != nil {
		return nil, fmt.Errorf("could not rewind file after EXIF: %w", err)
	}

	// XMP (best-effort as well)
	xmpMeta, err := extractXMPMetadata(logger, rs)
	if err != nil {
		logger.Warn("processing XMP metadata", zap.Error(err))
	}
	item.AddMetadata(xmpMeta, policy)
	if _, err = rs.Seek(0, io.SeekStart); err != nil {
		return nil, fmt.Errorf("could not rewind file after XMP: %w", err)
	}

	// MP4 (errors are fatal from here on)
	mp4Meta, err := readMP4Metadata(logger, item, rs)
	if err != nil {
		return nil, fmt.Errorf("processing MP4 metadata: %w", err)
	}
	item.AddMetadata(mp4Meta, policy)
	if _, err = rs.Seek(0, io.SeekStart); err != nil {
		return nil, fmt.Errorf("could not rewind file after MP4: %w", err)
	}

	// ID3/Tags (Audio)
	tagMeta, picture, err := readAudioMetadata(logger, rs)
	if err != nil {
		return nil, fmt.Errorf("processing tag metadata: %w", err)
	}
	item.AddMetadata(tagMeta, policy)

	return picture, nil
}
// extractEXIFMetadata decodes the EXIF data in file, copies what it can onto item,
// and returns every readable EXIF field as metadata.
//
// The item's Timestamp and its Latitude/Longitude are only filled in when they are
// not already set; its Altitude is filled in from the GPSAltitude field under the
// same condition. Metadata keys are the EXIF field names split into words (for
// example "DateTimeOriginal" becomes "Date Time Original"); when a field holds more
// than one value, a 1-based index is appended to the key of each value.
//
// A critical decoding error is returned together with nil metadata; non-critical
// decoding errors are ignored. A value that cannot be converted is logged and left
// out of the result, so the metadata never contains a made-up zero value for it.
func extractEXIFMetadata(logger *zap.Logger, file io.Reader, item *timeline.Item) (timeline.Metadata, error) {
	ex, err := exif.Decode(file)
	if err != nil && exif.IsCriticalError(err) {
		return nil, fmt.Errorf("decoding exif from file: %w", err)
	}

	// fill in timestamp
	if item.Timestamp.IsZero() {
		if ts, err := ex.DateTime(); err == nil {
			item.Timestamp = ts
		}
	}

	// fill in location coordinates
	if item.Location.Latitude == nil && item.Location.Longitude == nil {
		if lat, lon, err := ex.LatLong(); err == nil {
			item.Location = timeline.Location{
				Latitude:  &lat,
				Longitude: &lon,
			}
		}
	}

	// fill in remaining EXIF data
	meta := make(timeline.Metadata)
	err = ex.Walk(exifWalkerFunc(func(name exif.FieldName, field *tiff.Tag) error {
		// we don't skip timestamp and location fields even though we extract them
		// separately, because it doesn't hurt to keep the original data, and sometimes
		// the timestamp we extract is overwritten for another one, and we should
		// definitely keep the original in the metadata in that case
		// (specifically, we don't skip: DateTimeOriginal, DateTime, GPSLatitude, and GPSLongitude)
		fieldName := string(name)
		label := splitCamelCaseIntoWords(fieldName)

		// keyFor returns the metadata key for the i'th value of this field;
		// multi-valued fields get a 1-based suffix so values don't overwrite each other
		keyFor := func(i int) string {
			if field.Count > 1 {
				return fmt.Sprintf("%s %d", label, i+1)
			}
			return label
		}

		switch field.Format() {
		case tiff.IntVal:
			for i := range int(field.Count) {
				val, err := field.Int(i)
				if err != nil {
					logger.Error("unable to get int from TIFF tag",
						zap.Error(err),
						zap.String("field_name", fieldName),
						zap.Int("index", i))
					continue
				}
				meta[keyFor(i)] = val
			}

		case tiff.FloatVal:
			for i := range int(field.Count) {
				val, err := field.Float(i)
				if err != nil {
					logger.Warn("unable to get float from TIFF tag",
						zap.Error(err),
						zap.String("field_name", fieldName),
						zap.Int("index", i))
					continue
				}
				meta[keyFor(i)] = val
			}

		case tiff.RatVal:
			for i := range int(field.Count) {
				// NOTE(review): upstream goexif documents that Tag.Rat panics on a zero
				// denominator; confirm whether this fork guards against that.
				val, err := field.Rat(i)
				if err != nil {
					logger.Warn("unable to get rational from TIFF tag",
						zap.Error(err),
						zap.String("field_name", fieldName),
						zap.Int("index", i))
					continue
				}
				meta[keyFor(i)] = val

				// additionally, set Altitude if that's what this field happens to be
				if fieldName == "GPSAltitude" && item.Location.Altitude == nil {
					if valFloat, _ := val.Float64(); !math.IsInf(valFloat, 0) {
						item.Location.Altitude = &valFloat
					}
				}
			}

		case tiff.StringVal:
			val, err := field.StringVal()
			if err != nil {
				logger.Warn("unable to get string from TIFF tag",
					zap.Error(err),
					zap.String("field_name", fieldName))
				break
			}
			meta[label] = val

		case tiff.OtherVal:
			logger.Debug("encountered other EXIF field type",
				zap.String("name", fieldName),
				zap.Int("length", len(field.Val)))

		case tiff.UndefVal:
			logger.Debug("encountered undefined EXIF field type",
				zap.String("name", fieldName),
				zap.Int("length", len(field.Val)))
		}

		// a single unreadable field must never abort the walk
		return nil
	}))

	return meta, err
}
// extractXMPMetadata scans file for XMP packets and returns the handful of XMP
// properties this package currently cares about, keyed by their XMP path prefixed
// with "XMP ".
//
// When the scan ends in io.EOF, that is treated as "no XMP present": it is logged at
// debug level and nil metadata is returned without an error. Any other scan error,
// or a packet that fails to unmarshal or list its paths, is returned as an error
// with nil metadata.
func extractXMPMetadata(logger *zap.Logger, file io.Reader) (timeline.Metadata, error) {
	packets, err := xmp.ScanPackets(file)
	switch {
	case errors.Is(err, io.EOF):
		logger.Debug("no XMP metadata found", zap.Error(err))
		return nil, nil
	case err != nil:
		return nil, err
	}

	meta := make(timeline.Metadata)

	for _, pkt := range packets {
		var doc xmp.Document
		if err := xmp.Unmarshal(pkt, &doc); err != nil {
			return nil, fmt.Errorf("unmarshaling XMP document: %w", err)
		}

		docPaths, err := doc.ListPaths()
		if err != nil {
			return nil, fmt.Errorf("listing XMP paths: %w", err)
		}

		for _, entry := range docPaths {
			// fmt.Printf("%s = %s\n", entry.Path, entry.Value)

			// before Android 11, Google used "MicroVideo"; now they use "MotionPhoto"
			pathName := string(entry.Path)
			if pathName != "GCamera:MicroVideo" &&
				pathName != "GCamera:MotionPhoto" &&
				pathName != "GCamera:MotionPhotoPresentationTimestampUs" {
				continue
			}
			meta["XMP "+pathName] = entry.Value
		}

		// (debugging aid: dump every node and attribute of the document)
		// nodes := doc.Nodes()
		// for nodes != nil {
		// 	for _, n := range nodes {
		// 		fmt.Println("NODE:", n.XMLName.Local, n.Value)
		// 		for _, attr := range n.Attr {
		// 			fmt.Println(attr.Name.Local, "=", attr.Value)
		// 		}
		// 		for _, n := range n.Nodes {
		// 			for _, attr := range n.Attr {
		// 				fmt.Println("\t", attr.Name.Local, "=", attr.Value)
		// 			}
		// 		}
		// 	}
		// }
	}

	return meta, nil
}
// extractAudioAndVideoMetadata reads various kinds of audio and video tags/metadata from the specified file.
// For MP4 files, the metadata tree is traversed and any useful, interesting, or valuable data is extracted.
// For other files, ID3 tags and other kinds of metadata are traversed and extracted if known and if it may
// be of use or value.
//
// Creation timestamp and location data are added to the item directly as first-class data.
// If a picture (usually album art) was embedded, it will be returned so that it can be processed separately.
// All the other metadata is added directly to the item's metadata map according to the metadata merge policy.
//
// Relevant useful resources (as of January 2023):
// - https://cconcolato.github.io/mp4ra/
// - https://www.ftyps.com
// - https://observablehq.com/@benjamintoofer/iso-base-media-file-format
// - https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
// func extractAudioAndVideoMetadata(logger *zap.Logger, fsys fs.FS, path string, item *timeline.Item, policy timeline.MetadataMergePolicy) (*tag.Picture, error) {
// file, err := fsys.Open(path)
// if err != nil {
// return nil, err
// }
// defer file.Close()
// // prepare to read the file; seeking ability is required
// var fileSeeker io.ReadSeeker
// if seeker, ok := file.(io.ReadSeeker); ok {
// fileSeeker = seeker
// } else {
// // TODO: I have not tested this code yet, but:
// // since this isn't a seekable reader, we have to make it seekable by reading it
// // into memory; obviously this is problematic if the file is large, but even if
// // it is, we can hope that the metadata is at the beginning (sometimes it is at
// // the end; but usually it is at the beginning)...
// const maxBufferSize = 1024 * 1024 * 50
// buf := bufPool.Get().(*bytes.Buffer)
// buf.Reset()
// defer bufPool.Put(buf)
// r := io.LimitReader(file, maxBufferSize)
// if _, err := io.Copy(buf, r); err != nil {
// return nil, err
// }
// fileSeeker = bytes.NewReader(buf.Bytes())
// }
// mp4Meta, err := readMP4Metadata(logger, item, fileSeeker)
// if err != nil {
// return nil, fmt.Errorf("reading MP4 metadata: %w", err)
// }
// item.AddMetadata(mp4Meta, timeline.MetaMergeAppend)
// // reset to beginning of file; then try to read audio tags
// fileSeeker.Seek(0, io.SeekStart)
// // an EOF error is OK; just means there was no tag metadata
// tagMeta, pic, err := readAudioMetadata(fileSeeker)
// if err != nil && !errors.Is(err, io.EOF) {
// return nil, fmt.Errorf("reading tag metadata: %w", err)
// }
// item.AddMetadata(tagMeta, policy)
// return pic, nil
// }
// readMP4Metadata reads as much useful and valuable metadata from an MP4 file as we can. It sets the
// timestamp and location directly onto the item (if found) and returns the rest of the metadata.
//
// The item's Timestamp is only set when it is still zero, preferring whichever of the
// movie header (mvhd) or a track header (tkhd) is encountered first with a non-zero
// creation time. A location parsed from a ©xyz box under udta replaces the item's
// Location. The "Duration" entry (duration divided by timescale) is omitted when the
// movie header's timescale is zero, since the quotient would be NaN or infinite.
//
// The returned error is whatever mp4.ReadBoxStructure reports; the metadata collected
// up to that point is returned alongside it.
func readMP4Metadata(logger *zap.Logger, item *timeline.Item, fileSeeker io.ReadSeeker) (timeline.Metadata, error) {
	// prepare metadata container
	meta := make(timeline.Metadata)

	// Attempt to read MP4 metadata. This mp4 package is a low-level parser of boxes
	// (a.k.a. atoms) that can extract all of the important fields, even from
	// unrecognized boxes as long as we know how to read them.
	_, err := mp4.ReadBoxStructure(fileSeeker, func(h *mp4.ReadHandle) (any, error) {
		// skip "mdat": its payload is not read here, only the descriptive boxes are
		if h.BoxInfo.IsSupportedType() && h.BoxInfo.Type.String() != "mdat" {
			box, _, err := h.ReadPayload()
			if err != nil {
				return nil, fmt.Errorf("reading payload from handle: %w", err)
			}

			switch b := box.(type) {
			case *mp4.Ftyp: // file type
				meta["Major Brand"] = string(b.MajorBrand[:])
				brands := make([]string, 0, len(b.CompatibleBrands))
				for _, brand := range b.CompatibleBrands {
					brands = append(brands, string(brand.CompatibleBrand[:]))
				}
				if len(brands) > 0 {
					meta["Compatible Brands"] = strings.Join(brands, ", ")
				}

			case *mp4.Mvhd: // movie header (overall declarations)
				if item.Timestamp.IsZero() {
					// (only difference between V0 and V1 is bit length of integer)
					if creationTime := b.GetCreationTime(); creationTime != 0 {
						item.Timestamp = isoIEC14496Timestamp(creationTime)
					}
				}
				if modifTime := b.GetModificationTime(); modifTime != 0 {
					meta["Modification Time"] = isoIEC14496Timestamp(modifTime)
				}
				// a zero timescale (malformed file) would make this division yield
				// NaN or +Inf, which is meaningless and cannot be encoded as JSON
				if b.Timescale != 0 {
					meta["Duration"] = float64(b.GetDuration()) / float64(b.Timescale)
				}
				meta["Rate"] = b.GetRate()
				meta["Volume"] = b.Volume

			case *mp4.Tkhd: // track header
				// just in case (for some reason) the mvhd box didn't have this info
				if item.Timestamp.IsZero() {
					if creationTime := b.GetCreationTime(); creationTime != 0 {
						item.Timestamp = isoIEC14496Timestamp(creationTime)
					}
				}
				if width := b.GetWidthInt(); width > 0 {
					meta[fmt.Sprintf("Track %d Width", b.TrackID)] = width
				}
				if height := b.GetHeightInt(); height > 0 {
					meta[fmt.Sprintf("Track %d Height", b.TrackID)] = height
				}
				meta[fmt.Sprintf("Track %d Flags", b.TrackID)] = fmt.Sprintf("%#x", b.GetFlags())
			}

			// traverse child nodes
			return h.Expand()
		}

		if h.BoxInfo.Context.UnderUdta && h.BoxInfo.Type == [4]byte{'©', 'x', 'y', 'z'} {
			// Google cameras store location data in this box
			var buf bytes.Buffer
			if _, err := h.ReadData(&buf); err != nil {
				return nil, fmt.Errorf("reading ©xyz box data: %w", err)
			}
			loc, err := mp4XYZCoordsToLocation(buf.String())
			if err != nil {
				// an unparsable location is not worth failing the whole file over
				logger.Error("parsing location from ©xyz",
					zap.Error(err),
					zap.String("©xyz", buf.String()))
			} else {
				item.Location = loc
			}
		}

		return nil, nil
	})

	return meta, err
}
// readAudioMetadata reads tag metadata from the file and returns it, along with any embedded picture.
//
// If the tag library reports tag.ErrNoTagsFound, that is logged at debug level and
// all three results are nil. Any other read error is returned as-is.
//
// The track and disc numbers and their totals are each recorded independently
// whenever they are greater than zero, so "Total Tracks" / "Total Discs" are kept
// even when the track / disc number itself is known.
func readAudioMetadata(logger *zap.Logger, fileSeeker io.ReadSeeker) (timeline.Metadata, *tag.Picture, error) {
	m, err := tag.ReadFrom(fileSeeker)
	if err != nil {
		if errors.Is(err, tag.ErrNoTagsFound) {
			logger.Debug("no audio metadata found", zap.Error(err))
			return nil, nil, nil
		}
		return nil, nil, err
	}

	meta := make(timeline.Metadata)

	meta["Format"] = m.Format()
	if fileType := m.FileType(); fileType != "" {
		meta["File type"] = fileType
	}
	meta["Title"] = m.Title()
	meta["Album"] = m.Album()
	meta["Artist"] = m.Artist()
	meta["Album Artist"] = m.AlbumArtist()
	meta["Composer"] = m.Composer()
	meta["Year"] = m.Year()
	meta["Genre"] = m.Genre()
	meta["Lyrics"] = m.Lyrics()
	meta["Comment"] = m.Comment()

	// number and total are independent pieces of information; keep both when present
	track, totalTracks := m.Track()
	if track > 0 {
		meta["Track"] = track
	}
	if totalTracks > 0 {
		meta["Total Tracks"] = totalTracks
	}

	disc, totalDiscs := m.Disc()
	if disc > 0 {
		meta["Disc"] = disc
	}
	if totalDiscs > 0 {
		meta["Total Discs"] = totalDiscs
	}

	// additional fields from https://exiftool.org/TagNames/ID3.html
	// (a frame that is absent from the raw map yields a nil value here)
	raw := m.Raw()
	meta["Copyright"] = raw["TCOP"]
	meta["Band"] = raw["TPE2"]
	meta["Conductor"] = raw["TPE3"]
	// TODO: there's dozens more, including time and date, language, URLs, etc.

	return meta, m.Picture(), nil
}
// mp4XYZCoordsToLocation extracts a latitude/longitude pair from the contents of an
// MP4 ©xyz box. On Google media, at least, the box looks like
// "*data+50.1234-101.1234+000.000/", which I assume is +X-Y+Z, i.e. +Lat-Lon+Alt.
// According to spec there MUST be no more than 4 digits after the decimal, though
// this code handles more just fine.
//
// Only the first two signed decimal numbers are used; a third (altitude) component,
// if present, is ignored. An error is returned if the input does not contain two
// adjacent signed decimals or if either one fails to parse as a float.
func mp4XYZCoordsToLocation(xyzRaw string) (timeline.Location, error) {
	// the slice must be long enough for index 3 (the longitude group) to exist
	const minMatches = 4

	groups := cXYZCoordsRegex.FindStringSubmatch(xyzRaw)
	if len(groups) < minMatches {
		return timeline.Location{}, fmt.Errorf("lat+lon not found in expected format in input string '%s'", xyzRaw)
	}

	// groups 1 and 3 are the signed numbers; 2 and 4 are just their signs
	latitude, err := strconv.ParseFloat(groups[1], 64)
	if err != nil {
		return timeline.Location{}, fmt.Errorf("converting latitude from '%s': %w", groups[1], err)
	}

	longitude, err := strconv.ParseFloat(groups[3], 64)
	if err != nil {
		return timeline.Location{}, fmt.Errorf("converting longitude from '%s': %w", groups[3], err)
	}

	var loc timeline.Location
	loc.Latitude = &latitude
	loc.Longitude = &longitude
	return loc, nil
}
// splitCamelCaseIntoWords splits camel-cased strings into words by inserting
// spaces at the most sensible places. This algorithm isn't perfect as it doesn't
// use a dictionary, but it's pretty good for EXIF and most other use cases.
//
// For example, "DateTimeOriginal" becomes "Date Time Original", "GPSLatitude"
// becomes "GPS Latitude", and "Flash10Bit" becomes "Flash 10 Bit".
//
// Letter case is only recognized for ASCII; any other character is treated as
// non-alphabetic. Neighboring characters are examined as whole runes, so multi-byte
// UTF-8 characters are never split apart or mistaken for several characters.
func splitCamelCaseIntoWords(s string) string {
	// index by rune, not by byte: the rules below look at the previous and next
	// *character*, which for multi-byte UTF-8 is not the previous/next byte
	runes := []rune(s)
	last := len(runes) - 1

	var sb strings.Builder
	sb.Grow(len(s))

	for i, ch := range runes {
		// whether current character is upper or lower cased
		isUpper, isLower := upper(ch), lower(ch)

		// Case of the neighbors. Where there is no neighbor (start or end of the
		// string) we default to the value that never triggers a space.
		prevUpper, nextUpper, nextLower := true, true, true
		if i > 0 {
			prevUpper = upper(runes[i-1])
		}
		if i < last {
			nextUpper = upper(runes[i+1])
			nextLower = lower(runes[i+1])
		}

		// insert a space before current char if:
		// - we are upper and previous is not upper,
		// - we are upper and next is not upper,
		// - we are neither upper nor lower (i.e. non-alpha), and next is also not upper or lower
		startsWord := false
		switch {
		case i == 0:
			// never lead with a space
		case isUpper:
			startsWord = !prevUpper || !nextUpper
		case !isLower:
			startsWord = !nextUpper && !nextLower
		}
		if startsWord {
			sb.WriteRune(' ')
		}
		sb.WriteRune(ch)
	}

	return sb.String()
}

// upper reports whether ch is an ASCII uppercase letter (A-Z).
func upper(ch rune) bool {
	return ch >= 'A' && ch <= 'Z' // ASCII only
}

// lower reports whether ch is an ASCII lowercase letter (a-z).
func lower(ch rune) bool {
	return ch >= 'a' && ch <= 'z' // ASCII only
}
// isoIEC14496Timestamp turns an MP4-style timestamp, i.e. a count of seconds since
// January 1, 1904 (as defined by ISO/IEC 14496-12 5th Edition [2015], page 23), into
// a regular time.Time in UTC.
//
// An input that lands exactly on the Unix epoch yields the zero time.Time instead.
// Inputs earlier than the Unix epoch still convert correctly: the unsigned
// subtraction wraps around, and reinterpreting the result as a signed integer gives
// the intended negative number of Unix seconds.
func isoIEC14496Timestamp(ts uint64) time.Time {
	var result time.Time
	if ts != isoIEC14496_12_5thEdition_2015EpochToUnixEpochSeconds {
		// (This could technically overflow but I don't think the incoming timestamp is going to be THAT big)
		//nolint:gosec
		unixSeconds := int64(ts - isoIEC14496_12_5thEdition_2015EpochToUnixEpochSeconds)

		// The standard defines these timestamps in UTC, not local time, so be sure to call .UTC()
		result = time.Unix(unixSeconds, 0).UTC()
	}
	return result
}

// The number of seconds between January 1, 1904 (the Mac epoch used by MP4 file
// metadata) and January 1, 1970 (the Unix epoch).
const isoIEC14496_12_5thEdition_2015EpochToUnixEpochSeconds uint64 = 2082844800 //nolint // Yeah screw it, I'm using underscores for this one
// 3731705977 - 2082844800 = 1648861177 = absolute time it happened (Apr 2 12:59:37 AM UTC), but obviously in Hawaii time, it was 10 hours earlier.
// Regex to extract lat-lon data from the ©xyz field of MP4 metadata which
// takes the form: "*data+50.1234-101.1234+000.000/" in North America.
// And sometimes the +000.000 is missing (sometimes there's just a trailing slash).
//
// The pattern matches two adjacent signed decimal numbers. Capture group 1 is the
// first signed number and group 3 the second (used as latitude and longitude by
// mp4XYZCoordsToLocation); groups 2 and 4 hold just their respective signs. The
// pattern is not anchored, so it matches anywhere in the input, and it requires a
// decimal point in both numbers.
var cXYZCoordsRegex = regexp.MustCompile(`((\+|-)\d+\.\d+)((\+|-)\d+\.\d+)`)
// bufPool is a pool of reusable *bytes.Buffer values. ExtractAllMetadata draws from
// it when it has to buffer a non-seekable file in memory. Buffers taken from the
// pool may hold old contents, so callers reset them before use.
var bufPool = sync.Pool{
// New allocates an empty buffer when the pool has none to hand out.
New: func() any {
return new(bytes.Buffer)
},
}