Added '--xattrs' option which writes metadata to the file's extended attributes using a youtube-dl postprocessor.

Works on Linux, OSX, and Windows.
This commit is contained in:
epitron 2014-01-02 07:47:28 -05:00
parent 1b969041d7
commit e63fc1bed4
4 changed files with 139 additions and 2 deletions

View file

@ -189,7 +189,9 @@ which means you can modify it, redistribute it or use it however you like.
processed files are overwritten by default processed files are overwritten by default
--embed-subs embed subtitles in the video (only for mp4 --embed-subs embed subtitles in the video (only for mp4
videos) videos)
--add-metadata add metadata to the files --add-metadata write metadata to the video file
--xattrs write metadata to the video file's xattrs (using
dublin core and xdg standards)
# CONFIGURATION # CONFIGURATION

View file

@ -62,6 +62,7 @@ class FFmpegPostProcessorError(PostProcessingError):
class AudioConversionError(PostProcessingError): class AudioConversionError(PostProcessingError):
pass pass
class FFmpegPostProcessor(PostProcessor): class FFmpegPostProcessor(PostProcessor):
def __init__(self,downloader=None): def __init__(self,downloader=None):
PostProcessor.__init__(self, downloader) PostProcessor.__init__(self, downloader)
@ -107,6 +108,7 @@ class FFmpegPostProcessor(PostProcessor):
return u'./' + fn return u'./' + fn
return fn return fn
class FFmpegExtractAudioPP(FFmpegPostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor):
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
FFmpegPostProcessor.__init__(self, downloader) FFmpegPostProcessor.__init__(self, downloader)
@ -232,6 +234,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
information['filepath'] = new_path information['filepath'] = new_path
return self._nopostoverwrites,information return self._nopostoverwrites,information
class FFmpegVideoConvertor(FFmpegPostProcessor): class FFmpegVideoConvertor(FFmpegPostProcessor):
def __init__(self, downloader=None,preferedformat=None): def __init__(self, downloader=None,preferedformat=None):
super(FFmpegVideoConvertor, self).__init__(downloader) super(FFmpegVideoConvertor, self).__init__(downloader)
@ -509,3 +512,120 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
os.remove(encodeFilename(filename)) os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename))
return True, info return True, info
class XAttrMetadataPP(PostProcessor):
    """Post-processor that writes video metadata to the file's extended attributes."""

    #
    # More info about extended attributes for media:
    #   http://freedesktop.org/wiki/CommonExtendedAttributes/
    #   http://www.freedesktop.org/wiki/PhreedomDraft/
    #   http://dublincore.org/documents/usageguide/elements.shtml
    #
    # TODO:
    #  * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated)
    #  * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution'
    #

    def run(self, info):
        """Set extended attributes on the downloaded file (if xattr support is found).

        The writer is chosen in this order: the python ``xattr`` module, then
        (on posix) the ``setfattr`` or ``xattr`` command line tools, and on
        non-posix systems a colon-suffixed stream name on the file itself
        (NTFS Alternate Data Streams).

        ``info`` must contain 'filepath'; the keys 'webpage_url', 'title',
        'upload_date', 'description', 'uploader' and 'format' are written
        when they hold a truthy value.

        Returns ``(True, info)`` when all attributes were written, and
        ``(False, info)`` when no writer is available or writing failed with
        an OSError/IOError (an error is reported through the downloader in
        both cases).
        """
        import errno

        from .utils import hyphenate_date

        # NOTE(review): the first element of the returned tuple is interpreted
        # by the caller, which is not visible here; False is used for every
        # failure path to stay consistent within this method.

        # This mess below finds the best xattr tool for the job and creates a
        # "write_xattr" function.
        try:
            # try the pyxattr module...
            import xattr

            def write_xattr(path, key, value):
                return xattr.setxattr(path, key, value)

        except ImportError:
            if os.name == 'posix':
                def which(program):
                    # Return the first executable named `program` on PATH, or None.
                    for directory in os.environ.get("PATH", "").split(os.pathsep):
                        candidate = os.path.join(directory, program)
                        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                            return candidate
                    return None

                user_has_setfattr = which("setfattr")
                user_has_xattr = which("xattr")

                if not (user_has_setfattr or user_has_xattr):
                    # On Unix, and can't find pyxattr, setfattr, or xattr.
                    if sys.platform.startswith('linux'):
                        self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'pyxattr' or 'xattr' modules, or the GNU 'attr' package (which contains the 'setfattr' tool).")
                    elif sys.platform == 'darwin':
                        self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'xattr' binary.")
                    else:
                        self._downloader.report_error("Couldn't find a tool to set the xattrs. Install either the python 'xattr' module, or the 'setfattr' or 'xattr' binary.")
                    # No writer exists, so stop here instead of falling
                    # through to a NameError on write_xattr below.
                    return False, info

                def write_xattr(path, key, value):
                    potential_errors = {
                        # setfattr: /tmp/blah: Operation not supported
                        "Operation not supported": errno.EOPNOTSUPP,
                        # setfattr: ~/blah: No such file or directory
                        # xattr: No such file: ~/blah
                        "No such file": errno.ENOENT,
                    }

                    if user_has_setfattr:
                        cmd = ['setfattr', '-n', key, '-v', value, path]
                    else:
                        cmd = ['xattr', '-w', key, value, path]

                    try:
                        subprocess.check_output(cmd, stderr=subprocess.STDOUT)
                    except subprocess.CalledProcessError as e:
                        errorstr = e.output.strip().decode()
                        for potential_errorstr, potential_errno in potential_errors.items():
                            if potential_errorstr in errorstr:
                                # Translate the tool's message into an OSError
                                # and hide the CalledProcessError context.
                                translated = OSError(potential_errno, potential_errorstr)
                                translated.__cause__ = None
                                raise translated
                        raise  # Reraise unhandled error

            else:
                # Write xattrs to NTFS Alternate Data Streams: http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
                def write_xattr(path, key, value):
                    # A drive prefix such as "C:" legitimately contains a
                    # colon; only the remainder of the path must be colon-free
                    # so that "<path>:<key>" names exactly one stream.
                    path_without_drive = os.path.splitdrive(path)[1]
                    if ":" in key or ":" in path_without_drive:
                        raise ValueError("xattr key and path must not contain ':' (key=%r, path=%r)" % (key, path))
                    if not os.path.exists(path):
                        raise OSError(errno.ENOENT, "No such file", path)

                    with open(path + ":" + key, "w") as stream:
                        stream.write(value)

        # Write the metadata to the file's xattrs
        self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs...')

        filename = info['filepath']

        xattr_mapping = {
            'user.xdg.referrer.url': 'webpage_url',
            # 'user.xdg.comment': 'description',
            'user.dublincore.title': 'title',
            'user.dublincore.date': 'upload_date',
            'user.dublincore.description': 'description',
            'user.dublincore.contributor': 'uploader',
            'user.dublincore.format': 'format',
        }

        try:
            for xattrname, infoname in xattr_mapping.items():
                value = info.get(infoname)
                if not value:
                    continue
                if infoname == "upload_date":
                    value = hyphenate_date(value)
                write_xattr(filename, xattrname, value)
        except (OSError, IOError):
            # IOError is listed separately because on Python 2 open() raises
            # IOError, which is not an OSError subclass there.
            self._downloader.report_error("This filesystem doesn't support extended attributes. (You may have to enable them in your /etc/fstab)")
            return False, info

        return True, info

View file

@ -38,6 +38,7 @@ __authors__ = (
'Takuya Tsuchida', 'Takuya Tsuchida',
'Sergey M.', 'Sergey M.',
'Michael Orlitzky', 'Michael Orlitzky',
'Chris Gahan',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'
@ -78,6 +79,7 @@ from .PostProcessor import (
FFmpegVideoConvertor, FFmpegVideoConvertor,
FFmpegExtractAudioPP, FFmpegExtractAudioPP,
FFmpegEmbedSubtitlePP, FFmpegEmbedSubtitlePP,
XAttrMetadataPP,
) )
@ -412,7 +414,9 @@ def parseOpts(overrideArguments=None):
postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False, postproc.add_option('--embed-subs', action='store_true', dest='embedsubtitles', default=False,
help='embed subtitles in the video (only for mp4 videos)') help='embed subtitles in the video (only for mp4 videos)')
postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False, postproc.add_option('--add-metadata', action='store_true', dest='addmetadata', default=False,
help='add metadata to the files') help='write metadata to the video file')
postproc.add_option('--xattrs', action='store_true', dest='xattrs', default=False,
help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
parser.add_option_group(general) parser.add_option_group(general)
@ -709,6 +713,8 @@ def _real_main(argv=None):
ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) ydl.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo))
if opts.embedsubtitles: if opts.embedsubtitles:
ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat)) ydl.add_post_processor(FFmpegEmbedSubtitlePP(subtitlesformat=opts.subtitlesformat))
if opts.xattrs:
ydl.add_post_processor(XAttrMetadataPP())
# Update version # Update version
if opts.update_self: if opts.update_self:

View file

@ -809,6 +809,15 @@ def date_from_str(date_str):
return today + delta return today + delta
return datetime.datetime.strptime(date_str, "%Y%m%d").date() return datetime.datetime.strptime(date_str, "%Y%m%d").date()
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Input that does not match the 'YYYYMMDD' pattern is returned unchanged.
    None is also returned unchanged, so a missing date can be passed through
    without raising.
    """
    if date_str is None:
        return None
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
class DateRange(object): class DateRange(object):
"""Represents a time interval between two dates""" """Represents a time interval between two dates"""
def __init__(self, start=None, end=None): def __init__(self, start=None, end=None):