Merge branch 'master' of github.com:rg3/youtube-dl
This commit is contained in:
commit
81df121dd3
6 changed files with 91 additions and 13 deletions
|
@ -14,6 +14,8 @@ from youtube_dl.utils import timeconvert
|
||||||
from youtube_dl.utils import sanitize_filename
|
from youtube_dl.utils import sanitize_filename
|
||||||
from youtube_dl.utils import unescapeHTML
|
from youtube_dl.utils import unescapeHTML
|
||||||
from youtube_dl.utils import orderedSet
|
from youtube_dl.utils import orderedSet
|
||||||
|
from youtube_dl.utils import DateRange
|
||||||
|
from youtube_dl.utils import unified_strdate
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
_compat_str = lambda b: b.decode('unicode-escape')
|
_compat_str = lambda b: b.decode('unicode-escape')
|
||||||
|
@ -95,6 +97,20 @@ class TestUtil(unittest.TestCase):
|
||||||
|
|
||||||
def test_unescape_html(self):
|
def test_unescape_html(self):
|
||||||
self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
|
self.assertEqual(unescapeHTML(_compat_str('%20;')), _compat_str('%20;'))
|
||||||
|
|
||||||
|
def test_daterange(self):
    """Check DateRange membership for bounded and half-open ranges."""
    # Both bounds given: a date before the start is outside the range.
    twentieth_century = DateRange("19000101", "20000101")
    self.assertFalse("17890714" in twentieth_century)

    # Only a start date given.
    anno_domini = DateRange("00010101")
    self.assertTrue("19690721" in anno_domini)

    # Only an end date given.
    first_millennium = DateRange(end="10000101")
    self.assertTrue("07110427" in first_millennium)
|
||||||
|
|
||||||
|
def test_unified_dates(self):
    """Check that unified_strdate maps several date spellings to YYYYMMDD."""
    expectations = (
        ('December 21, 2010', '20101221'),
        ('8/7/2009', '20090708'),
        ('Dec 14, 2012', '20121214'),
        ('2012/10/11 01:56:38 +0000', '20121011'),
    )
    for raw_date, normalised in expectations:
        self.assertEqual(unified_strdate(raw_date), normalised)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -71,6 +71,13 @@ class TestYoutubeLists(unittest.TestCase):
|
||||||
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
|
||||||
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
self.assertFalse('pElCt5oNDuI' in ytie_results)
|
||||||
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
self.assertFalse('KdPEApIVdWM' in ytie_results)
|
||||||
|
|
||||||
|
def test_youtube_playlist_empty(self):
    """Extracting this playlist must yield a playlist result with zero entries."""
    downloader = FakeDownloader()
    playlist_ie = YoutubePlaylistIE(downloader)
    playlist_url = 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx'
    # extract() returns a list; the playlist result is its first element.
    playlist = playlist_ie.extract(playlist_url)[0]
    self.assertIsPlaylist(playlist)
    self.assertEqual(len(playlist['entries']), 0)
|
||||||
|
|
||||||
def test_youtube_course(self):
|
def test_youtube_course(self):
|
||||||
dl = FakeDownloader()
|
dl = FakeDownloader()
|
||||||
|
|
|
@ -89,6 +89,7 @@ class FileDownloader(object):
|
||||||
keepvideo: Keep the video file after post-processing
|
keepvideo: Keep the video file after post-processing
|
||||||
min_filesize: Skip files smaller than this size
|
min_filesize: Skip files smaller than this size
|
||||||
max_filesize: Skip files larger than this size
|
max_filesize: Skip files larger than this size
|
||||||
|
daterange: A DateRange object, download only if the upload_date is in the range.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
params = None
|
params = None
|
||||||
|
@ -424,6 +425,11 @@ class FileDownloader(object):
|
||||||
if rejecttitle:
|
if rejecttitle:
|
||||||
if re.search(rejecttitle, title, re.IGNORECASE):
|
if re.search(rejecttitle, title, re.IGNORECASE):
|
||||||
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
|
||||||
|
date = info_dict.get('upload_date', None)
|
||||||
|
if date is not None:
|
||||||
|
dateRange = self.params.get('daterange', DateRange())
|
||||||
|
if date not in dateRange:
|
||||||
|
return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def extract_info(self, url, download = True, ie_name = None):
|
def extract_info(self, url, download = True, ie_name = None):
|
||||||
|
|
|
@ -562,12 +562,7 @@ class YoutubeIE(InfoExtractor):
|
||||||
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
|
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
|
||||||
format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
|
upload_date = unified_strdate(upload_date)
|
||||||
for expression in format_expressions:
|
|
||||||
try:
|
|
||||||
upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# description
|
# description
|
||||||
video_description = get_element_by_id("eow-description", video_webpage)
|
video_description = get_element_by_id("eow-description", video_webpage)
|
||||||
|
@ -1723,12 +1718,11 @@ class YoutubePlaylistIE(InfoExtractor):
|
||||||
if 'feed' not in response:
|
if 'feed' not in response:
|
||||||
self._downloader.report_error(u'Got a malformed response from YouTube API')
|
self._downloader.report_error(u'Got a malformed response from YouTube API')
|
||||||
return
|
return
|
||||||
|
playlist_title = response['feed']['title']['$t']
|
||||||
if 'entry' not in response['feed']:
|
if 'entry' not in response['feed']:
|
||||||
# Number of videos is a multiple of self._MAX_RESULTS
|
# Number of videos is a multiple of self._MAX_RESULTS
|
||||||
break
|
break
|
||||||
|
|
||||||
playlist_title = response['feed']['title']['$t']
|
|
||||||
|
|
||||||
videos += [ (entry['yt$position']['$t'], entry['content']['src'])
|
videos += [ (entry['yt$position']['$t'], entry['content']['src'])
|
||||||
for entry in response['feed']['entry']
|
for entry in response['feed']['entry']
|
||||||
if 'content' in entry ]
|
if 'content' in entry ]
|
||||||
|
@ -2386,7 +2380,7 @@ class ComedyCentralIE(InfoExtractor):
|
||||||
shortMediaId = mediaId.split(':')[-1]
|
shortMediaId = mediaId.split(':')[-1]
|
||||||
showId = mediaId.split(':')[-2].replace('.com', '')
|
showId = mediaId.split(':')[-2].replace('.com', '')
|
||||||
officialTitle = itemEl.findall('./title')[0].text
|
officialTitle = itemEl.findall('./title')[0].text
|
||||||
officialDate = itemEl.findall('./pubDate')[0].text
|
officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text)
|
||||||
|
|
||||||
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
|
||||||
compat_urllib_parse.urlencode({'uri': mediaId}))
|
compat_urllib_parse.urlencode({'uri': mediaId}))
|
||||||
|
@ -2696,12 +2690,13 @@ class SoundcloudIE(InfoExtractor):
|
||||||
|
|
||||||
streams = json.loads(stream_json)
|
streams = json.loads(stream_json)
|
||||||
mediaURL = streams['http_mp3_128_url']
|
mediaURL = streams['http_mp3_128_url']
|
||||||
|
upload_date = unified_strdate(info['created_at'])
|
||||||
|
|
||||||
return [{
|
return [{
|
||||||
'id': info['id'],
|
'id': info['id'],
|
||||||
'url': mediaURL,
|
'url': mediaURL,
|
||||||
'uploader': info['user']['username'],
|
'uploader': info['user']['username'],
|
||||||
'upload_date': info['created_at'],
|
'upload_date': upload_date,
|
||||||
'title': info['title'],
|
'title': info['title'],
|
||||||
'ext': u'mp3',
|
'ext': u'mp3',
|
||||||
'description': info['description'],
|
'description': info['description'],
|
||||||
|
@ -3561,6 +3556,7 @@ class FunnyOrDieIE(InfoExtractor):
|
||||||
|
|
||||||
class SteamIE(InfoExtractor):
|
class SteamIE(InfoExtractor):
|
||||||
_VALID_URL = r"""http://store.steampowered.com/
|
_VALID_URL = r"""http://store.steampowered.com/
|
||||||
|
(agecheck/)?
|
||||||
(?P<urltype>video|app)/ #If the page is only for videos or for a game
|
(?P<urltype>video|app)/ #If the page is only for videos or for a game
|
||||||
(?P<gameID>\d+)/?
|
(?P<gameID>\d+)/?
|
||||||
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
|
(?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
|
||||||
|
@ -3759,7 +3755,7 @@ class YouPornIE(InfoExtractor):
|
||||||
self._downloader.report_warning(u'unable to extract video date')
|
self._downloader.report_warning(u'unable to extract video date')
|
||||||
upload_date = None
|
upload_date = None
|
||||||
else:
|
else:
|
||||||
upload_date = result.group('date').strip()
|
upload_date = unified_strdate(result.group('date').strip())
|
||||||
|
|
||||||
# Get the video uploader
|
# Get the video uploader
|
||||||
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
|
result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
|
||||||
|
@ -3866,7 +3862,7 @@ class PornotubeIE(InfoExtractor):
|
||||||
if result is None:
|
if result is None:
|
||||||
self._downloader.report_error(u'unable to extract video title')
|
self._downloader.report_error(u'unable to extract video title')
|
||||||
return
|
return
|
||||||
upload_date = result.group('date')
|
upload_date = unified_strdate(result.group('date'))
|
||||||
|
|
||||||
info = {'id': video_id,
|
info = {'id': video_id,
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|
|
@ -157,6 +157,9 @@ def parseOpts(overrideArguments=None):
|
||||||
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
|
||||||
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)
|
||||||
|
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
|
||||||
|
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
|
||||||
|
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
|
||||||
|
|
||||||
|
|
||||||
authentication.add_option('-u', '--username',
|
authentication.add_option('-u', '--username',
|
||||||
|
@ -447,6 +450,10 @@ def _real_main(argv=None):
|
||||||
if opts.recodevideo is not None:
|
if opts.recodevideo is not None:
|
||||||
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
|
if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']:
|
||||||
parser.error(u'invalid video recode format specified')
|
parser.error(u'invalid video recode format specified')
|
||||||
|
if opts.date is not None:
|
||||||
|
date = DateRange.day(opts.date)
|
||||||
|
else:
|
||||||
|
date = DateRange(opts.dateafter, opts.datebefore)
|
||||||
|
|
||||||
if sys.version_info < (3,):
|
if sys.version_info < (3,):
|
||||||
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
|
||||||
|
@ -513,7 +520,8 @@ def _real_main(argv=None):
|
||||||
'test': opts.test,
|
'test': opts.test,
|
||||||
'keepvideo': opts.keepvideo,
|
'keepvideo': opts.keepvideo,
|
||||||
'min_filesize': opts.min_filesize,
|
'min_filesize': opts.min_filesize,
|
||||||
'max_filesize': opts.max_filesize
|
'max_filesize': opts.max_filesize,
|
||||||
|
'daterange': date
|
||||||
})
|
})
|
||||||
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
|
|
|
@ -12,6 +12,7 @@ import traceback
|
||||||
import zlib
|
import zlib
|
||||||
import email.utils
|
import email.utils
|
||||||
import json
|
import json
|
||||||
|
import datetime
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import urllib.request as compat_urllib_request
|
import urllib.request as compat_urllib_request
|
||||||
|
@ -568,3 +569,47 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
|
||||||
|
|
||||||
https_request = http_request
|
https_request = http_request
|
||||||
https_response = http_response
|
https_response = http_response
|
||||||
|
|
||||||
|
def unified_strdate(date_str):
    """Return a string with the date in the format YYYYMMDD.

    date_str is matched against a fixed list of known date layouts; the
    first layout that parses wins.  Returns None when date_str is None or
    matches none of the layouts.
    """
    if date_str is None:
        return None
    # Replace commas ("December 21, 2010" -> "December 21  2010"); strptime
    # treats any run of whitespace in the input as a single separator.
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2, so drop a trailing
    # " +0000"-style offset before parsing.
    date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
    format_expressions = (
        '%d %B %Y',
        '%B %d %Y',
        '%b %d %Y',
        '%Y-%m-%d',
        '%d/%m/%Y',
        '%Y/%m/%d %H:%M:%S',
    )
    for expression in format_expressions:
        try:
            # strftime stays inside the try: on some Python versions it
            # raises ValueError for years it cannot format.
            return datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Layout did not match; try the next one.
            continue
    return None
|
||||||
|
|
||||||
|
def date_from_str(date_str):
    """Return a datetime.date object from a string in the format YYYYMMDD.

    Raises ValueError if date_str is not in that format.
    """
    return datetime.datetime.strptime(date_str, "%Y%m%d").date()


class DateRange(object):
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format YYYYMMDD.

        A missing start defaults to the earliest representable date and a
        missing end to the latest one.  Raises ValueError if start is
        after end, or if either string cannot be parsed.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        # start == end is a valid one-day range (see day()); only an
        # inverted range is an error.
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a YYYYMMDD string) is in the range"""
        date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
|
||||||
|
|
Loading…
Reference in a new issue