diff --git a/test/parameters.json b/test/parameters.json
index 8215d25c5..96998b5c3 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -29,6 +29,7 @@
"simulate": false,
"skip_download": false,
"subtitleslang": null,
+ "subtitlesformat": "srt",
"test": true,
"updatetime": true,
"usenetrc": false,
@@ -36,5 +37,8 @@
"verbose": true,
"writedescription": false,
"writeinfojson": true,
- "writesubtitles": false
-}
\ No newline at end of file
+ "writesubtitles": false,
+ "onlysubtitles": false,
+ "allsubtitles": false,
+ "listsubtitles": false
+}
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index 5d3566a35..30f2246dd 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -38,20 +38,63 @@ class FakeDownloader(object):
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class TestYoutubeSubtitles(unittest.TestCase):
+ def setUp(self):
+ DL = FakeDownloader()
+ DL.params['allsubtitles'] = False
+ DL.params['writesubtitles'] = False
+ DL.params['subtitlesformat'] = 'srt'
+ DL.params['listsubtitles'] = False
+ def test_youtube_no_subtitles(self):
+ DL = FakeDownloader()
+ DL.params['writesubtitles'] = False
+ IE = YoutubeIE(DL)
+ info_dict = IE.extract('QRS8MkLhQmM')
+ subtitles = info_dict[0]['subtitles']
+ self.assertEqual(subtitles, None)
def test_youtube_subtitles(self):
DL = FakeDownloader()
DL.params['writesubtitles'] = True
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
- self.assertEqual(md5(info_dict[0]['subtitles']), 'c3228550d59116f3c29fba370b55d033')
-
+ sub = info_dict[0]['subtitles'][0]
+ self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_it(self):
DL = FakeDownloader()
DL.params['writesubtitles'] = True
DL.params['subtitleslang'] = 'it'
IE = YoutubeIE(DL)
info_dict = IE.extract('QRS8MkLhQmM')
- self.assertEqual(md5(info_dict[0]['subtitles']), '132a88a0daf8e1520f393eb58f1f646a')
+ sub = info_dict[0]['subtitles'][0]
+ self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')
+ def test_youtube_onlysubtitles(self):
+ DL = FakeDownloader()
+ DL.params['writesubtitles'] = True
+ DL.params['onlysubtitles'] = True
+ IE = YoutubeIE(DL)
+ info_dict = IE.extract('QRS8MkLhQmM')
+ sub = info_dict[0]['subtitles'][0]
+ self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')
+ def test_youtube_allsubtitles(self):
+ DL = FakeDownloader()
+ DL.params['allsubtitles'] = True
+ IE = YoutubeIE(DL)
+ info_dict = IE.extract('QRS8MkLhQmM')
+ subtitles = info_dict[0]['subtitles']
+ self.assertEqual(len(subtitles), 12)
+ def test_youtube_subtitles_format(self):
+ DL = FakeDownloader()
+ DL.params['writesubtitles'] = True
+ DL.params['subtitlesformat'] = 'sbv'
+ IE = YoutubeIE(DL)
+ info_dict = IE.extract('QRS8MkLhQmM')
+ sub = info_dict[0]['subtitles'][0]
+ self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b')
+ def test_youtube_list_subtitles(self):
+ DL = FakeDownloader()
+ DL.params['listsubtitles'] = True
+ IE = YoutubeIE(DL)
+ info_dict = IE.extract('QRS8MkLhQmM')
+ self.assertEqual(info_dict, None)
if __name__ == '__main__':
unittest.main()
diff --git a/test/tests.json b/test/tests.json
index 7af3c2892..fd9d33332 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -293,5 +293,20 @@
"info_dict": {
"title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2"
}
+ },
+ {
+ "name": "Generic",
+ "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html",
+ "file": "13601338388002.mp4",
+ "md5": "85b90ccc9d73b4acd9138d3af4c27f89"
+ },
+ {
+ "name": "Spiegel",
+ "url": "http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html",
+ "file": "1259285.mp4",
+ "md5": "2c2754212136f35fb4b19767d242f66e",
+ "info_dict": {
+ "title": "Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
+ }
}
]
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 68fad11bc..6af2acbee 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -78,7 +78,11 @@ class FileDownloader(object):
updatetime: Use the Last-modified header to set output file timestamps.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
- writesubtitles: Write the video subtitles to a .srt file
+ writesubtitles: Write the video subtitles to a file
+ onlysubtitles: Downloads only the subtitles of the video
+ allsubtitles: Downloads all the subtitles of the video
+ listsubtitles: Lists all available subtitles for the video
+ subtitlesformat: Subtitle format [sbv/srt] (default=srt)
subtitleslang: Language of the subtitles to download
test: Download only first bytes to test the downloader.
keepvideo: Keep the video file after post-processing
@@ -301,9 +305,9 @@ class FileDownloader(object):
""" Report that the description file is being written """
self.to_screen(u'[info] Writing video description to: ' + descfn)
- def report_writesubtitles(self, srtfn):
+ def report_writesubtitles(self, sub_filename):
""" Report that the subtitles file is being written """
- self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+ self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
def report_writeinfojson(self, infofn):
""" Report that the metadata file has been written """
@@ -372,8 +376,11 @@ class FileDownloader(object):
filename = self.params['outtmpl'] % template_dict
return filename
- except (ValueError, KeyError) as err:
- self.trouble(u'ERROR: invalid system charset or erroneous output template')
+ except KeyError as err:
+ self.trouble(u'ERROR: Erroneous output template')
+ return None
+ except ValueError as err:
+ self.trouble(u'ERROR: Insufficient system charset ' + repr(preferredencoding()))
return None
def _match_entry(self, info_dict):
@@ -519,14 +526,35 @@ class FileDownloader(object):
if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
+ subtitle = info_dict['subtitles'][0]
+ (sub_error, sub_lang, sub) = subtitle
+ sub_format = self.params.get('subtitlesformat')
try:
- srtfn = filename.rsplit('.', 1)[0] + u'.srt'
- self.report_writesubtitles(srtfn)
- with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
- srtfile.write(info_dict['subtitles'])
+ sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+ self.report_writesubtitles(sub_filename)
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+ subfile.write(sub)
except (OSError, IOError):
self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
return
+ if self.params.get('onlysubtitles', False):
+ return
+
+ if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+ subtitles = info_dict['subtitles']
+ sub_format = self.params.get('subtitlesformat')
+ for subtitle in subtitles:
+ (sub_error, sub_lang, sub) = subtitle
+ try:
+ sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+ self.report_writesubtitles(sub_filename)
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+ subfile.write(sub)
+ except (OSError, IOError):
+ self.trouble(u'ERROR: Cannot write subtitles file ' + sub_filename)
+ return
+ if self.params.get('onlysubtitles', False):
+ return
if self.params.get('writeinfojson', False):
infofn = filename + u'.info.json'
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index e714fa6b0..dd4a776e4 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -48,7 +48,7 @@ class InfoExtractor(object):
uploader_id: Nickname or id of the video uploader.
location: Physical location of the video.
player_url: SWF Player URL (used for rtmpdump).
- subtitles: The .srt file contents.
+ subtitles: The subtitle file contents.
urlhandle: [internal] The urlHandle to be used to download the file,
like returned by urllib.request.urlopen
@@ -126,8 +126,14 @@ class InfoExtractor(object):
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the data of the page as a string """
urlh = self._request_webpage(url_or_request, video_id, note, errnote)
+ content_type = urlh.headers.get('Content-Type', '')
+ m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+ if m:
+ encoding = m.group(1)
+ else:
+ encoding = 'utf-8'
webpage_bytes = urlh.read()
- return webpage_bytes.decode('utf-8', 'replace')
+ return webpage_bytes.decode(encoding, 'replace')
#Methods for following #608
#They set the correct value of the '_type' key
@@ -236,7 +242,16 @@ class YoutubeIE(InfoExtractor):
def report_video_subtitles_download(self, video_id):
"""Report attempt to download video info webpage."""
- self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id)
+ self._downloader.to_screen(u'[youtube] %s: Checking available subtitles' % video_id)
+
+ def report_video_subtitles_request(self, video_id, sub_lang, format):
+ """Report attempt to download the video subtitles for the given language and format."""
+ self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
+
+ def report_video_subtitles_available(self, video_id, sub_lang_list):
+ """Print the comma-separated keys of sub_lang_list as the subtitles available for video_id."""
+ available_langs = ",".join(sub_lang_list.keys())
+ self._downloader.to_screen(u'[youtube] %s: Available subtitles for video: %s' % (video_id, available_langs))
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
@@ -250,55 +265,63 @@ class YoutubeIE(InfoExtractor):
"""Indicate the download will use the RTMP protocol."""
self._downloader.to_screen(u'[youtube] RTMP download detected')
- def _closed_captions_xml_to_srt(self, xml_string):
- srt = ''
- texts = re.findall(r'