[cnn] Add an extractor for blogs (closes #2361)

This commit is contained in:
Jaime Marquínez Ferrándiz 2014-02-11 14:38:17 +01:00
parent def630e523
commit 0ae6b01937
2 changed files with 30 additions and 1 deletions

View file

@ -32,7 +32,10 @@ from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
from .cmt import CMTIE from .cmt import CMTIE
from .cnn import CNNIE from .cnn import (
CNNIE,
CNNBlogsIE,
)
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE

View file

@ -6,6 +6,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
url_basename,
) )
@ -98,3 +99,28 @@ class CNNIE(InfoExtractor):
'duration': duration, 'duration': duration,
'upload_date': upload_date, 'upload_date': upload_date,
} }
class CNNBlogsIE(InfoExtractor):
    """Extractor for pages hosted on a ``*.blogs.cnn.com`` subdomain.

    The blog page itself is only used to locate the URL stored in its
    ``data-url`` attribute; the returned result hands that URL over to
    ``CNNIE`` for the actual extraction.
    """

    _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
    _TEST = {
        'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
        'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
        'info_dict': {
            'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
            'ext': 'mp4',
            'title': 'Criminalizing journalism?',
            'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
            'upload_date': '20140209',
        },
        'add_ie': ['CNN'],
    }

    def _real_extract(self, url):
        """Return a ``url``-type result pointing at the page's ``data-url``.

        The result carries ``CNNIE``'s key so that extractor processes the
        URL found in the blog page.
        """
        # The blog URL has no separate id here; url_basename(url) is passed
        # as the id argument of the page download.
        page_id = url_basename(url)
        page = self._download_webpage(url, page_id)

        embedded_url = self._html_search_regex(
            r'data-url="(.+?)"', page, 'cnn url')
        target_ie = CNNIE.ie_key()

        return {
            '_type': 'url',
            'url': embedded_url,
            'ie_key': target_ie,
        }