[yahoo] Add an extractor for yahoo news (closes #1849)

This commit is contained in:
Jaime Marquínez Ferrándiz 2013-11-29 15:25:43 +01:00
parent 5f077efcb1
commit befd88b786
2 changed files with 38 additions and 2 deletions

View file

@ -172,7 +172,11 @@ from .xhamster import XHamsterIE
from .xnxx import XNXXIE from .xnxx import XNXXIE
from .xvideos import XVideosIE from .xvideos import XVideosIE
from .xtube import XTubeIE from .xtube import XTubeIE
from .yahoo import YahooIE, YahooSearchIE from .yahoo import (
YahooIE,
YahooNewsIE,
YahooSearchIE,
)
from .youjizz import YouJizzIE from .youjizz import YouJizzIE
from .youku import YoukuIE from .youku import YoukuIE
from .youporn import YouPornIE from .youporn import YouPornIE

View file

@ -53,8 +53,11 @@ class YahooIE(InfoExtractor):
# The 'meta' field is not always in the video webpage, we request it # The 'meta' field is not always in the video webpage, we request it
# from another page # from another page
long_id = info['id'] long_id = info['id']
return self._get_info(info['id'], video_id)
def _get_info(self, long_id, video_id):
query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id) ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"' % long_id)
data = compat_urllib_parse.urlencode({ data = compat_urllib_parse.urlencode({
'q': query, 'q': query,
'env': 'prod', 'env': 'prod',
@ -100,6 +103,35 @@ class YahooIE(InfoExtractor):
} }
class YahooNewsIE(YahooIE):
    """Extractor for videos hosted on news.yahoo.com.

    The numeric id at the end of the page URL is used as the video id; the
    id needed to look up the streams is scraped from the page itself and
    handed to the ``_get_info`` helper inherited from ``YahooIE``.
    """

    IE_NAME = 'yahoo:news'
    # The id group requires at least one digit so that a URL such as
    # '.../video/foo-.html' is rejected instead of yielding an empty id.
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d+)\.html'
    _TEST = {
        u'url': u'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        u'info_dict': {
            u'id': u'104538833',
            u'ext': u'flv',
            u'title': u'China Moses Is Crazy About the Blues',
            u'description': u'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
        u'params': {
            # Requires rtmpdump
            u'skip_download': True,
        },
    }

    # Overwrite YahooIE properties we don't want
    _TESTS = []

    def _real_extract(self, url):
        """Return the info for the news video at *url*.

        Downloads the news page, reads the ``contentId`` value embedded in
        it and passes that value, together with the short id taken from the
        URL, to ``_get_info``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The page embeds the id in a snippet of the form
        # "contentId: '<long id>',".
        long_id = self._search_regex(
            r'contentId: \'(.+?)\',', webpage, u'long id')
        return self._get_info(long_id, video_id)
class YahooSearchIE(SearchInfoExtractor): class YahooSearchIE(SearchInfoExtractor):
IE_DESC = u'Yahoo screen search' IE_DESC = u'Yahoo screen search'
_MAX_RESULTS = 1000 _MAX_RESULTS = 1000