[mtvservices:embedded] Use another endpoint to get feed URL
Closes #10363. In the original mtvservices:embedded test case, config.xml is still used to get the feed URL. Some other examples, including test_Generic_40 (http://www.vulture.com/2016/06/new-key-peele-sketches-released.html) and the video mentioned in #10363, use another endpoint to get the feed URL. The 'index.html' approach also works for the original test case, so I didn't keep the old approach.
This commit is contained in:
		
							parent
							
								
									97653f81b2
								
							
						
					
					
						commit
						0c75abbb7b
					
				
					 4 changed files with 23 additions and 20 deletions
				
			
		| 
						 | 
				
			
			@ -1,3 +1,9 @@
 | 
			
		|||
version <unreleased>
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
version 2016.08.24.1
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,6 @@ from __future__ import unicode_literals
 | 
			
		|||
 | 
			
		||||
from .mtv import MTVServicesInfoExtractor
 | 
			
		||||
from ..utils import unified_strdate
 | 
			
		||||
from ..compat import compat_urllib_parse_urlencode
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class BetIE(MTVServicesInfoExtractor):
 | 
			
		||||
| 
						 | 
				
			
			@ -53,9 +52,9 @@ class BetIE(MTVServicesInfoExtractor):
 | 
			
		|||
    _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player"
 | 
			
		||||
 | 
			
		||||
    def _get_feed_query(self, uri):
 | 
			
		||||
        return compat_urllib_parse_urlencode({
 | 
			
		||||
        return {
 | 
			
		||||
            'uuid': uri,
 | 
			
		||||
        })
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _extract_mgid(self, webpage):
 | 
			
		||||
        return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,7 +4,6 @@ import re
 | 
			
		|||
 | 
			
		||||
from .common import InfoExtractor
 | 
			
		||||
from ..compat import (
 | 
			
		||||
    compat_urllib_parse_urlencode,
 | 
			
		||||
    compat_str,
 | 
			
		||||
    compat_xpath,
 | 
			
		||||
)
 | 
			
		||||
| 
						 | 
				
			
			@ -14,12 +13,13 @@ from ..utils import (
 | 
			
		|||
    fix_xml_ampersands,
 | 
			
		||||
    float_or_none,
 | 
			
		||||
    HEADRequest,
 | 
			
		||||
    RegexNotFoundError,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    strip_or_none,
 | 
			
		||||
    timeconvert,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
    update_url_query,
 | 
			
		||||
    url_basename,
 | 
			
		||||
    RegexNotFoundError,
 | 
			
		||||
    xpath_text,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -36,6 +36,11 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
			
		|||
    def _id_from_uri(uri):
 | 
			
		||||
        return uri.split(':')[-1]
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _remove_template_parameter(url):
 | 
			
		||||
        # Remove the templates, like &device={device}
 | 
			
		||||
        return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
 | 
			
		||||
 | 
			
		||||
    # This was originally implemented for ComedyCentral, but it also works here
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def _transform_rtmp_url(cls, rtmp_video_url):
 | 
			
		||||
| 
						 | 
				
			
			@ -117,9 +122,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
			
		|||
        video_id = self._id_from_uri(uri)
 | 
			
		||||
        self.report_extraction(video_id)
 | 
			
		||||
        content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
 | 
			
		||||
        mediagen_url = content_el.attrib['url']
 | 
			
		||||
        # Remove the templates, like &device={device}
 | 
			
		||||
        mediagen_url = re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', mediagen_url)
 | 
			
		||||
        mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
 | 
			
		||||
        if 'acceptMethods' not in mediagen_url:
 | 
			
		||||
            mediagen_url += '&' if '?' in mediagen_url else '?'
 | 
			
		||||
            mediagen_url += 'acceptMethods=fms'
 | 
			
		||||
| 
						 | 
				
			
			@ -178,12 +181,12 @@ class MTVServicesInfoExtractor(InfoExtractor):
 | 
			
		|||
        data = {'uri': uri}
 | 
			
		||||
        if self._LANG:
 | 
			
		||||
            data['lang'] = self._LANG
 | 
			
		||||
        return compat_urllib_parse_urlencode(data)
 | 
			
		||||
        return data
 | 
			
		||||
 | 
			
		||||
    def _get_videos_info(self, uri):
 | 
			
		||||
        video_id = self._id_from_uri(uri)
 | 
			
		||||
        feed_url = self._get_feed_url(uri)
 | 
			
		||||
        info_url = feed_url + '?' + self._get_feed_query(uri)
 | 
			
		||||
        info_url = update_url_query(feed_url, self._get_feed_query(uri))
 | 
			
		||||
        return self._get_videos_info_from_url(info_url, video_id)
 | 
			
		||||
 | 
			
		||||
    def _get_videos_info_from_url(self, url, video_id):
 | 
			
		||||
| 
						 | 
				
			
			@ -256,13 +259,9 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
 | 
			
		|||
 | 
			
		||||
    def _get_feed_url(self, uri):
 | 
			
		||||
        video_id = self._id_from_uri(uri)
 | 
			
		||||
        site_id = uri.replace(video_id, '')
 | 
			
		||||
        config_url = ('http://media.mtvnservices.com/pmt-arc/e1/players/{0}/'
 | 
			
		||||
                      'context52/config.xml'.format(site_id))
 | 
			
		||||
        config_doc = self._download_xml(config_url, video_id)
 | 
			
		||||
        feed_node = config_doc.find('.//feed')
 | 
			
		||||
        feed_url = feed_node.text.strip().split('?')[0]
 | 
			
		||||
        return feed_url
 | 
			
		||||
        config = self._download_json(
 | 
			
		||||
            'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
 | 
			
		||||
        return self._remove_template_parameter(config['feedWithQueryParams'])
 | 
			
		||||
 | 
			
		||||
    def _real_extract(self, url):
 | 
			
		||||
        mobj = re.match(self._VALID_URL, url)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,7 +2,6 @@
 | 
			
		|||
from __future__ import unicode_literals
 | 
			
		||||
 | 
			
		||||
from .mtv import MTVServicesInfoExtractor
 | 
			
		||||
from ..compat import compat_urllib_parse_urlencode
 | 
			
		||||
from ..utils import update_url_query
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -59,10 +58,10 @@ class NickIE(MTVServicesInfoExtractor):
 | 
			
		|||
    }]
 | 
			
		||||
 | 
			
		||||
    def _get_feed_query(self, uri):
 | 
			
		||||
        return compat_urllib_parse_urlencode({
 | 
			
		||||
        return {
 | 
			
		||||
            'feed': 'nick_arc_player_prime',
 | 
			
		||||
            'mgid': uri,
 | 
			
		||||
        })
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    def _extract_mgid(self, webpage):
 | 
			
		||||
        return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid')
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue