Move DepositFiles into its own IE

2013-06-23 21:06:20 +02:00 · 2013-06-23 21:06:20 +02:00 · 426ff04282
commit 426ff04282
parent a50e1b32e4
2 changed files with 61 additions and 44 deletions
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@ -23,6 +23,7 @@ from .extractor.arte import ArteTvIE
 from .extractor.bliptv import BlipTVIE, BlipTVUserIE
 from .extractor.comedycentral import ComedyCentralIE
 from .extractor.dailymotion import DailymotionIE
 from .extractor.depositfiles import DepositFilesIE
 from .extractor.facebook import FacebookIE
 from .extractor.gametrailers import GametrailersIE
 from .extractor.generic import GenericIE
@ -56,50 +57,6 @@ from .extractor.zdf import ZDFIE
 class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""
    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
    def _real_extract(self, url):
        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id
        # Retrieve file webpage with 'Free download' button pressed
        free_download_indication = { 'gateway_result' : '1' }
        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))
        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if (mobj is None) or (mobj.group(1) is None):
            # Try to figure out reason of the error.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if (mobj is not None) and (mobj.group(1) is not None):
                restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
                raise ExtractorError(u'%s' % restriction_message)
            else:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]
        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
        return [{
            'id':       file_id.decode('utf-8'),
            'url':      file_url.decode('utf-8'),
            'uploader': None,
            'upload_date':  None,
            'title':    file_title,
            'ext':      file_extension.decode('utf-8'),
        }]
--- a/youtube_dl/extractor/depositfiles.py
+++ b/youtube_dl/extractor/depositfiles.py
@ -0,0 +1,60 @@
 import re
 import os
 import socket
 from .common import InfoExtractor
 from ..utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_request,
    ExtractorError,
 )
 class DepositFilesIE(InfoExtractor):
    """Information extractor for depositfiles.com"""
    _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
    def _real_extract(self, url):
        file_id = url.split('/')[-1]
        # Rebuild url in english locale
        url = 'http://depositfiles.com/en/files/' + file_id
        # Retrieve file webpage with 'Free download' button pressed
        free_download_indication = { 'gateway_result' : '1' }
        request = compat_urllib_request.Request(url, compat_urllib_parse.urlencode(free_download_indication))
        try:
            self.report_download_webpage(file_id)
            webpage = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to retrieve file webpage: %s' % compat_str(err))
        # Search for the real file URL
        mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
        if (mobj is None) or (mobj.group(1) is None):
            # Try to figure out reason of the error.
            mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
            if (mobj is not None) and (mobj.group(1) is not None):
                restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
                raise ExtractorError(u'%s' % restriction_message)
            else:
                raise ExtractorError(u'Unable to extract download URL from: %s' % url)
        file_url = mobj.group(1)
        file_extension = os.path.splitext(file_url)[1][1:]
        # Search for file title
        file_title = self._search_regex(r'<b title="(.*?)">', webpage, u'title')
        return [{
            'id':       file_id.decode('utf-8'),
            'url':      file_url.decode('utf-8'),
            'uploader': None,
            'upload_date':  None,
            'title':    file_title,
            'ext':      file_extension.decode('utf-8'),
        }]