Create initial preview version of the new youtube-dl
commit 4fa74b5252

2 changed files with 439 additions and 0 deletions
.hgignore (new file, 1 line added)

@@ -0,0 +1 @@
youtube-dl-old

youtube-dl (new executable file, 438 lines added)

@@ -0,0 +1,438 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Ricardo Garcia Gonzalez
# License: Public domain code
import htmlentitydefs
import httplib
import math
import netrc
import os
import os.path
import re
import socket
import string
import sys
import time
import urllib
import urllib2

std_headers = {
	'User-Agent': 'UserAgent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9) Gecko/2008052906 Firefox/3.0',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Language': 'en-us,en;q=0.5',
}

simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')

class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information, task that InfoExtractors do, it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader handles it to the first InfoExtractor it
	finds that reports it's able to handle it. The InfoExtractor returns
	all the information to the FileDownloader and the latter downloads the
	file or does whatever it's instructed to do.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the get_params()
	method for the InfoExtractors to use. The FileDownloader also registers
	itself as the downloader in charge for the InfoExtractors that are
	added to it, so this is a "mutual registration".

	Available options:

	username:	Username for authentication purposes.
	password:	Password for authentication purposes.
	usenetrc:	Use netrc for authentication instead.
	quiet:		Do not print messages to stdout.
	format:		Video format code.
	outtmpl:	Template for output names.
	"""

	_params = None
	_ies = []

	def __init__(self, params):
		self._ies = []
		self.set_params(params)

	@staticmethod
	def pmkdir(filename):
		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
		components = filename.split(os.sep)
		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
		for dir in aggregate:
			if not os.path.exists(dir):
				os.mkdir(dir)

	@staticmethod
	def format_bytes(bytes):
		if bytes is None:
			return 'N/A'
		if bytes == 0:
			exponent = 0
		else:
			exponent = long(math.log(float(bytes), 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		if exponent == 0:
			return '%s%s' % (bytes, suffix)
		converted = float(bytes) / float(1024**exponent)
		return '%.2f%s' % (converted, suffix)

	@staticmethod
	def calc_percent(byte_counter, data_len):
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

	@staticmethod
	def calc_eta(start, now, total, current):
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)

	@staticmethod
	def calc_speed(start, now, bytes):
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%9s' % 'N/A b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

	@staticmethod
	def best_block_size(elapsed_time, bytes):
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return int(new_max)
		rate = bytes / elapsed_time
		if rate > new_max:
			return int(new_max)
		if rate < new_min:
			return int(new_min)
		return int(rate)

	def set_params(self, params):
		"""Sets parameters."""
		if type(params) != dict:
			raise ValueError('params: dictionary expected')
		self._params = params

	def get_params(self):
		"""Get parameters."""
		return self._params

	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		ie.set_downloader(self)

	def download(self, url_list):
		"""Download a given list of URLs."""
		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				if not ie.suitable(url):
					continue
				# Suitable InfoExtractor found
				suitable_found = True
				for result in ie.extract(url):
					if result is None:
						continue
					try:
						filename = self._params['outtmpl'] % result
					except (KeyError), err:
						sys.stderr.write('ERROR: invalid output template: %s\n' % str(err))
						continue
					try:
						self.pmkdir(filename)
					except (OSError, IOError), err:
						sys.stderr.write('ERROR: unable to create directories: %s\n' % str(err))
						continue
					try:
						outstream = open(filename, 'wb')
					except (OSError, IOError), err:
						sys.stderr.write('ERROR: unable to open for writing: %s\n' % str(err))
						continue
					try:
						self._do_download(outstream, result['url'])
						outstream.close()
					except (OSError, IOError), err:
						sys.stderr.write('ERROR: unable to write video data: %s\n' % str(err))
						continue
					except (urllib2.URLError, httplib.HTTPException, socket.error), err:
						sys.stderr.write('ERROR: unable to download video data: %s\n' % str(err))
						continue
				break
			if not suitable_found:
				sys.stderr.write('ERROR: no suitable InfoExtractor: %s\n' % url)

	def _do_download(self, stream, url):
		request = urllib2.Request(url, None, std_headers)
		data = urllib2.urlopen(request)
		data_len = data.info().get('Content-length', None)
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0
		block_size = 1024
		start = time.time()
		while True:
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
			speed_str = self.calc_speed(start, time.time(), byte_counter)

			if not self._params.get('quiet', False):
				sys.stdout.write('\r[download] %s of %s at %s ETA %s' %
						(percent_str, data_len_str, speed_str, eta_str))
				sys.stdout.flush()

			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			data_block_len = len(data_block)
			if data_block_len == 0:
				break
			byte_counter += data_block_len
			stream.write(data_block)
			block_size = self.best_block_size(after - before, data_block_len)

		if not self._params.get('quiet', False):
			print

		if data_len is not None and str(byte_counter) != data_len:
			raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))

class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title and simplified
	title, author and others. It is returned in a list of dictionaries when
	calling its extract() method. It is a list because a URL can refer to
	more than one video (think of playlists). The dictionaries must include
	the following fields:

	id:		Video identifier.
	url:		Final video URL.
	uploader:	Nickname of the video uploader.
	title:		Literal title.
	stitle:		Simplified title.
	ext:		Video filename extension.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods, as well as the suitable() static method.
	Probably, they should also be instantiated and added to the main
	downloader.
	"""

	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return True

	def initialize(self):
		"""Initializes an instance (login, etc)."""
		if not self._ready:
			self._real_initialize()
			self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def to_stdout(self, message):
		if self._downloader is None or not self._downloader.get_params().get('quiet', False):
			print message

	def to_stderr(self, message):
		sys.stderr.write('%s\n' % message)

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass

class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	_LOGIN_URL = 'http://www.youtube.com/login?next=/'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/'
	_NETRC_MACHINE = 'youtube'

	def _real_initialize(self):
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.get_params()

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self.to_stderr('WARNING: parsing .netrc: %s' % str(err))
				return

		if username is None:
			return

		# Log in
		login_form = {	'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,	}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.to_stdout('[youtube] Logging in')
			login_results = urllib2.urlopen(request).read()
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self.to_stderr('WARNING: Unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.to_stderr('WARNING: Unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {	'next_url':		'/',
				'action_confirm':	'Confirm',	}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.to_stdout('[youtube] Confirming age')
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			sys.exit('ERROR: Unable to confirm age: %s' % str(err))

	def _real_extract(self, url):
		# Extract video id from URL
		mobj = re.match(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$', url)
		if mobj is None:
			self.to_stderr('ERROR: Invalid URL: %s' % url)
			return [None]
		video_id = mobj.group(2)

		# Downloader parameters
		format_param = None
		if self._downloader is not None:
			params = self._downloader.get_params()
			format_param = params.get('format', None)

		# Extension
		video_extension = {18: 'mp4'}.get(format_param, 'flv')

		# Normalize URL, including format
		normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id
		if format_param is not None:
			normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
		request = urllib2.Request(normalized_url, None, std_headers)
		try:
			self.to_stdout('[youtube] %s: Downloading video webpage' % video_id)
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			sys.exit('ERROR: Unable to download video: %s' % str(err))
		self.to_stdout('[youtube] %s: Extracting video information' % video_id)

		# "t" param
		mobj = re.search(r', "t": "([^"]+)"', video_webpage)
		if mobj is None:
			self.to_stderr('ERROR: Unable to extract "t" parameter')
			return [None]
		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
		if format_param is not None:
			video_real_url = '%s&fmt=%s' % (video_real_url, format_param)

		# uploader
		mobj = re.search(r'More From: ([^<]*)<', video_webpage)
		if mobj is None:
			self.to_stderr('ERROR: Unable to extract uploader nickname')
			return [None]
		video_uploader = mobj.group(1)

		# title
		mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
		if mobj is None:
			self.to_stderr('ERROR: Unable to extract video title')
			return [None]
		video_title = mobj.group(1).decode('utf-8')
		video_title = re.sub(u'&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title)

		# simplified title
		simple_title = re.sub(u'([^%s]+)' % simple_title_chars, u'_', video_title)
		simple_title = simple_title.strip(u'_')

		# Return information
		return [{	'id':		video_id,
				'url':		video_real_url,
				'uploader':	video_uploader,
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension,
				}]

if __name__ == '__main__':
	try:
		# General configuration
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))

		# Information extractors
		youtube_ie = YoutubeIE()

		# File downloader
		fd = FileDownloader({	'usenetrc': False,
					'username': None,
					'password': None,
					'quiet': False,
					'format': None,
					'outtmpl': '%(id)s.%(ext)s'
					})
		fd.add_info_extractor(youtube_ie)
		fd.download([	'http://www.youtube.com/watch?v=t7qdwI7TVe8',
				'http://www.youtube.com/watch?v=IJyn3pRcy_Q',
				'http://www.youtube.com/watch?v=DZRXe1wtC-M',	])

	except KeyboardInterrupt:
		sys.exit('\nERROR: Interrupted by user')
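
Note for readers of this diff (not part of the commit): the 'outtmpl' option is expanded in FileDownloader.download() with Python's %-formatting against the info dictionary returned by the InfoExtractor, whose required fields are listed in its docstring. A minimal Python 2 sketch, assuming a hypothetical info dictionary (the id is taken from the test URLs in the __main__ block above, the other values are placeholders):

# Hypothetical illustration only; field names follow the InfoExtractor
# docstring, values other than the id are made up for the example.
result = {
	'id':       't7qdwI7TVe8',
	'url':      'http://example.invalid/video.flv',  # placeholder final URL
	'uploader': 'someuser',
	'title':    'Some video title',
	'stitle':   'Some_video_title',
	'ext':      'flv',
}

# The default template used in the __main__ block above:
print '%(id)s.%(ext)s' % result              # -> t7qdwI7TVe8.flv

# Any documented field may appear in a custom template:
print '%(stitle)s-%(id)s.%(ext)s' % result   # -> Some_video_title-t7qdwI7TVe8.flv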