some HTMLParser bugfixes
This commit is contained in:
		
							parent
							
								
									9e6dd23876
								
							
						
					
					
						commit
						9beb5af82e
					
				
					 4 changed files with 14 additions and 5 deletions
				
			
		
							
								
								
									
										
											BIN
										
									
								
								youtube-dl
									
										
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								youtube-dl
									
										
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								youtube-dl.exe
									
										
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								youtube-dl.exe
									
										
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							| 
						 | 
				
			
			@ -359,8 +359,8 @@ class YoutubeIE(InfoExtractor):
 | 
			
		|||
					pass
 | 
			
		||||
 | 
			
		||||
		# description
 | 
			
		||||
		video_description = get_element_by_id("eow-description", video_webpage)
 | 
			
		||||
		if video_description: video_description = clean_html(video_description.decode('utf8'))
 | 
			
		||||
		video_description = get_element_by_id("eow-description", video_webpage.decode('utf8'))
 | 
			
		||||
		if video_description: video_description = clean_html(video_description)
 | 
			
		||||
		else: video_description = ''
 | 
			
		||||
			
 | 
			
		||||
		# closed captions
 | 
			
		||||
| 
						 | 
				
			
			@ -1055,8 +1055,8 @@ class VimeoIE(InfoExtractor):
 | 
			
		|||
		video_thumbnail = config["video"]["thumbnail"]
 | 
			
		||||
 | 
			
		||||
		# Extract video description
 | 
			
		||||
		video_description = get_element_by_id("description", webpage)
 | 
			
		||||
		if video_description: video_description = clean_html(video_description.decode('utf8'))
 | 
			
		||||
		video_description = get_element_by_id("description", webpage.decode('utf8'))
 | 
			
		||||
		if video_description: video_description = clean_html(video_description)
 | 
			
		||||
		else: video_description = ''
 | 
			
		||||
 | 
			
		||||
		# Extract upload date
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -73,7 +73,7 @@ def htmlentity_transform(matchobj):
 | 
			
		|||
	# Unknown entity in name, return its literal representation
 | 
			
		||||
	return (u'&%s;' % entity)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Monkey-patch: rebind the `locatestarttagend` attribute on the imported
# HTMLParser module to the compiled pattern below, which matches the opening
# of a start tag (tag name followed by optional attributes, with values that
# may be single-quoted, double-quoted or unquoted).
# NOTE(review): presumably this replaces the stdlib pattern with the corrected
# one from a newer Python release -- confirm which upstream fix is backported.
# NOTE(review): the pattern contains no literal whitespace or '#', so
# re.VERBOSE appears to have no effect here -- confirm before removing it.
HTMLParser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 | 
			
		||||
class IDParser(HTMLParser.HTMLParser):
 | 
			
		||||
	"""Modified HTMLParser that isolates a tag with the specified id"""
 | 
			
		||||
	def __init__(self, id):
 | 
			
		||||
| 
						 | 
				
			
			@ -83,8 +83,17 @@ class IDParser(HTMLParser.HTMLParser):
 | 
			
		|||
		self.depth = {}
 | 
			
		||||
		self.html = None
 | 
			
		||||
		self.watch_startpos = False
 | 
			
		||||
		self.error_count = 0
 | 
			
		||||
		HTMLParser.HTMLParser.__init__(self)
 | 
			
		||||
 | 
			
		||||
	def error(self, message):
		"""Try to recover from a parse error by skipping the offending line.

		Raises HTMLParser.HTMLParseError (with the current position) once
		more than 10 errors have already been recovered from, or when
		self.started is truthy. Otherwise the input up to and including the
		current line is dropped and parsing resumes on the remainder.

		NOTE(review): self.started presumably means the wanted element has
		already been entered, so skipping lines would corrupt the extracted
		content -- confirm against the rest of the class.
		"""
		# No debug output here: printing the parser position on every
		# recoverable error would pollute the program's stdout.
		if self.error_count > 10 or self.started:
			raise HTMLParser.HTMLParseError(message, self.getpos())
		# getpos()[0] is the 1-based current line number, so slicing the
		# 0-based list of lines at that index starts at the *next* line.
		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
		self.error_count += 1
		# Continue parsing the shortened buffer, treating it as the end of input.
		self.goahead(1)
 | 
			
		||||
 | 
			
		||||
	def loads(self, html):
 | 
			
		||||
		self.html = html
 | 
			
		||||
		self.feed(html)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue