[extractor/common] Relax interaction count extraction in _json_ld
This commit is contained in:
		
							parent
							
								
									ad06b99dd4
								
							
						
					
					
						commit
						ce5b904050
					
				
					 1 changed file with 5 additions and 1 deletion
				
			
		| 
						 | 
					@ -68,6 +68,7 @@ from ..utils import (
 | 
				
			||||||
    sanitized_Request,
 | 
					    sanitized_Request,
 | 
				
			||||||
    sanitize_filename,
 | 
					    sanitize_filename,
 | 
				
			||||||
    str_or_none,
 | 
					    str_or_none,
 | 
				
			||||||
 | 
					    str_to_int,
 | 
				
			||||||
    strip_or_none,
 | 
					    strip_or_none,
 | 
				
			||||||
    unescapeHTML,
 | 
					    unescapeHTML,
 | 
				
			||||||
    unified_strdate,
 | 
					    unified_strdate,
 | 
				
			||||||
| 
						 | 
					@ -1248,7 +1249,10 @@ class InfoExtractor(object):
 | 
				
			||||||
                interaction_type = is_e.get('interactionType')
 | 
					                interaction_type = is_e.get('interactionType')
 | 
				
			||||||
                if not isinstance(interaction_type, compat_str):
 | 
					                if not isinstance(interaction_type, compat_str):
 | 
				
			||||||
                    continue
 | 
					                    continue
 | 
				
			||||||
                interaction_count = int_or_none(is_e.get('userInteractionCount'))
 | 
					                # For interaction count some sites provide string instead of
 | 
				
			||||||
 | 
					                # an integer (as per spec) with non-digit characters (e.g. ",")
 | 
				
			||||||
 | 
					                # so extracting count with more relaxed str_to_int
 | 
				
			||||||
 | 
					                interaction_count = str_to_int(is_e.get('userInteractionCount'))
 | 
				
			||||||
                if interaction_count is None:
 | 
					                if interaction_count is None:
 | 
				
			||||||
                    continue
 | 
					                    continue
 | 
				
			||||||
                count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
 | 
					                count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue