disable image scraping
due to being IP flagged
This commit is contained in:
		
							parent
							
								
									00c0069fa9
								
							
						
					
					
						commit
						349d16b06d
					
				
					 1 changed files with 117 additions and 112 deletions
				
			
		|  | @ -155,134 +155,139 @@ export default class scraper { | ||||||
| 
 | 
 | ||||||
| 	/* | 	/* | ||||||
| 	Scrape the images from a page. | 	Scrape the images from a page. | ||||||
|  | 	It's temporarily disabled because it caused the IP address to be flagged. Also, its output is not yet implemented. This is a future point of expansion (Crit E). | ||||||
| 
 | 
 | ||||||
| 	@param {Object} fields the fields to scrape | 	@param {Object} fields the fields to scrape | ||||||
| 	@param {Object} options the options | 	@param {Object} options the options | ||||||
| 	@return {Object} the blob of the images | 	@return {Object} the blob of the images | ||||||
| 	*/ | 	*/ | ||||||
| 	async getImages(fields, options) { | 	async getImages(fields, options) { | ||||||
| 		let CONTENT;  | 		let DISABLE = true // This is how to disable it
 | ||||||
| 
 | 
 | ||||||
| 		/* | 		if (!DISABLE) { | ||||||
| 		Get the blob of the image in an element.  | 			let CONTENT; | ||||||
| 
 |  | ||||||
| 		@param {Element} element the element to get the blob from |  | ||||||
| 		@return {Blob} the blob of the image |  | ||||||
| 		*/ |  | ||||||
| 		async function blobbify(element) { |  | ||||||
| 			/* |  | ||||||
| 			Get the URL of the image.  |  | ||||||
| 
 |  | ||||||
| 			@param {Element} element the element to get the URL from |  | ||||||
| 			@return {String} the URL of the image |  | ||||||
| 			*/ |  | ||||||
| 			function reference(element) { |  | ||||||
| 				let LOCATION; |  | ||||||
| 
 |  | ||||||
| 				// Get using standard attributes. 
 |  | ||||||
| 				LOCATION = element.getAttribute(`src`); |  | ||||||
| 
 |  | ||||||
| 				if (!LOCATION) { |  | ||||||
| 					// Use the CSS background image.
 |  | ||||||
| 					(window.getComputedStyle(element).backgroundImage) |  | ||||||
| 						? LOCATION = window.getComputedStyle(element).backgroundImage.slice(4, -1).replace(/"/g, "") |  | ||||||
| 						: false; |  | ||||||
| 				} |  | ||||||
| 
 |  | ||||||
| 				// Return the location. 
 |  | ||||||
| 				return LOCATION; |  | ||||||
| 			} |  | ||||||
| 
 | 
 | ||||||
| 			/* | 			/* | ||||||
| 			Get the blob from the URL.  | 			Get the blob of the image in an element. | ||||||
| 
 | 
 | ||||||
| 			@param {String} URL the URL to get the blob from | 			@param {Element} element the element to get the blob from | ||||||
| 			@return {Blob} the blob of the image | 			@return {Blob} the blob of the image | ||||||
| 			*/ | 			*/ | ||||||
| 			function getBlob(URL) { | 			async function blobbify(element) { | ||||||
| 				return(net.download(URL, `blob`)); | 				/* | ||||||
| 			} | 				Get the URL of the image. | ||||||
| 
 | 
 | ||||||
| 			let LOCATION = reference(element); | 				@param {Element} element the element to get the URL from | ||||||
| 			let BLOB = await getBlob(LOCATION); | 				@return {String} the URL of the image | ||||||
|  | 				*/ | ||||||
|  | 				function reference(element) { | ||||||
|  | 					let LOCATION; | ||||||
| 
 | 
 | ||||||
| 			return ((BLOB.type.includes(`image`)) ? BLOB : null); | 					// Get using standard attributes.
 | ||||||
| 		} | 					LOCATION = element.getAttribute(`src`); | ||||||
| 
 | 
 | ||||||
| 		/* Read for the particular fields. */ | 					if (!LOCATION) { | ||||||
| 		async function read(fields) { | 						// Use the CSS background image.
 | ||||||
| 			/* | 						(window.getComputedStyle(element).backgroundImage) | ||||||
| 			Select all images from an element and get their blobs.  | 							? LOCATION = window.getComputedStyle(element).backgroundImage.slice(4, -1).replace(/"/g, "") | ||||||
| 
 | 							: false; | ||||||
| 			@param {Element} element the element to get the images from |  | ||||||
| 			@return {Array} the blobs of the images |  | ||||||
| 			*/ |  | ||||||
| 			async function select(element) { |  | ||||||
| 				let IMAGES = [...element.querySelectorAll(`*`)]; |  | ||||||
| 				let BLOBS = []; |  | ||||||
| 
 |  | ||||||
| 				if (IMAGES && IMAGES.length) { |  | ||||||
| 					for (let IMAGE of IMAGES) { |  | ||||||
| 						let BLOB = await blobbify(IMAGE); |  | ||||||
| 						(BLOB) ? BLOBS.push(BLOB) : false; |  | ||||||
| 					} | 					} | ||||||
|  | 
 | ||||||
|  | 					// Return the location.
 | ||||||
|  | 					return LOCATION; | ||||||
| 				} | 				} | ||||||
| 
 | 
 | ||||||
| 				return BLOBS; | 				/* | ||||||
|  | 				Get the blob from the URL. | ||||||
|  | 
 | ||||||
|  | 				@param {String} URL the URL to get the blob from | ||||||
|  | 				@return {Blob} the blob of the image | ||||||
|  | 				*/ | ||||||
|  | 				function getBlob(URL) { | ||||||
|  | 					return(net.download(URL, `blob`)); | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				let LOCATION = reference(element); | ||||||
|  | 				let BLOB = await getBlob(LOCATION); | ||||||
|  | 
 | ||||||
|  | 				return ((BLOB.type.includes(`image`)) ? BLOB : null); | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
| 			let DATA = []; // Store here the resulting data
 | 			/* Read for the particular fields. */ | ||||||
|  | 			async function read(fields) { | ||||||
|  | 				/* | ||||||
|  | 				Select all images from an element and get their blobs. | ||||||
| 
 | 
 | ||||||
| 			for (let NAME of Object.keys(fields)) { | 				@param {Element} element the element to get the images from | ||||||
| 				// Remove trailing spaces within the name. 
 | 				@return {Array} the blobs of the images | ||||||
| 				NAME = (typeof NAME).includes(`str`) ? NAME.trim() : NAME; | 				*/ | ||||||
| 				let VALUE = fields[NAME]; | 				async function select(element) { | ||||||
|  | 					let IMAGES = [...element.querySelectorAll(`*`)]; | ||||||
|  | 					let BLOBS = []; | ||||||
| 
 | 
 | ||||||
| 				if (VALUE && NAME) { | 					if (IMAGES && IMAGES.length) { | ||||||
| 					// Check if array.
 | 						for (let IMAGE of IMAGES) { | ||||||
| 					if (Array.isArray(VALUE)) { | 							let BLOB = await blobbify(IMAGE); | ||||||
| 						// Temporarily create an empty list. 
 | 							(BLOB) ? BLOBS.push(BLOB) : false; | ||||||
| 						for (let PARTICULAR of VALUE) { | 						} | ||||||
| 							if ((typeof PARTICULAR).includes(`obj`) && PARTICULAR && !Array.isArray(PARTICULAR)) { | 					} | ||||||
| 								DATA = [...DATA, ...(await read(PARTICULAR))]; |  | ||||||
| 							} else { |  | ||||||
| 								let ELEMENTS = [...(document.querySelectorAll(PARTICULAR))]; |  | ||||||
| 
 | 
 | ||||||
| 								if (ELEMENTS && ELEMENTS.length) { | 					return BLOBS; | ||||||
| 									for (let ELEMENT of ELEMENTS) { | 				} | ||||||
| 										let BLOBS = await select(ELEMENT); | 
 | ||||||
| 										if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS]; | 				let DATA = []; // Store here the resulting data
 | ||||||
|  | 
 | ||||||
|  | 				for (let NAME of Object.keys(fields)) { | ||||||
|  | 					// Remove trailing spaces within the name.
 | ||||||
|  | 					NAME = (typeof NAME).includes(`str`) ? NAME.trim() : NAME; | ||||||
|  | 					let VALUE = fields[NAME]; | ||||||
|  | 
 | ||||||
|  | 					if (VALUE && NAME) { | ||||||
|  | 						// Check if array.
 | ||||||
|  | 						if (Array.isArray(VALUE)) { | ||||||
|  | 							// Temporarily create an empty list.
 | ||||||
|  | 							for (let PARTICULAR of VALUE) { | ||||||
|  | 								if ((typeof PARTICULAR).includes(`obj`) && PARTICULAR && !Array.isArray(PARTICULAR)) { | ||||||
|  | 									DATA = [...DATA, ...(await read(PARTICULAR))]; | ||||||
|  | 								} else { | ||||||
|  | 									let ELEMENTS = [...(document.querySelectorAll(PARTICULAR))]; | ||||||
|  | 
 | ||||||
|  | 									if (ELEMENTS && ELEMENTS.length) { | ||||||
|  | 										for (let ELEMENT of ELEMENTS) { | ||||||
|  | 											let BLOBS = await select(ELEMENT); | ||||||
|  | 											if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS]; | ||||||
|  | 										} | ||||||
| 									} | 									} | ||||||
| 								} | 								} | ||||||
| 							} | 							} | ||||||
| 						} | 						} else if ((typeof VALUE).includes(`obj`) && VALUE) { | ||||||
| 					} else if ((typeof VALUE).includes(`obj`) && VALUE) { | 							DATA = [...DATA, ...(await read(VALUE))]; | ||||||
| 						DATA = [...DATA, ...(await read(VALUE))]; | 						} else if (document.querySelector(VALUE)) { | ||||||
| 					} else if (document.querySelector(VALUE)) { | 							let ELEMENTS = [...(document.querySelectorAll(VALUE))]; | ||||||
| 						let ELEMENTS = [...(document.querySelectorAll(VALUE))]; |  | ||||||
| 
 | 
 | ||||||
| 						if (ELEMENTS && ELEMENTS.length) { | 							if (ELEMENTS && ELEMENTS.length) { | ||||||
| 							for (let ELEMENT of ELEMENTS) { | 								for (let ELEMENT of ELEMENTS) { | ||||||
| 								let BLOBS = await select(ELEMENT); | 									let BLOBS = await select(ELEMENT); | ||||||
| 								if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS]; | 									if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS]; | ||||||
|  | 								} | ||||||
| 							} | 							} | ||||||
| 						} | 						} | ||||||
| 					} | 					} | ||||||
| 				} | 				} | ||||||
| 			} |  | ||||||
| 
 | 
 | ||||||
| 			return (DATA); | 				return (DATA); | ||||||
| 		}; | 			}; | ||||||
| 
 | 
 | ||||||
| 		// Read the fields. 
 | 			// Read the fields.
 | ||||||
| 		(((typeof fields).includes(`obj`) && fields) ? Object.keys(fields).length : false) | 			(((typeof fields).includes(`obj`) && fields) ? Object.keys(fields).length : false) | ||||||
| 			? CONTENT = await read(fields) | 				? CONTENT = await read(fields) | ||||||
| 			: false; | 				: false; | ||||||
| 
 | 
 | ||||||
| 		// Set the data if the options doesn't indicate otherwise. 
 | 			// Set the data if the options doesn't indicate otherwise.
 | ||||||
| 		(((((typeof options).includes(`obj`) && options) ? Object.hasOwn(`update`) : false) ? options[`update`] : true) && CONTENT) | 			(((((typeof options).includes(`obj`) && options) ? Object.hasOwn(`update`) : false) ? options[`update`] : true) && CONTENT) | ||||||
| 			? this.images = CONTENT | 				? this.images = CONTENT | ||||||
| 			: false; | 				: false; | ||||||
| 		return (CONTENT); | 			return (CONTENT); | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue