scrape images aside from texts
This commit is contained in:
		
							parent
							
								
									af996cfc1d
								
							
						
					
					
						commit
						7ad7072456
					
				
					 1 changed files with 129 additions and 15 deletions
				
			
		
							
								
								
									
										144
									
								
								scripts/external/scraper.js
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										144
									
								
								scripts/external/scraper.js
									
										
									
									
										vendored
									
									
								
							|  | @ -79,22 +79,136 @@ export default class scraper { | ||||||
| 			} | 			} | ||||||
| 		}; | 		}; | ||||||
| 
 | 
 | ||||||
| 		// Check every 1 second until autoscroll is done.
 | 	/* | ||||||
| 		function wait_autoscroll(OPTIONS) { | 	Scrape the images from a page.  | ||||||
| 			return new Promise((resolve, reject) => { | 
 | ||||||
| 				// Check if autoscroll is done.
 | 	@param {Object} fields the fields to scrape | ||||||
| 				if (!((typeof window).includes(`undef`))) { | 	@param {Object} options the options | ||||||
| 					autoscroll(); | 	@return {Object} the blob of the images | ||||||
| 					resolve(); | 	*/ | ||||||
| 				} else if (OPTIONS[`wait until available`]) { | 	async getImages(fields, options) { | ||||||
| 					setTimeout(() => { | 		let CONTENT;  | ||||||
| 						wait_autoscroll().then(resolve).catch(reject); | 
 | ||||||
| 					}, 1000); | 		/* | ||||||
| 				} else { | 		Get the blob of the image in an element.  | ||||||
| 					reject(); | 
 | ||||||
|  | 		@param {Element} element the element to get the blob from | ||||||
|  | 		@return {Blob} the blob of the image | ||||||
|  | 		*/ | ||||||
|  | 		async function blobbify(element) { | ||||||
|  | 			/* | ||||||
|  | 			Get the URL of the image.  | ||||||
|  | 
 | ||||||
|  | 			@param {Element} element the element to get the URL from | ||||||
|  | 			@return {String} the URL of the image | ||||||
|  | 			*/ | ||||||
|  | 			function reference(element) { | ||||||
|  | 				let LOCATION; | ||||||
|  | 
 | ||||||
|  | 				// Get using standard attributes. 
 | ||||||
|  | 				LOCATION = element.getAttribute(`src`); | ||||||
|  | 
 | ||||||
|  | 				if (!LOCATION) { | ||||||
|  | 					// Use the CSS background image.
 | ||||||
|  | 					(window.getComputedStyle(element).backgroundImage) | ||||||
|  | 						? LOCATION = window.getComputedStyle(element).backgroundImage.slice(4, -1).replace(/"/g, "") | ||||||
|  | 						: false; | ||||||
| 				} | 				} | ||||||
| 			}); | 
 | ||||||
|  | 				// Return the location. 
 | ||||||
|  | 				return LOCATION; | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			/* | ||||||
|  | 			Get the blob from the URL.  | ||||||
|  | 
 | ||||||
|  | 			@param {String} URL the URL to get the blob from | ||||||
|  | 			@return {Blob} the blob of the image | ||||||
|  | 			*/ | ||||||
|  | 			function getBlob(URL) { | ||||||
|  | 				return(net.download(URL, `blob`)); | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			let LOCATION = reference(element); | ||||||
|  | 			let BLOB = await getBlob(LOCATION); | ||||||
|  | 
 | ||||||
|  | 			return ((BLOB.type.includes(`image`)) ? BLOB : null); | ||||||
| 		} | 		} | ||||||
| 		wait_autoscroll(options).then(() => {read();}); | 
 | ||||||
|  | 		/* Read for the particular fields. */ | ||||||
|  | 		async function read(fields) { | ||||||
|  | 			/* | ||||||
|  | 			Select all images from an element and get their blobs.  | ||||||
|  | 
 | ||||||
|  | 			@param {Element} element the element to get the images from | ||||||
|  | 			@return {Array} the blobs of the images | ||||||
|  | 			*/ | ||||||
|  | 			async function select(element) { | ||||||
|  | 				let IMAGES = [...element.querySelectorAll(`*`)]; | ||||||
|  | 				let BLOBS = []; | ||||||
|  | 
 | ||||||
|  | 				if (IMAGES && IMAGES.length) { | ||||||
|  | 					for (let IMAGE of IMAGES) { | ||||||
|  | 						let BLOB = await blobbify(IMAGE); | ||||||
|  | 						(BLOB) ? BLOBS.push(BLOB) : false; | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				return BLOBS; | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			let DATA = []; // Store here the resulting data
 | ||||||
|  | 
 | ||||||
|  | 			for (let NAME of Object.keys(fields)) { | ||||||
|  | 				// Remove leading and trailing whitespace from the name. 
 | ||||||
|  | 				NAME = (typeof NAME).includes(`str`) ? NAME.trim() : NAME; | ||||||
|  | 				let VALUE = fields[NAME]; | ||||||
|  | 
 | ||||||
|  | 				if (VALUE && NAME) { | ||||||
|  | 					// Check if array.
 | ||||||
|  | 					if (Array.isArray(VALUE)) { | ||||||
|  | 						// Handle each entry: nested field objects recurse; anything else is used as a selector. 
 | ||||||
|  | 						for (let PARTICULAR of VALUE) { | ||||||
|  | 							if ((typeof PARTICULAR).includes(`obj`) && PARTICULAR && !Array.isArray(PARTICULAR)) { | ||||||
|  | 								DATA = [...DATA, ...(await read(PARTICULAR))]; | ||||||
|  | 				} else { | ||||||
|  | 								let ELEMENTS = [...(document.querySelectorAll(PARTICULAR))]; | ||||||
|  | 
 | ||||||
|  | 								if (ELEMENTS && ELEMENTS.length) { | ||||||
|  | 									for (let ELEMENT of ELEMENTS) { | ||||||
|  | 										let BLOBS = await select(ELEMENT); | ||||||
|  | 										if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS]; | ||||||
|  | 									} | ||||||
|  | 								} | ||||||
|  | 							} | ||||||
|  | 						} | ||||||
|  | 					} else if ((typeof VALUE).includes(`obj`) && VALUE) { | ||||||
|  | 						DATA = [...DATA, ...(await read(VALUE))]; | ||||||
|  | 					} else if (document.querySelector(VALUE)) { | ||||||
|  | 						let ELEMENTS = [...(document.querySelectorAll(VALUE))]; | ||||||
|  | 
 | ||||||
|  | 						if (ELEMENTS && ELEMENTS.length) { | ||||||
|  | 							for (let ELEMENT of ELEMENTS) { | ||||||
|  | 								let BLOBS = await select(ELEMENT); | ||||||
|  | 								if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS]; | ||||||
|  | 							} | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 
 | ||||||
|  | 			return (DATA); | ||||||
|  | 		}; | ||||||
|  | 
 | ||||||
|  | 		// Read the fields. 
 | ||||||
|  | 		(((typeof fields).includes(`obj`) && fields) ? Object.keys(fields).length : false) | ||||||
|  | 			? CONTENT = await read(fields) | ||||||
|  | 			: false; | ||||||
|  | 
 | ||||||
|  | 		// Set the data unless the options indicate otherwise. 
 | ||||||
|  | 		(((((typeof options).includes(`obj`) && options) ? Object.hasOwn(`update`) : false) ? options[`update`] : true) && CONTENT) | ||||||
|  | 			? this.images = CONTENT | ||||||
|  | 			: false; | ||||||
|  | 		return (CONTENT); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue