Consider changes in height
attempt to wait until window is available
This commit is contained in:
		
							parent
							
								
									da0ba0a40d
								
							
						
					
					
						commit
						2a7c2fc750
					
				
					 1 changed files with 76 additions and 47 deletions
				
			
		
							
								
								
									
										123
									
								
								scripts/external/scraper.js
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										123
									
								
								scripts/external/scraper.js
									
										
									
									
										vendored
									
									
								
							|  | @ -3,69 +3,98 @@ Read the contents of the page. | ||||||
| */ | */ | ||||||
| 
 | 
 | ||||||
| export default class scraper { | export default class scraper { | ||||||
| 	constructor(scraper_fields) { | 	/* | ||||||
|  | 	Scrape fields.  | ||||||
|  | 
 | ||||||
|  | 	@param {Object} scraper_fields the fields to scrape | ||||||
|  | 	@param {Object} options the options | ||||||
|  | 	*/ | ||||||
|  | 	constructor(scraper_fields, options = {"wait until available": true}) { | ||||||
| 		let field_content; | 		let field_content; | ||||||
| 
 | 
 | ||||||
| 		// Quickly scroll down then to where the user already was to get automatically hidden content. 
 | 		// Quickly scroll down then to where the user already was to get automatically hidden content. 
 | ||||||
| 		function autoscroll() { | 		function autoscroll() { | ||||||
| 			let SCROLL = {"x": window.scrollX, "y": window.scrollY}; | 			let SCROLL = {"x": parseInt(window.scrollX), "y": parseInt(window.scrollY)}; | ||||||
| 
 | 
 | ||||||
| 			// Repeat two times to ensure proper webpage load. 
 | 			// Scroll to the bottom and back to the top twice, deferring each scroll by ten milliseconds.
 | ||||||
| 			for (let TIMES = 1; TIMES <= 2; TIMES++) { | 
 | ||||||
| 				[{"top": 0, "left": 0, "behavior": "smooth"}, {"top": document.body.scrollHeight, "left": document.body.scrollWidth, "behavior": "smooth"}].forEach((POSITION) => { | 			for (let SCROLLS = 1; SCROLLS <= 2; SCROLLS++) { | ||||||
| 					window.scrollTo(POSITION); | 				[{"top": document.body.scrollHeight, "left": document.body.scrollWidth}, {"top": 0, "left": 0}].forEach(POSITION => { | ||||||
| 				}) | 					setTimeout(() => { | ||||||
| 			}; | 						window.scrollTo(POSITION); | ||||||
|  | 					}, 10); | ||||||
|  | 				}); | ||||||
|  | 			} | ||||||
| 
 | 
 | ||||||
| 			// Scroll back to user's previous position.
 | 			// Scroll back to user's previous position.
 | ||||||
| 			window.scrollTo({"top": SCROLL[`y`], "left": SCROLL[`x`], "behavior": "smooth"}); | 			setTimeout(() => { | ||||||
|  | 				window.scrollTo(SCROLL); | ||||||
|  | 			}, 5) | ||||||
| 		}; | 		}; | ||||||
| 
 | 
 | ||||||
| 		autoscroll(); | 		const read = () => { | ||||||
| 
 | 			if ((typeof scraper_fields).includes("object") && scraper_fields != null && scraper_fields) { | ||||||
| 		if ((typeof scraper_fields).includes("object") && scraper_fields != null && scraper_fields) { | 	 | ||||||
| 
 | 				/* Read for the particular fields. */ | ||||||
| 			/* Read for the particular fields. */ | 				function read(fields) { | ||||||
| 			function read(fields) { | 					let field_data = {}; | ||||||
| 				let field_data = {}; | 	 | ||||||
| 
 | 					(Object.keys(fields)).forEach((FIELD_NAME) => { | ||||||
| 				(Object.keys(fields)).forEach((FIELD_NAME) => { | 						let FIELD = {"name": FIELD_NAME, "value": fields[FIELD_NAME]}; | ||||||
| 					let FIELD = {"name": FIELD_NAME, "value": fields[FIELD_NAME]}; | 	 | ||||||
| 
 | 						if (FIELD[`value`]) { | ||||||
| 					if (FIELD[`value`]) { | 							// Check if array.
 | ||||||
| 						// Check if array.
 | 							if (Array.isArray(FIELD[`value`])) { | ||||||
| 						if (Array.isArray(FIELD[`value`])) { | 								// Temporarily create an empty list. 
 | ||||||
| 							// Temporarily create an empty list. 
 | 								field_data[FIELD[`name`]] = []; | ||||||
| 							field_data[FIELD[`name`]] = []; |  | ||||||
| 							 |  | ||||||
| 							if (typeof FIELD[`value`][0] == "object" && FIELD[`value`][0] != null && !Array.isArray(FIELD[`value`][0])) { |  | ||||||
| 								field_data[FIELD[`name`]].push(read(FIELD[`value`][0])); |  | ||||||
| 							} else { |  | ||||||
| 								let ELEMENTS = (document.querySelectorAll(FIELD[`value`][0])); |  | ||||||
| 								 | 								 | ||||||
| 								if (ELEMENTS.length > 0) { | 								if (typeof FIELD[`value`][0] == "object" && FIELD[`value`][0] != null && !Array.isArray(FIELD[`value`][0])) { | ||||||
| 									(ELEMENTS).forEach((ELEMENT) => { | 									field_data[FIELD[`name`]].push(read(FIELD[`value`][0])); | ||||||
| 										field_data[FIELD[`name`]].push(ELEMENT.innerText); | 								} else { | ||||||
| 									}) | 									let ELEMENTS = (document.querySelectorAll(FIELD[`value`][0])); | ||||||
|  | 									 | ||||||
|  | 									if (ELEMENTS.length > 0) { | ||||||
|  | 										(ELEMENTS).forEach((ELEMENT) => { | ||||||
|  | 											field_data[FIELD[`name`]].push(ELEMENT.innerText); | ||||||
|  | 										}) | ||||||
|  | 									}; | ||||||
| 								}; | 								}; | ||||||
|  | 							} else if ((typeof FIELD[`value`]).includes(`obj`) && FIELD[`value`] != null) { | ||||||
|  | 								field_data[FIELD[`name`]] = read(FIELD[`value`]); | ||||||
|  | 							} else if (document.querySelector(FIELD[`value`])) { | ||||||
|  | 								field_data[FIELD[`name`]] = document.querySelector(FIELD[`value`]).innerText; | ||||||
| 							}; | 							}; | ||||||
| 						} else if ((typeof FIELD[`value`]).includes(`obj`) && FIELD[`value`] != null) { |  | ||||||
| 							field_data[FIELD[`name`]] = read(FIELD[`value`]); |  | ||||||
| 						} else if (document.querySelector(FIELD[`value`])) { |  | ||||||
| 							field_data[FIELD[`name`]] = document.querySelector(FIELD[`value`]).innerText; |  | ||||||
| 						}; | 						}; | ||||||
| 					}; | 					}); | ||||||
|  | 	 | ||||||
|  | 					return field_data; | ||||||
|  | 				}; | ||||||
|  | 				field_content = read(scraper_fields); | ||||||
|  | 			} | ||||||
|  | 	 | ||||||
|  | 			if (Object.keys(field_content).length > 0) { | ||||||
|  | 				(Object.keys(field_content)).forEach((field_name) => { | ||||||
|  | 					this[field_name] = field_content[field_name]; | ||||||
| 				}); | 				}); | ||||||
|  | 			} | ||||||
|  | 		}; | ||||||
| 
 | 
 | ||||||
| 				return field_data; | 		// Check every second until the window is available, then autoscroll.
 | ||||||
| 			}; | 		function wait_autoscroll(OPTIONS) { | ||||||
| 			field_content = read(scraper_fields); | 			return new Promise((resolve, reject) => { | ||||||
| 		} | 				// Check whether the window is defined yet.
 | ||||||
| 
 | 				if (!((typeof window).includes(`undef`))) { | ||||||
| 		if (Object.keys(field_content).length > 0) { | 					autoscroll(); | ||||||
| 			(Object.keys(field_content)).forEach((field_name) => { | 					resolve(); | ||||||
| 				this[field_name] = field_content[field_name]; | 				} else if (OPTIONS[`wait until available`]) { | ||||||
|  | 					setTimeout(() => { | ||||||
|  | 						wait_autoscroll().then(resolve).catch(reject); | ||||||
|  | 					}, 1000); | ||||||
|  | 				} else { | ||||||
|  | 					reject(); | ||||||
|  | 				} | ||||||
| 			}); | 			}); | ||||||
| 		} | 		} | ||||||
|  | 		wait_autoscroll(options).then(() => {read();}); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue