From 2a7c2fc750bc12a81ac9f0a7aa162e11356d67fd Mon Sep 17 00:00:00 2001 From: buzz-lightsnack-2007 <73412182+buzz-lightsnack-2007@users.noreply.github.com> Date: Sun, 5 May 2024 22:54:57 +0800 Subject: [PATCH] Consider changes in height attempt to wait until window is available --- scripts/external/scraper.js | 123 ++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 47 deletions(-) diff --git a/scripts/external/scraper.js b/scripts/external/scraper.js index 7c9ee45..9f06293 100644 --- a/scripts/external/scraper.js +++ b/scripts/external/scraper.js @@ -3,69 +3,98 @@ Read the contents of the page. */ export default class scraper { - constructor(scraper_fields) { + /* + Scrape fields. + + @param {Object} scraper_fields the fields to scrape + @param {Object} options the options + */ + constructor(scraper_fields, options = {"wait until available": true}) { let field_content; // Quickly scroll down then to where the user already was to get automatically hidden content. function autoscroll() { - let SCROLL = {"x": window.scrollX, "y": window.scrollY}; + let SCROLL = {"x": parseInt(window.scrollX), "y": parseInt(window.scrollY)}; - // Repeat two times to ensure proper webpage load. - for (let TIMES = 1; TIMES <= 2; TIMES++) { - [{"top": 0, "left": 0, "behavior": "smooth"}, {"top": document.body.scrollHeight, "left": document.body.scrollWidth, "behavior": "smooth"}].forEach((POSITION) => { - window.scrollTo(POSITION); - }) - }; + // Repeat every ten milliseconds until 3 times. + + for (let SCROLLS = 1; SCROLLS <= 2; SCROLLS++) { + [{"top": document.body.scrollHeight, "left": document.body.scrollWidth}, {"top": 0, "left": 0}].forEach(POSITION => { + setTimeout(() => { + window.scrollTo(POSITION); + }, 10); + }); + } // Scroll back to user's previous position. - window.scrollTo({"top": SCROLL[`y`], "left": SCROLL[`x`], "behavior": "smooth"}); + setTimeout(() => { + window.scrollTo(SCROLL); + }, 5) }; - autoscroll(); - - if ((typeof scraper_fields).includes("object") && scraper_fields != null && scraper_fields) { - - /* Read for the particular fields. */ - function read(fields) { - let field_data = {}; - - (Object.keys(fields)).forEach((FIELD_NAME) => { - let FIELD = {"name": FIELD_NAME, "value": fields[FIELD_NAME]}; - - if (FIELD[`value`]) { - // Check if array. - if (Array.isArray(FIELD[`value`])) { - // Temporarily create an empty list. - field_data[FIELD[`name`]] = []; - - if (typeof FIELD[`value`][0] == "object" && FIELD[`value`][0] != null && !Array.isArray(FIELD[`value`][0])) { - field_data[FIELD[`name`]].push(read(FIELD[`value`][0])); - } else { - let ELEMENTS = (document.querySelectorAll(FIELD[`value`][0])); + const read = () => { + if ((typeof scraper_fields).includes("object") && scraper_fields != null && scraper_fields) { + + /* Read for the particular fields. */ + function read(fields) { + let field_data = {}; + + (Object.keys(fields)).forEach((FIELD_NAME) => { + let FIELD = {"name": FIELD_NAME, "value": fields[FIELD_NAME]}; + + if (FIELD[`value`]) { + // Check if array. + if (Array.isArray(FIELD[`value`])) { + // Temporarily create an empty list. + field_data[FIELD[`name`]] = []; - if (ELEMENTS.length > 0) { - (ELEMENTS).forEach((ELEMENT) => { - field_data[FIELD[`name`]].push(ELEMENT.innerText); - }) + if (typeof FIELD[`value`][0] == "object" && FIELD[`value`][0] != null && !Array.isArray(FIELD[`value`][0])) { + field_data[FIELD[`name`]].push(read(FIELD[`value`][0])); + } else { + let ELEMENTS = (document.querySelectorAll(FIELD[`value`][0])); + + if (ELEMENTS.length > 0) { + (ELEMENTS).forEach((ELEMENT) => { + field_data[FIELD[`name`]].push(ELEMENT.innerText); + }) + }; }; + } else if ((typeof FIELD[`value`]).includes(`obj`) && FIELD[`value`] != null) { + field_data[FIELD[`name`]] = read(FIELD[`value`]); + } else if (document.querySelector(FIELD[`value`])) { + field_data[FIELD[`name`]] = document.querySelector(FIELD[`value`]).innerText; }; - } else if ((typeof FIELD[`value`]).includes(`obj`) && FIELD[`value`] != null) { - field_data[FIELD[`name`]] = read(FIELD[`value`]); - } else if (document.querySelector(FIELD[`value`])) { - field_data[FIELD[`name`]] = document.querySelector(FIELD[`value`]).innerText; }; - }; + }); + + return field_data; + }; + field_content = read(scraper_fields); + } + + if (Object.keys(field_content).length > 0) { + (Object.keys(field_content)).forEach((field_name) => { + this[field_name] = field_content[field_name]; }); + } + }; - return field_data; - }; - field_content = read(scraper_fields); - } - - if (Object.keys(field_content).length > 0) { - (Object.keys(field_content)).forEach((field_name) => { - this[field_name] = field_content[field_name]; + // Check every 1 second to check until autosccroll is done. + function wait_autoscroll(OPTIONS) { + return new Promise((resolve, reject) => { + // Check if autoscroll is done. + if (!((typeof window).includes(`undef`))) { + autoscroll(); + resolve(); + } else if (OPTIONS[`wait until available`]) { + setTimeout(() => { + wait_autoscroll().then(resolve).catch(reject); + }, 1000); + } else { + reject(); + } }); } + wait_autoscroll(options).then(() => {read();}); } } \ No newline at end of file