Consider changes in height

Attempt to wait until the window is available.
This commit is contained in:
buzz-lightsnack-2007 2024-05-05 22:54:57 +08:00
parent da0ba0a40d
commit 2a7c2fc750

View file

@ -3,69 +3,98 @@ Read the contents of the page.
*/ */
export default class scraper {
	/*
	Scrape fields from the current page and copy every result onto this instance.

	The page is first scrolled to its end and back (to trigger content that is
	only loaded on scroll), then the fields are read. Both steps are asynchronous:
	the scraped values appear as properties of the instance only after the
	scrolling has finished, not when the constructor returns.

	@param {Object} scraper_fields the fields to scrape; each value is either a CSS selector (text of the first match), an array holding one CSS selector (texts of all matches), an array holding one object (a single nested result), or an object (nested fields)
	@param {Object} options the options; when "wait until available" is truthy, a missing `window` is polled for every second instead of giving up
	*/
	constructor(scraper_fields, options = {"wait until available": true}) {
		// Stays empty when there is nothing valid to read, so copying it below is always safe.
		let field_content = {};

		/*
		Quickly scroll to the end of the page and back to the top, twice, then return to where the user already was.

		@return {Promise} resolves once the user's previous position has been restored
		*/
		function autoscroll() {
			// window.scrollTo() reads `top` and `left`; any other key is ignored.
			const SCROLL = {"top": Math.trunc(window.scrollY), "left": Math.trunc(window.scrollX)};
			const INTERVAL = 10;

			// Positions are computed when each step runs, so growth of the page caused by an earlier scroll is taken into account.
			const STEPS = [];
			for (let SCROLLS = 1; SCROLLS <= 2; SCROLLS++) {
				STEPS.push(() => ({"top": document.body.scrollHeight, "left": document.body.scrollWidth}));
				STEPS.push(() => ({"top": 0, "left": 0}));
			}

			// Scroll back to user's previous position, strictly after every other step.
			STEPS.push(() => SCROLL);

			return new Promise((resolve) => {
				STEPS.forEach((STEP, INDEX) => {
					// Stagger the steps so they run in order, one every INTERVAL milliseconds.
					setTimeout(() => {
						window.scrollTo(STEP());
						if (INDEX >= STEPS.length - 1) {
							resolve();
						}
					}, INTERVAL * (INDEX + 1));
				});
			});
		}

		/*
		Read for the particular fields.

		@param {Object} fields the field names mapped to selectors, selector lists, or nested field objects
		@return {Object} the text found for each field; fields without a match are omitted
		*/
		function read_fields(fields) {
			const field_data = {};

			Object.keys(fields).forEach((FIELD_NAME) => {
				const FIELD = {"name": FIELD_NAME, "value": fields[FIELD_NAME]};

				// Empty selectors and null values are skipped.
				if (!FIELD[`value`]) {
					return;
				}

				if (Array.isArray(FIELD[`value`])) {
					// Only the first entry of the array is used as the template.
					const TEMPLATE = FIELD[`value`][0];
					field_data[FIELD[`name`]] = [];

					if (typeof TEMPLATE === "object" && TEMPLATE !== null && !Array.isArray(TEMPLATE)) {
						field_data[FIELD[`name`]].push(read_fields(TEMPLATE));
					} else {
						document.querySelectorAll(TEMPLATE).forEach((ELEMENT) => {
							field_data[FIELD[`name`]].push(ELEMENT.innerText);
						});
					}
				} else if (typeof FIELD[`value`] === "object") {
					field_data[FIELD[`name`]] = read_fields(FIELD[`value`]);
				} else {
					const ELEMENT = document.querySelector(FIELD[`value`]);
					if (ELEMENT) {
						field_data[FIELD[`name`]] = ELEMENT.innerText;
					}
				}
			});

			return field_data;
		}

		// Arrow function: `this` must remain the instance being constructed.
		const read = () => {
			if (typeof scraper_fields === "object" && scraper_fields !== null) {
				field_content = read_fields(scraper_fields);
			}

			Object.keys(field_content).forEach((field_name) => {
				this[field_name] = field_content[field_name];
			});
		};

		/*
		Wait for `window` to exist, then scroll through the page.

		@param {Object} OPTIONS the options given to the constructor
		@return {Promise} resolves when the autoscroll is done; rejects when `window` is missing and waiting is disabled
		*/
		function wait_autoscroll(OPTIONS) {
			return new Promise((resolve, reject) => {
				if (!((typeof window).includes(`undef`))) {
					autoscroll().then(resolve).catch(reject);
				} else if (OPTIONS != null && OPTIONS[`wait until available`]) {
					// Check again in one second, keeping the same options for the retry.
					setTimeout(() => {
						wait_autoscroll(OPTIONS).then(resolve).catch(reject);
					}, 1000);
				} else {
					reject(new Error(`window is not available`));
				}
			});
		}

		wait_autoscroll(options)
			.then(() => {read();})
			.catch((error) => {
				// Leave the instance without fields instead of raising an unhandled rejection.
				console.warn(`scraper: the page could not be read`, error);
			});
	}
}