scrape images aside from texts
This commit is contained in:
parent
af996cfc1d
commit
7ad7072456
1 changed files with 129 additions and 15 deletions
144
scripts/external/scraper.js
vendored
144
scripts/external/scraper.js
vendored
|
@ -79,22 +79,136 @@ export default class scraper {
|
|||
}
|
||||
};
|
||||
|
||||
// Check every second until autoscroll is done.
|
||||
function wait_autoscroll(OPTIONS) {
|
||||
return new Promise((resolve, reject) => {
|
||||
// Check if autoscroll is done.
|
||||
if (!((typeof window).includes(`undef`))) {
|
||||
autoscroll();
|
||||
resolve();
|
||||
} else if (OPTIONS[`wait until available`]) {
|
||||
setTimeout(() => {
|
||||
wait_autoscroll().then(resolve).catch(reject);
|
||||
}, 1000);
|
||||
} else {
|
||||
reject();
|
||||
/*
|
||||
Scrape the images from a page.
|
||||
|
||||
@param {Object} fields the fields to scrape
|
||||
@param {Object} options the options
|
||||
@return {Array} the blobs of the images (undefined when no fields were given)
|
||||
*/
|
||||
async getImages(fields, options) {
|
||||
let CONTENT;
|
||||
|
||||
/*
	Get the blob of the image in an element.

	The image location is taken from the `src` attribute or, failing that,
	from the first `url(…)` of the computed CSS background image. Elements
	without any image location are skipped without a download attempt.

	@param {Element} element the element to get the blob from
	@return {Blob} the blob of the image, or null when the element has no image
*/
async function blobbify(element) {
	/*
		Get the URL of the image.

		@param {Element} element the element to get the URL from
		@return {String} the URL of the image, or a falsy value when there is none
	*/
	function reference(element) {
		// Get using standard attributes.
		let LOCATION = element.getAttribute(`src`);

		if (!LOCATION) {
			// Use the CSS background image, but only when it really is a `url(…)`
			// value: `none` and gradients do not point to a downloadable resource.
			const BACKGROUND = window.getComputedStyle(element).backgroundImage;
			const MATCH = (typeof BACKGROUND === `string`)
				? BACKGROUND.match(/url\(\s*(["']?)(.*?)\1\s*\)/i)
				: null;

			// The second group is the URL with its surrounding quotes removed.
			(MATCH) ? LOCATION = MATCH[2] : false;
		}

		// Return the location.
		return LOCATION;
	}

	/*
		Get the blob from the URL.

		`net` is defined outside of this function; its `download` is presumably
		resolving to a Blob when asked for `blob` (TODO confirm against net).

		@param {String} ADDRESS the URL to get the blob from
		@return {Blob} the blob of the image
	*/
	function getBlob(ADDRESS) {
		return net.download(ADDRESS, `blob`);
	}

	const LOCATION = reference(element);

	// Most elements carry no image at all: there is nothing to download.
	if (!LOCATION) {
		return null;
	}

	const BLOB = await getBlob(LOCATION);

	// Only keep image responses; a missing or untyped result counts as no image.
	return ((typeof BLOB?.type === `string` && BLOB.type.includes(`image`)) ? BLOB : null);
}
|
||||
wait_autoscroll(options).then(() => {read();});
|
||||
|
||||
/*
	Read for the particular fields.

	Every value of `fields` may be a CSS selector, an array of selectors
	and/or nested field objects, or a nested field object. Fields with a
	blank name or an empty value are skipped.

	@param {Object} fields the fields to scrape
	@return {Array} the blobs of the images found
*/
async function read(fields) {
	/*
		Select all images from an element and get their blobs.

		NOTE(review): only the descendants of the element are inspected, not
		the element itself.

		@param {Element} element the element to get the images from
		@return {Array} the blobs of the images
	*/
	async function select(element) {
		const BLOBS = [];

		for (const IMAGE of element.querySelectorAll(`*`)) {
			const BLOB = await blobbify(IMAGE);
			(BLOB) ? BLOBS.push(BLOB) : false;
		}

		return BLOBS;
	}

	/*
		Get the blobs of the images within every element matching a selector.

		@param {String} selector the CSS selector of the elements
		@return {Array} the blobs of the images
	*/
	async function gather(selector) {
		const BLOBS = [];

		for (const ELEMENT of document.querySelectorAll(selector)) {
			BLOBS.push(...(await select(ELEMENT)));
		}

		return BLOBS;
	}

	const DATA = []; // Store here the resulting data

	for (const [NAME, VALUE] of Object.entries(fields)) {
		// The name is only trimmed to detect blank names: the value is always
		// read through the untouched key, so padded names are not lost.
		if (!VALUE || !NAME.trim()) {
			continue;
		}

		if (Array.isArray(VALUE)) {
			for (const PARTICULAR of VALUE) {
				if (!PARTICULAR) {
					continue;
				}

				// Nested field objects are read recursively, anything else is a selector.
				(typeof PARTICULAR === `object` && !Array.isArray(PARTICULAR))
					? DATA.push(...(await read(PARTICULAR)))
					: DATA.push(...(await gather(PARTICULAR)));
			}
		} else if (typeof VALUE === `object`) {
			DATA.push(...(await read(VALUE)));
		} else {
			DATA.push(...(await gather(VALUE)));
		}
	}

	return (DATA);
};
|
||||
|
||||
// Read the fields.
|
||||
(((typeof fields).includes(`obj`) && fields) ? Object.keys(fields).length : false)
|
||||
? CONTENT = await read(fields)
|
||||
: false;
|
||||
|
||||
// Set the data if the options doesn't indicate otherwise.
|
||||
(((((typeof options).includes(`obj`) && options) ? Object.hasOwn(`update`) : false) ? options[`update`] : true) && CONTENT)
|
||||
? this.images = CONTENT
|
||||
: false;
|
||||
return (CONTENT);
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue