Disable image scraping

Disabled because scraping images caused the IP address to be flagged.
This commit is contained in:
buzz-lightsnack-2007 2024-05-25 10:05:44 +08:00
parent 00c0069fa9
commit 349d16b06d

View file

@ -155,134 +155,139 @@ export default class scraper {
/*
Scrape the images from a page.
It's temporarily disabled due to consequent flagging of the IP address. Also it's output is not yet implemented. This is a future point of expansion (Crit E).
@param {Object} fields the fields to scrape
@param {Object} options the options
@return {Object} the blob of the images
*/
async getImages(fields, options) {
let CONTENT;
let DISABLE = true // This is how to disable it
/*
Get the blob of the image in an element.
@param {Element} element the element to get the blob from
@return {Blob} the blob of the image
*/
async function blobbify(element) {
/*
Get the URL of the image.
@param {Element} element the element to get the URL from
@return {String} the URL of the image
*/
function reference(element) {
let LOCATION;
// Get using standard attributes.
LOCATION = element.getAttribute(`src`);
if (!LOCATION) {
// Use the CSS background image.
(window.getComputedStyle(element).backgroundImage)
? LOCATION = window.getComputedStyle(element).backgroundImage.slice(4, -1).replace(/"/g, "")
: false;
}
// Return the location.
return LOCATION;
}
if (!DISABLE) {
let CONTENT;
/*
Get the blob from the URL.
Get the blob of the image in an element.
@param {String} URL the URL to get the blob from
@param {Element} element the element to get the blob from
@return {Blob} the blob of the image
*/
function getBlob(URL) {
return(net.download(URL, `blob`));
}
async function blobbify(element) {
/*
Get the URL of the image.
let LOCATION = reference(element);
let BLOB = await getBlob(LOCATION);
@param {Element} element the element to get the URL from
@return {String} the URL of the image
*/
function reference(element) {
let LOCATION;
return ((BLOB.type.includes(`image`)) ? BLOB : null);
}
// Get using standard attributes.
LOCATION = element.getAttribute(`src`);
/* Read for the particular fields. */
async function read(fields) {
/*
Select all images from an element and get their blobs.
@param {Element} element the element to get the images from
@return {Array} the blobs of the images
*/
async function select(element) {
let IMAGES = [...element.querySelectorAll(`*`)];
let BLOBS = [];
if (IMAGES && IMAGES.length) {
for (let IMAGE of IMAGES) {
let BLOB = await blobbify(IMAGE);
(BLOB) ? BLOBS.push(BLOB) : false;
if (!LOCATION) {
// Use the CSS background image.
(window.getComputedStyle(element).backgroundImage)
? LOCATION = window.getComputedStyle(element).backgroundImage.slice(4, -1).replace(/"/g, "")
: false;
}
// Return the location.
return LOCATION;
}
return BLOBS;
/*
Get the blob from the URL.
@param {String} URL the URL to get the blob from
@return {Blob} the blob of the image
*/
function getBlob(URL) {
return(net.download(URL, `blob`));
}
let LOCATION = reference(element);
let BLOB = await getBlob(LOCATION);
return ((BLOB.type.includes(`image`)) ? BLOB : null);
}
let DATA = []; // Store here the resulting data
/* Read for the particular fields. */
async function read(fields) {
/*
Select all images from an element and get their blobs.
for (let NAME of Object.keys(fields)) {
// Remove trailing spaces within the name.
NAME = (typeof NAME).includes(`str`) ? NAME.trim() : NAME;
let VALUE = fields[NAME];
@param {Element} element the element to get the images from
@return {Array} the blobs of the images
*/
async function select(element) {
let IMAGES = [...element.querySelectorAll(`*`)];
let BLOBS = [];
if (VALUE && NAME) {
// Check if array.
if (Array.isArray(VALUE)) {
// Temporarily create an empty list.
for (let PARTICULAR of VALUE) {
if ((typeof PARTICULAR).includes(`obj`) && PARTICULAR && !Array.isArray(PARTICULAR)) {
DATA = [...DATA, ...(await read(PARTICULAR))];
} else {
let ELEMENTS = [...(document.querySelectorAll(PARTICULAR))];
if (IMAGES && IMAGES.length) {
for (let IMAGE of IMAGES) {
let BLOB = await blobbify(IMAGE);
(BLOB) ? BLOBS.push(BLOB) : false;
}
}
if (ELEMENTS && ELEMENTS.length) {
for (let ELEMENT of ELEMENTS) {
let BLOBS = await select(ELEMENT);
if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS];
return BLOBS;
}
let DATA = []; // Store here the resulting data
for (let NAME of Object.keys(fields)) {
// Remove trailing spaces within the name.
NAME = (typeof NAME).includes(`str`) ? NAME.trim() : NAME;
let VALUE = fields[NAME];
if (VALUE && NAME) {
// Check if array.
if (Array.isArray(VALUE)) {
// Temporarily create an empty list.
for (let PARTICULAR of VALUE) {
if ((typeof PARTICULAR).includes(`obj`) && PARTICULAR && !Array.isArray(PARTICULAR)) {
DATA = [...DATA, ...(await read(PARTICULAR))];
} else {
let ELEMENTS = [...(document.querySelectorAll(PARTICULAR))];
if (ELEMENTS && ELEMENTS.length) {
for (let ELEMENT of ELEMENTS) {
let BLOBS = await select(ELEMENT);
if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS];
}
}
}
}
}
} else if ((typeof VALUE).includes(`obj`) && VALUE) {
DATA = [...DATA, ...(await read(VALUE))];
} else if (document.querySelector(VALUE)) {
let ELEMENTS = [...(document.querySelectorAll(VALUE))];
} else if ((typeof VALUE).includes(`obj`) && VALUE) {
DATA = [...DATA, ...(await read(VALUE))];
} else if (document.querySelector(VALUE)) {
let ELEMENTS = [...(document.querySelectorAll(VALUE))];
if (ELEMENTS && ELEMENTS.length) {
for (let ELEMENT of ELEMENTS) {
let BLOBS = await select(ELEMENT);
if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS];
if (ELEMENTS && ELEMENTS.length) {
for (let ELEMENT of ELEMENTS) {
let BLOBS = await select(ELEMENT);
if (BLOBS && BLOBS.length) DATA = [...DATA, ...BLOBS];
}
}
}
}
}
}
return (DATA);
};
return (DATA);
};
// Read the fields.
(((typeof fields).includes(`obj`) && fields) ? Object.keys(fields).length : false)
? CONTENT = await read(fields)
: false;
// Read the fields.
(((typeof fields).includes(`obj`) && fields) ? Object.keys(fields).length : false)
? CONTENT = await read(fields)
: false;
// Set the data if the options doesn't indicate otherwise.
(((((typeof options).includes(`obj`) && options) ? Object.hasOwn(`update`) : false) ? options[`update`] : true) && CONTENT)
? this.images = CONTENT
: false;
return (CONTENT);
// Set the data if the options doesn't indicate otherwise.
(((((typeof options).includes(`obj`) && options) ? Object.hasOwn(`update`) : false) ? options[`update`] : true) && CONTENT)
? this.images = CONTENT
: false;
return (CONTENT);
}
}
}