mrmBot-Matrix/utils/imagedetect.js

210 lines
8.8 KiB
JavaScript

import { request } from "undici";
import { getType } from "./image.js";
import * as sdk from "matrix-js-sdk";
import { EventTimelineSet } from "matrix-js-sdk";
import { logger } from "./logger.js";
// Hostnames recognised for each media provider.
const tenorURLs = ["tenor.com", "www.tenor.com"];
const giphyURLs = ["giphy.com", "www.giphy.com", "i.giphy.com"];
// there could be more of these
const giphyMediaURLs = [
  "media.giphy.com",
  "media0.giphy.com",
  "media1.giphy.com",
  "media2.giphy.com",
  "media3.giphy.com",
  "media4.giphy.com",
];
const imgurURLs = ["imgur.com", "www.imgur.com", "i.imgur.com"];
const gfycatURLs = [
  "gfycat.com",
  "www.gfycat.com",
  "thumbs.gfycat.com",
  "giant.gfycat.com",
];
// Every provider hostname above, in declaration order.
const combined = tenorURLs.concat(giphyURLs, giphyMediaURLs, imgurURLs, gfycatURLs);
// Type strings accepted as images. "large" is not a MIME type — presumably a
// marker produced by getType; confirm against ./image.js.
const imageFormats = [
  "image/jpeg",
  "image/png",
  "image/webp",
  "image/gif",
  "large",
];
// Type strings accepted as videos.
const videoFormats = [
  "video/mp4",
  "video/webm",
  "video/mov",
];
// Pattern behind isValidUrl, compiled once at module load rather than on every call.
const VALID_URL_PATTERN = new RegExp(
  "^(https?:\\/\\/)?" + // optional protocol
  // domain name: each label starts and ends with a letter/digit and may contain
  // hyphens in between. The tail is optional (not repeated) so that a long run
  // of letters without a dot cannot trigger exponential backtracking.
  "((([a-z\\d](?:[a-z\\d-]*[a-z\\d])?)\\.)+[a-z]{2,}|" +
  "((\\d{1,3}\\.){3}\\d{1,3}))" + // OR an IPv4 address
  "(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*" + // optional port and path
  "(\\?[;&a-z\\d%_.~+=-]*)?" + // optional query string
  "(\\#[-a-z\\d_]*)?$", // optional fragment locator
  "i",
);

/**
 * Checks whether the whole of a string looks like an http(s) URL, a bare
 * domain, or an IPv4 address with optional port, path, query and fragment.
 *
 * @param {string} urlString - text to test; non-strings are coerced by RegExp#test
 * @returns {boolean} true when the entire string matches the URL pattern
 */
const isValidUrl = (urlString) => VALID_URL_PATTERN.test(urlString);
// gets the proper image paths
/**
 * Builds the payload describing the media behind a URL, rewriting links from
 * the known GIF providers (Tenor, Giphy, Imgur, Gfycat) to a direct GIF URL.
 *
 * @param {string} image - URL stored as the payload's `path`; its last path segment yields `name`
 * @param {string} image2 - URL stored as the payload's `url`; its host selects the provider handling
 * @param {boolean} video - when truthy, video types are accepted in addition to image types
 * @param {boolean} extraReturnTypes - forwarded to getType; when truthy a Tenor HTTP 429
 *   yields a payload of type "tenorlimit" instead of undefined
 * @param {boolean} [gifv=false] - force the provider (GIF) handling regardless of host
 * @param {?string} [type=null] - already-known type; when given, getType is not called
 * @param {boolean} [link=false] - use the provider handling when the host is a known provider
 * @returns {Promise<{url: string, path: string, name: string, type: string}|undefined>}
 *   the payload, or undefined when the type is missing/unsupported or Tenor is rate limited
 * @throws {Error} "Timed out" for an AbortError; a provider API error message; or an error
 *   when the Tenor post cannot be identified or resolved
 */
const getImage = async (image, image2, video, extraReturnTypes, gifv = false, type = null, link = false) => {
  try {
    const fileName = new URL(image).pathname.split("/").pop();
    // Only strip an extension when there is one; lastIndexOf() returns -1
    // otherwise and slice(0, -1) would cut off the last character.
    const extensionIndex = fileName.lastIndexOf(".");
    const fileNameNoExtension = extensionIndex === -1 ? fileName : fileName.slice(0, extensionIndex);
    const payload = {
      url: image2,
      path: image,
      name: fileNameNoExtension
    };
    const host = new URL(image2).host;
    if (gifv || (link && combined.includes(host))) {
      if (tenorURLs.includes(host)) {
        // Tenor doesn't let us access a raw GIF without going through their API,
        // so we use that if there's a key in the config.
        // An unset TENOR variable counts as "no key", same as an empty one.
        if (process.env.TENOR) {
          let id;
          if (image2.includes("tenor.com/view/")) {
            id = image2.split("-").pop();
          } else if (image2.endsWith(".gif")) {
            const redirect = (await request(image2, { method: "HEAD" })).headers.location;
            if (typeof redirect === "string") id = redirect.split("-").pop();
          }
          if (!id) throw new Error("Could not determine the Tenor post ID");
          // The ID comes from a user-supplied URL, so escape it before putting it in the query string.
          const data = await request(`https://tenor.googleapis.com/v2/posts?ids=${encodeURIComponent(id)}&media_filter=gif&limit=1&client_key=esmBot%20${process.env.ESMBOT_VER}&key=${process.env.TENOR}`);
          if (data.statusCode === 429) {
            if (!extraReturnTypes) return;
            payload.type = "tenorlimit";
            return payload;
          }
          const json = await data.body.json();
          if (json.error) throw new Error(json.error.message);
          const [post] = json.results ?? [];
          if (!post) throw new Error("Tenor returned no results");
          payload.path = post.media_formats.gif.url;
        }
      } else if (giphyURLs.includes(host)) {
        // Can result in an HTML page instead of a GIF
        payload.path = `https://media0.giphy.com/media/${image2.split("/")[4].split("-").pop()}/giphy.gif`;
      } else if (giphyMediaURLs.includes(host)) {
        payload.path = `https://media0.giphy.com/media/${image2.split("/")[4]}/giphy.gif`;
      } else if (imgurURLs.includes(host)) {
        // Seems that Imgur has a possibility of making GIFs static
        payload.path = image.replace(".mp4", ".gif");
      } else if (gfycatURLs.includes(host)) {
        // iirc Gfycat also seems to sometimes make GIFs static
        const gfyName = image.split("/").pop().split(".mp4")[0];
        if (link) {
          const data = await request(`https://api.gfycat.com/v1/gfycats/${gfyName}`);
          const json = await data.body.json();
          if (json.errorMessage) throw new Error(json.errorMessage);
          payload.path = json.gfyItem.gifUrl;
        } else {
          payload.path = `https://thumbs.gfycat.com/${gfyName}-size_restricted.gif`;
        }
      }
      payload.type = "image/gif";
    } else {
      payload.type = type ?? await getType(payload.path, extraReturnTypes);
      // Image types are always acceptable; video types only when `video` is set.
      const accepted = imageFormats.includes(payload.type) || (video && videoFormats.includes(payload.type));
      if (!accepted) return;
    }
    return payload;
  } catch (error) {
    if (error.name === "AbortError") throw new Error("Timed out");
    throw error;
  }
};
/**
 * Turns an mxc:// content URI into a download URL under the base URL taken
 * from the MATRIX_BASEURL environment variable.
 *
 * @param {string} mxcUri - content URI, e.g. "mxc://server/mediaId"
 * @returns {Promise<string>} the r0 media download URL
 */
const urlFromMxc = async (mxcUri) => {
  // Only the first "mxc://" occurrence is removed.
  const mediaPath = mxcUri.replace("mxc://", "");
  return `${process.env.MATRIX_BASEURL}/_matrix/media/r0/download/${mediaPath}`;
};
/**
 * Inspects a single event for usable media: either an uploaded image
 * (msgtype "m.image") or a text message whose body is a URL to a supported type.
 *
 * @param {object} message - raw event; only `message.content` is read
 * @param {boolean} extraReturnTypes - forwarded to getType for linked URLs
 * @param {boolean} video - when truthy, linked URLs with a video type are accepted too
 * @param {boolean} sticker - currently unused
 * @returns {Promise<{name: string, path: string, url: string, type: string}|false>}
 *   the media descriptor, or false when the event carries nothing usable
 */
const checkImages = async (message, extraReturnTypes, video, sticker) => {
  const { content } = message;
  const hasBody = typeof content.body === "string";
  let type;
  // Uploaded image: needs `info` (for the MIME type) and a plain `url`;
  // events lacking either are skipped instead of throwing.
  if (content.msgtype === "m.image" && content.info != null && typeof content.url === "string" && hasBody) {
    const url = await urlFromMxc(content.url);
    // Only strip an extension when there is one; lastIndexOf() returns -1
    // otherwise and slice(0, -1) would cut off the last character.
    const extensionIndex = content.body.lastIndexOf(".");
    const fileNameNoExtension = extensionIndex === -1 ? content.body : content.body.slice(0, extensionIndex);
    type = { name: fileNameNoExtension, path: url, url, type: content.info.mimetype };
  } else if (content.msgtype === "m.text" && hasBody) {
    // Drop a leading line that is followed by a blank line
    // (presumably the quoted reply fallback — confirm).
    const url = content.body.replace(/.*\n\n/g, "");
    if (isValidUrl(url)) {
      const mimetype = await getType(url, extraReturnTypes);
      // Same acceptance rule as getImage: a link whose type is unknown or
      // unsupported must not be reported as an image.
      const accepted = imageFormats.includes(mimetype) || (video && videoFormats.includes(mimetype));
      if (accepted) {
        type = { name: "image", path: url, url, type: mimetype };
      }
    }
  }
  // NOTE: embed/attachment handling (formerly routed through getImage) is not
  // implemented for these events.
  // if the return value exists then return it
  return type ?? false;
};
// this checks for the latest message containing an image and returns the url of the image
/**
 * Looks for media in, in order: the event the command replies to, the command
 * event itself, and (unless `singleMessage`) the room's live timeline, newest first.
 *
 * @param {object} client - Matrix client; fetchRoomEvent, store and getRoom are used
 * @param {object} cmdMessage - raw command event (`content` and `room_id` are read)
 * @param {*} interaction - unused
 * @param {*} options - unused
 * @param {boolean} [extraReturnTypes=false] - forwarded to checkImages
 * @param {boolean} [video=false] - forwarded to checkImages
 * @param {boolean} [sticker=false] - forwarded to checkImages
 * @param {boolean} [singleMessage=false] - when true, the timeline is not searched
 * @returns {Promise<object|undefined>} the first media descriptor found, or undefined
 */
export default async (client, cmdMessage, interaction, options, extraReturnTypes = false, video = false, sticker = false, singleMessage = false) => {
  if (cmdMessage) {
    // Only reply relations carry m.in_reply_to; any other relation is
    // ignored rather than dereferenced.
    const replyId = cmdMessage.content["m.relates_to"]?.["m.in_reply_to"]?.event_id;
    if (replyId) {
      const replyMessage = await client.fetchRoomEvent(cmdMessage.room_id, replyId);
      if (replyMessage) {
        const replyResult = await checkImages(replyMessage, extraReturnTypes, video, sticker);
        if (replyResult !== false) return replyResult;
      }
    }
    // then we check the current message
    const result = await checkImages(cmdMessage, extraReturnTypes, video, sticker);
    if (result !== false) return result;
  }
  if (!singleMessage && cmdMessage) {
    // if there aren't any replies or attachments then iterate over the last few messages in the room
    try {
      const channel = cmdMessage.room_id;
      await client.store.getPendingEvents(channel);
      const room = await client.getRoom(channel);
      const timeline = await room.getLiveTimeline();
      // NOTE(review): kept from the original; this clears the backwards pagination
      // token on the live timeline — confirm it is intended.
      timeline.setPaginationToken(null, sdk.EventTimeline.BACKWARDS);
      // Reverse a copy: Array#reverse mutates, and the timeline's event array
      // must keep its order.
      const newestFirst = [...timeline.getEvents()].reverse();
      for (const event of newestFirst) {
        const result = await checkImages(event.event, extraReturnTypes, video, sticker);
        if (result !== false) return result;
      }
    } catch (error) {
      // log error
      logger.log("error", error);
    }
  }
};