mirror of
https://github.com/polyjitter/searchbot-discord.git
synced 2024-08-14 22:46:55 +00:00
Moving search to Qwant
This commit is contained in:
parent
19acae2abc
commit
391beaf422
6 changed files with 422 additions and 229 deletions
|
@ -267,6 +267,14 @@ Guild count: {len(self.bot.guilds)}
|
|||
# Message Sending
|
||||
await ctx.send(msg)
|
||||
|
||||
@commands.command()
@commands.is_owner()
async def toggle_debug(self, ctx):
    """Toggles debug while running.

    Flips ``bot.debug_toggle``, mirrors the new value onto the bot's logging
    helper, and reports the new state in the invoking channel. Owner only.
    """

    enabled = not self.bot.debug_toggle
    self.bot.debug_toggle = enabled

    # The Logging helper stores its own copy of ``bot.debug_toggle`` when it
    # is constructed and consults that copy before sending debug output, so
    # flipping only the bot attribute would never change what gets logged.
    # NOTE(review): assumes ``bot.logging`` is that Logging instance - confirm.
    logger = getattr(self.bot, 'logging', None)
    if logger is not None and hasattr(logger, 'debug_toggle'):
        logger.debug_toggle = enabled

    await ctx.send(f"Set debug mode to `{enabled}`.")
|
||||
|
||||
@commands.command(aliases=['exit', 'reboot'])
|
||||
@commands.is_owner()
|
||||
async def restart(self, ctx):
|
||||
|
|
17
extensions/models/SearchExceptions.py
Normal file
17
extensions/models/SearchExceptions.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# search exceptions
|
||||
# Provides custom exceptions for the search cog.
|
||||
|
||||
"""Search Exceptions File"""
|
||||
|
||||
from discord.ext import commands
|
||||
|
||||
|
||||
class SafesearchFail(commands.CommandError):
    """Raised when a search query is rejected for containing NSFW terms."""
|
||||
|
||||
|
||||
def setup(bot):
|
||||
pass
|
|
@ -10,6 +10,8 @@ from discord.ext import commands
|
|||
import aiohttp
|
||||
import random
|
||||
from typing import List
|
||||
from extensions.models import SearchExceptions
|
||||
import html2text
|
||||
|
||||
|
||||
class Search(commands.Cog, name="Basic"):
|
||||
|
@ -21,193 +23,153 @@ class Search(commands.Cog, name="Basic"):
|
|||
self.bot = bot
|
||||
self.info = bot.logging.info
|
||||
self.warn = bot.logging.warn
|
||||
self.debug = bot.logging.debug
|
||||
self.request = bot.request
|
||||
self.emoji = "\U0001F50D"
|
||||
self.scrape_token = bot.config['SCRAPESTACK']
|
||||
|
||||
# Get Instances
|
||||
with open('searxes.txt') as f:
|
||||
self.instances = f.read().split('\n')
|
||||
# Markdown converter
|
||||
self.tomd = html2text.HTML2Text()
|
||||
self.tomd.ignore_links = True
|
||||
self.tomd.ignore_images = True
|
||||
self.tomd.ignore_tables = True
|
||||
self.tomd.ignore_emphasis = True
|
||||
self.tomd.body_width = 0
|
||||
|
||||
async def _search_logic(self, query: str, is_nsfw: bool = False,
|
||||
category: str = None) -> str:
|
||||
"""Provides search logic for all search commands."""
|
||||
category: str = 'web', count: int = 5) -> list:
|
||||
"""Uses scrapestack and the Qwant API to find search results."""
|
||||
|
||||
# Typing
|
||||
base: str
|
||||
safesearch: str
|
||||
|
||||
# NSFW Filtering
|
||||
# WARNING - This list includes slurs.
|
||||
# WARNING This list includes slurs.
|
||||
nono_words = [
|
||||
'tranny', 'faggot', 'fag',
|
||||
'porn', 'cock', 'dick',
|
||||
'titty', 'boob', 'penis',
|
||||
'slut', 'cum', 'jizz',
|
||||
'semen', 'cooch', 'coochie',
|
||||
'pussy', 'penis', 'fetish',
|
||||
'bdsm', 'sexy', 'xxx',
|
||||
'orgasm', 'masturbation',
|
||||
'erotic', 'creampie',
|
||||
'fap', 'nude', 'orgasm',
|
||||
'squirting', 'yiff',
|
||||
'e621'
|
||||
]
|
||||
nono_sites = [
|
||||
'xvideos', 'pornhub',
|
||||
'xhamster', 'xnxx',
|
||||
'youporn', 'xxx',
|
||||
'freexcafe', 'sex.com',
|
||||
'e621'
|
||||
'tranny', 'faggot', 'fag', 'porn', 'cock', 'dick',
|
||||
'titty', ' tit ', 'boob', 'penis', 'slut', ' cum ', 'jizz',
|
||||
'semen', 'cooch', 'coochie', 'pussy', 'penis', 'fetish',
|
||||
'bdsm', 'sexy', 'xxx', 'orgasm', 'masturbat',
|
||||
'erotic', 'creampie', 'fap', 'nude', 'orgasm',
|
||||
'squirting', 'yiff', 'e621', ' sex', 'ejaculat',
|
||||
'cunt', 'vagina', 'coom', 'troon', 'hentai', 'yaoi',
|
||||
'bukkake', 'bara', 'shota', 'loli', 'fetish', 'spunk',
|
||||
'pron', 'p0rn', 'pr0n', 'gloryhole', 'felch', 'skullfuck',
|
||||
'scat', 'pissplay', 'piss play', 'underage', 'bbw',
|
||||
'fisting', 'queef', "rimming", 'rimjob', 'bdsm',
|
||||
'cbt', 'blumpkin', 'boner', 'prostitut', 'butt plug',
|
||||
'transvestite', 'femboy', 'castrat', 'philia', 'edging',
|
||||
'edgeplay', 'enema', 'facial', 'fellat', 'femdom', 'footjob',
|
||||
'blowjob', 'titjob', 'handjob', 'frot', 'gang bang', 'gangbang',
|
||||
'glory hole', 'hermap', 'jerk off', 'jerking off', 'jack off',
|
||||
'jacking off', 'kink', 'wet dream', 'anal', 'pegging', 'precum',
|
||||
'pre-cum', 'pre cum', 'priap', 'scrotum', 'shemale', 'smegma',
|
||||
'smut', 'softcore', 'transsexual', 'voyeur', 'viagra', 'wank',
|
||||
'whore'
|
||||
]
|
||||
|
||||
if not is_nsfw:
|
||||
for i in nono_words:
|
||||
if i in query.replace(" ", ""):
|
||||
return (
|
||||
"**Sorry!** That query included language "
|
||||
"we cannot accept in a non-NSFW channel. "
|
||||
"Please try again in an NSFW channel."
|
||||
)
|
||||
if any(n in query for n in nono_words):
|
||||
raise SearchExceptions.SafesearchFail('Query had NSFW.')
|
||||
|
||||
# Choose an instance
|
||||
if self.instances == []:
|
||||
with open('searxes.txt') as f:
|
||||
self.instances = f.read().split('\n')
|
||||
instance = random.sample(self.instances, k=1)[0]
|
||||
|
||||
# Error Template
|
||||
error_msg = (
|
||||
"**An error occured!**\n\n"
|
||||
f"There was a problem with `{instance}`. Please try again later.\n"
|
||||
f"_If problems with this instance persist, contact`{self.bot.appinfo.owner}` to have it removed._"
|
||||
)
|
||||
|
||||
# Create the URL to make an API call to
|
||||
call = f'{instance}search?q={query}&format=json&language=en-US'
|
||||
|
||||
# If a type is provided, add that type to the call URL
|
||||
if category:
|
||||
call += f'&categories={category}'
|
||||
# Scrape or not
|
||||
# if self.scrape_token != '':
|
||||
# base = (
|
||||
# "http://api.scrapestack.com/scrape"
|
||||
# f"?access_key={self.scrape_token}"
|
||||
# f"&url=https://api.qwant.com/api"
|
||||
# )
|
||||
# print(base)
|
||||
# else:
|
||||
base = "https://api.qwant.com/api"
|
||||
|
||||
# Safesearch
|
||||
if is_nsfw:
|
||||
call += '&safesearch=0'
|
||||
safesearch = "0"
|
||||
else:
|
||||
call += '&safesearch=1'
|
||||
safesearch = "2"
|
||||
|
||||
# Figure out engines for different categories to get decent results.
|
||||
if category == 'videos':
|
||||
call += '&engines=bing+videos,google+videos'
|
||||
# Make said API call
|
||||
try:
|
||||
async with self.request.get(call) as resp:
|
||||
response = await resp.json()
|
||||
except aiohttp.ClientError:
|
||||
return error_msg
|
||||
|
||||
# Split our response data up for parsing
|
||||
# infoboxes = response['infoboxes']
|
||||
results = response['results']
|
||||
|
||||
# Create message with results
|
||||
try:
|
||||
# Handle tiny result count
|
||||
if len(results) > 5:
|
||||
amt = 5
|
||||
else:
|
||||
amt = len(results)
|
||||
|
||||
# Remove no-no sites
|
||||
if not is_nsfw:
|
||||
for r in results[0:7]:
|
||||
for n in nono_sites:
|
||||
if n in r['url']:
|
||||
results.remove(r)
|
||||
|
||||
# Escape stuff
|
||||
query = discord.utils.escape_mentions(query)
|
||||
query = discord.utils.escape_markdown(query)
|
||||
|
||||
# Header
|
||||
msg = f"Showing **{amt}** results for `{query}`. \n\n"
|
||||
# Expanded Result
|
||||
msg += (
|
||||
f"**{results[0]['title']}** <{results[0]['url']}>\n"
|
||||
f"{results[0]['content']}\n\n")
|
||||
# Other Results
|
||||
msg += "\n".join(
|
||||
[f"**{entry['title']}** <{entry['url']}>" for entry in results[1:5]])
|
||||
# Instance Info
|
||||
msg += f"\n\n_Results retrieved from instance `{instance}`._"
|
||||
|
||||
return msg
|
||||
|
||||
# Reached if error with returned results
|
||||
except (KeyError, IndexError) as e:
|
||||
# Logging
|
||||
await self.warn(
|
||||
f"A user encountered a(n) `{e}` with <{instance}> when searching for `{query}`. "
|
||||
"Consider removing it or looking into it.",
|
||||
name="Failed Instance"
|
||||
# Search URL Building
|
||||
# api.qwant.com/api/search/web?count=5&q=test&safesearch=2&...
|
||||
search_url = (
|
||||
f"{base}/search/{category}"
|
||||
f"?count={count}"
|
||||
f"&q={query}"
|
||||
f"&safesearch={safesearch}"
|
||||
"&t=web"
|
||||
"&locale=en_US"
|
||||
"&uiv=4"
|
||||
)
|
||||
await self.debug(search_url, name="_search_logic")
|
||||
|
||||
self.instances.remove(instance) # Weed the instance out
|
||||
# Recurse until good response
|
||||
return await self._search_logic(query, is_nsfw)
|
||||
# Searching
|
||||
headers = {
|
||||
'User-Agent': (
|
||||
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0)'
|
||||
' Gecko/20100101 Firefox/74.0'
|
||||
)
|
||||
}
|
||||
async with self.request.get(search_url, headers=headers) as resp:
|
||||
to_parse = await resp.json()
|
||||
|
||||
async def _instance_check(self, instance: str, content: dict) -> bool:
|
||||
"""Checks the quality of an instance."""
|
||||
# Sends results
|
||||
return to_parse['data']['result']['items']
|
||||
|
||||
# Makes sure proper values exist
|
||||
if 'error' in content:
|
||||
return False
|
||||
if not ('engines' in content and 'initial' in content['timing']):
|
||||
return False
|
||||
if not ('google' in content['engines'] and 'enabled' in content['engines']['google']):
|
||||
return False
|
||||
async def _basic_search(self, ctx, query: str, category: str = 'web'):
|
||||
"""Basic search formatting."""
|
||||
|
||||
# Makes sure google is enabled
|
||||
if not content['engines']['google']['enabled']:
|
||||
return False
|
||||
# NOTE Customizable count not yet implemented.
|
||||
count: int = 5
|
||||
|
||||
# Makes sure is not Tor
|
||||
if content['network_type'] != 'normal':
|
||||
return False
|
||||
|
||||
# Only picks instances that are fast enough
|
||||
timing = int(content['timing']['initial'])
|
||||
if timing > 0.20:
|
||||
return False
|
||||
|
||||
# Check for Google captcha
|
||||
test_search = f'{instance}/search?q=test&format=json&lang=en-US'
|
||||
try:
|
||||
async with self.request.get(test_search) as resp:
|
||||
response = await resp.json()
|
||||
response['results'][0]['content']
|
||||
except (aiohttp.ClientError, KeyError, IndexError):
|
||||
return False
|
||||
|
||||
# Reached if passes all checks
|
||||
return True
|
||||
|
||||
async def _basic_search(self, ctx, query: str,
|
||||
category: str = None):
|
||||
"""Base search message generation."""
|
||||
|
||||
async with ctx.typing():
|
||||
# Safesearch variable
|
||||
is_nsfw = (
|
||||
ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
|
||||
else False
|
||||
)
|
||||
|
||||
msg = await self._search_logic(query, is_nsfw, category)
|
||||
await ctx.send(msg)
|
||||
# Handling
|
||||
async with ctx.typing():
|
||||
|
||||
await self.info(
|
||||
content=(
|
||||
f"**{ctx.author}** searched for `{query}` "
|
||||
f"in \"{ctx.guild}\" and got this:"
|
||||
f"\n\n{msg}"
|
||||
),
|
||||
name="Search Results"
|
||||
# Searches
|
||||
results = await self._search_logic(query, is_nsfw, category)
|
||||
count = len(results)
|
||||
|
||||
# Escapes all nasties for displaying
|
||||
query_display = discord.utils.escape_mentions(query)
|
||||
query_display = discord.utils.escape_markdown(query_display)
|
||||
|
||||
# Return if no results
|
||||
try:
|
||||
results[0]
|
||||
except IndexError:
|
||||
return await ctx.send(
|
||||
f"No results found for `{query_display}`."
|
||||
)
|
||||
|
||||
# Gets the first entry's stuff
|
||||
first_title = self.tomd.handle(results[0]['title']).rstrip('\n')
|
||||
first_url = results[0]['url']
|
||||
first_desc = self.tomd.handle(results[0]['desc']).rstrip('\n')
|
||||
|
||||
# Builds the substring for each of the other results.
|
||||
other_results: List[str] = []
|
||||
for r in results[1:count]:
    title = self.tomd.handle(r['title']).rstrip('\n')
    # Use this entry's own link; indexing results[0] here repeated the
    # first result's URL under every other title.
    url = r['url']
    other_results.append(f"**{title}** <{url}>")
|
||||
other_msg: str = "\n".join(other_results)
|
||||
|
||||
# Builds message
|
||||
msg = (
|
||||
f"Showing **{count}** results for `{query_display}`.\n\n"
|
||||
f"**{first_title}** <{first_url}>\n{first_desc}\n\n"
|
||||
f"{other_msg}\n\n_Powered by Qwant._"
|
||||
)
|
||||
|
||||
# Sends message
|
||||
await self.debug(msg, name="_basic_search")
|
||||
await ctx.send(msg)
|
||||
|
||||
@commands.command()
|
||||
async def search(self, ctx, *, query: str):
|
||||
"""Search online for general results."""
|
||||
|
@ -250,51 +212,27 @@ class Search(commands.Cog, name="Basic"):
|
|||
|
||||
await self._basic_search(ctx, query, 'maps')
|
||||
|
||||
@commands.command()
|
||||
@commands.is_owner()
|
||||
async def rejson(self, ctx):
|
||||
"""Refreshes the list of instances for searx."""
|
||||
|
||||
msg = await ctx.send('<a:updating:403035325242540032> Refreshing instance list...\n\n'
|
||||
'(Due to extensive quality checks, this may take a bit.)')
|
||||
plausible: List[str] = []
|
||||
|
||||
# Get, parse, and quality check all instances
|
||||
async with self.request.get('https://searx.space/data/instances.json') as r:
|
||||
# Parsing
|
||||
searx_json = await r.json()
|
||||
instances = searx_json['instances']
|
||||
|
||||
# Quality Check
|
||||
for i in instances:
|
||||
content = instances.get(i)
|
||||
is_good: bool = await self._instance_check(i, content)
|
||||
if is_good:
|
||||
plausible.append(i)
|
||||
|
||||
# Save new list
|
||||
self.instances = plausible
|
||||
with open('searxes.txt', 'w') as f:
|
||||
f.write('\n'.join(plausible))
|
||||
|
||||
await msg.edit(content='Instances refreshed!')
|
||||
|
||||
@commands.Cog.listener()
|
||||
async def on_command_error(self, ctx, error):
|
||||
"""Listener makes no command fallback to searching."""
|
||||
|
||||
if isinstance(error, commands.CommandNotFound) or \
|
||||
isinstance(error, commands.CheckFailure):
|
||||
fallback = (commands.CommandNotFound, commands.CheckFailure)
|
||||
|
||||
# Handling
|
||||
async with ctx.typing():
|
||||
# Prepares term
|
||||
term = ctx.message.content.replace(ctx.prefix, '', 1)
|
||||
term = term.lstrip(' ')
|
||||
|
||||
# Does search
|
||||
await self._basic_search(ctx, term)
|
||||
if isinstance(error, fallback):
|
||||
try:
|
||||
await self._basic_search(
|
||||
ctx, ctx.message.content[len(ctx.prefix):]
|
||||
)
|
||||
except SearchExceptions.SafesearchFail:
|
||||
await ctx.send(
|
||||
"**Sorry!** That query included language "
|
||||
"we cannot accept in a non-NSFW channel. "
|
||||
"Please try again in an NSFW channel."
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
|
||||
|
||||
def setup(bot):
|
||||
|
||||
bot.add_cog(Search(bot))
|
||||
|
|
|
@ -19,6 +19,7 @@ class Logging():
|
|||
self.request = bot.request
|
||||
self.online = bot.online
|
||||
self.maintenance = bot.maintenance
|
||||
self.debug_toggle = bot.debug_toggle
|
||||
|
||||
# Sets info hook first
|
||||
self.info_hook = self.online.get_webhook(
|
||||
|
@ -26,7 +27,6 @@ class Logging():
|
|||
if bot.config['HOOKS']['INFO_HOOK'] \
|
||||
else None
|
||||
|
||||
|
||||
# Sets other hooks or defaults them
|
||||
if self.info_hook:
|
||||
self.warn_hook = self.online.get_webhook(
|
||||
|
@ -51,7 +51,7 @@ class Logging():
|
|||
|
||||
# Prerequisites
|
||||
formatted_tb = traceback.format_tb(error.__traceback__)
|
||||
formatted_tb = ''.join(formatted_tb)
|
||||
tb_str = ''.join(formatted_tb)
|
||||
original_exc = traceback.format_exception(
|
||||
type(error), error, error.__traceback__)
|
||||
|
||||
|
@ -77,7 +77,7 @@ class Logging():
|
|||
trace_content = (
|
||||
"```py\n\nTraceback (most recent call last):"
|
||||
"\n{}{}: {}```").format(
|
||||
formatted_tb,
|
||||
tb_str,
|
||||
type(error).__name__,
|
||||
error)
|
||||
|
||||
|
@ -120,7 +120,9 @@ class Logging():
|
|||
if self.warn_hook:
|
||||
return await self.warn_hook.send(
|
||||
content=content,
|
||||
username=f"{self.bot.user.name} - {name if name else 'unknown'}",
|
||||
username=(
|
||||
f"{self.bot.user.name} - {name if name else 'unknown'}"
|
||||
),
|
||||
avatar_url=str(self.bot.user.avatar_url),
|
||||
embed=embed
|
||||
)
|
||||
|
@ -144,7 +146,9 @@ class Logging():
|
|||
)
|
||||
await self.error_hook.send(
|
||||
content=fallback,
|
||||
username=f"{self.bot.user.name} - {name if name else 'unknown'}",
|
||||
username=(
|
||||
f"{self.bot.user.name} - {name if name else 'unknown'}"
|
||||
),
|
||||
avatar_url=str(self.bot.user.avatar_url),
|
||||
embed=error_embed
|
||||
)
|
||||
|
@ -168,10 +172,12 @@ class Logging():
|
|||
name: Optional[str] = None):
|
||||
"""Logs warnings and sends them to the appropriate places."""
|
||||
|
||||
if self.debug_hook and self.maintenance:
|
||||
if self.debug_hook and (self.maintenance or self.debug_toggle):
|
||||
return await self.debug_hook.send(
|
||||
content=content,
|
||||
username=f"{self.bot.user.name} - {name if name else 'unknown'}",
|
||||
content=f"```{content}```",
|
||||
username=(
|
||||
f"{self.bot.user.name} - {name if name else 'unknown'}"
|
||||
),
|
||||
avatar_url=str(self.bot.user.avatar_url),
|
||||
embed=embed
|
||||
)
|
||||
|
|
21
main.py
21
main.py
|
@ -9,14 +9,13 @@
|
|||
|
||||
import discord
|
||||
from discord.ext import commands
|
||||
import traceback
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import rethinkdb
|
||||
from typing import List, Optional
|
||||
from typing import List
|
||||
from extensions.models import SearchExceptions
|
||||
|
||||
|
||||
class Bot(commands.Bot):
|
||||
|
@ -30,6 +29,7 @@ class Bot(commands.Bot):
|
|||
|
||||
# Setup
|
||||
self.extensions_list: List[str] = []
|
||||
self.debug_toggle = False
|
||||
|
||||
with open('config.json') as f:
|
||||
self.config = json.load(f)
|
||||
|
@ -249,13 +249,26 @@ async def on_command_error(ctx, error):
|
|||
"""Handles all errors stemming from ext.commands."""
|
||||
|
||||
# Lets other cogs handle CommandNotFound.
|
||||
# Change this if you want command not found handling
|
||||
# Change this if you want command not found handling.
|
||||
if (
|
||||
isinstance(error, commands.CommandNotFound)
|
||||
or isinstance(error, commands.CheckFailure)
|
||||
):
|
||||
return
|
||||
|
||||
# Custom message for if an argument is missing.
|
||||
elif isinstance(error, commands.MissingRequiredArgument):
|
||||
await ctx.send(
|
||||
f"**Missing Argument!** A `{error.param.name}` is needed."
|
||||
)
|
||||
|
||||
elif isinstance(error, SearchExceptions.SafesearchFail):
|
||||
await ctx.send(
|
||||
"**Sorry!** That query included language "
|
||||
"we cannot accept in a non-NSFW channel. "
|
||||
"Please try again in an NSFW channel."
|
||||
)
|
||||
|
||||
# Provides a very pretty embed if something's actually a dev's fault.
|
||||
elif isinstance(error, commands.CommandInvokeError):
|
||||
|
||||
|
|
211
old_search.py
Normal file
211
old_search.py
Normal file
|
@ -0,0 +1,211 @@
|
|||
# This is the old search logic for reference purposes
|
||||
|
||||
async def _old_search_logic(self, query: str, is_nsfw: bool = False,
                            category: str = None) -> str:
    """Provides search logic for all search commands.

    Picks a random instance from ``self.instances``, queries its JSON search
    endpoint and formats up to five results as a chat message. An instance
    whose response cannot be parsed is logged, removed from the list, and
    another one is tried until the list is exhausted.

    Args:
        query: Raw search terms as typed by the user.
        is_nsfw: When False the query is screened against a word list and
            results pointing at blocked sites are skipped.
        category: Optional search category appended to the request.

    Returns:
        The message to send: formatted results, a refusal for a blocked
        query, or an error notice.
    """
    from urllib.parse import quote_plus

    # NSFW Filtering
    # WARNING - This list includes slurs.
    nono_words = [
        'tranny', 'faggot', 'fag',
        'porn', 'cock', 'dick',
        'titty', 'boob', 'penis',
        'slut', 'cum', 'jizz',
        'semen', 'cooch', 'coochie',
        'pussy', 'penis', 'fetish',
        'bdsm', 'sexy', 'xxx',
        'orgasm', 'masturbation',
        'erotic', 'creampie',
        'fap', 'nude', 'orgasm',
        'squirting', 'yiff',
        'e621'
    ]
    nono_sites = [
        'xvideos', 'pornhub',
        'xhamster', 'xnxx',
        'youporn', 'xxx',
        'freexcafe', 'sex.com',
        'e621', 'nhentai'
    ]

    if not is_nsfw:
        # Spaces are stripped and case is folded so that neither "p o r n"
        # nor "PORN" slips past the filter.
        squashed = query.replace(" ", "").lower()
        if any(word in squashed for word in nono_words):
            return (
                "**Sorry!** That query included language "
                "we cannot accept in a non-NSFW channel. "
                "Please try again in an NSFW channel."
            )

    # Drop blank entries (a trailing newline in the file yields ''), and
    # reload from disk when nothing usable is left.
    instances = [i for i in (self.instances or []) if i]
    if not instances:
        with open('searxes.txt') as f:
            instances = [line for line in f.read().split('\n') if line]
    self.instances = instances

    # Escaped copy for display and logging only; the request always uses the
    # raw query so a retry never searches for the escaped text.
    shown_query = discord.utils.escape_mentions(query)
    shown_query = discord.utils.escape_markdown(shown_query)

    # Each failed instance is removed, so this loop is bounded by the number
    # of known instances instead of recursing without limit.
    while self.instances:
        instance = random.choice(self.instances)

        # Error Template
        error_msg = (
            "**An error occured!**\n\n"
            f"There was a problem with `{instance}`. Please try again later.\n"
            f"_If problems with this instance persist, "
            f"contact`{self.bot.appinfo.owner}` to have it removed._"
        )

        # Create the URL to make an API call to; the query is URL-encoded so
        # characters such as '&' or '#' cannot alter the request.
        call = (
            f'{instance}search?q={quote_plus(query)}'
            '&format=json&language=en-US'
        )

        # If a type is provided, add that type to the call URL
        if category:
            call += f'&categories={category}'

        call += '&safesearch=0' if is_nsfw else '&safesearch=1'

        # Figure out engines for different categories to get decent results.
        if category == 'videos':
            call += '&engines=bing+videos,google+videos'

        # Make said API call
        try:
            async with self.request.get(call) as resp:
                response = await resp.json()
        except aiohttp.ClientError:
            return error_msg

        # Create message with results
        try:
            # Collect the first five acceptable results. Each entry is
            # judged once, so a URL matching several blocked sites is
            # simply skipped, and the count reflects what is shown.
            top = []
            for r in response['results']:
                if not is_nsfw and any(n in r['url'] for n in nono_sites):
                    continue
                top.append(r)
                if len(top) == 5:
                    break

            first = top[0]  # IndexError here means nothing usable came back

            # Header
            msg = f"Showing **{len(top)}** results for `{shown_query}`. \n\n"
            # Expanded Result
            msg += (
                f"**{first['title']}** <{first['url']}>\n"
                f"{first['content']}\n\n")
            # Other Results
            msg += "\n".join(
                f"**{entry['title']}** <{entry['url']}>" for entry in top[1:])
            # Instance Info
            msg += f"\n\n_Results retrieved from instance `{instance}`._"

            return msg

        # Reached if error with returned results
        except (KeyError, IndexError) as e:
            # Logging
            await self.warn(
                f"A user encountered a(n) `{e}` with <{instance}> when searching for `{shown_query}`. "
                "Consider removing it or looking into it.",
                name="Failed Instance"
            )

            self.instances.remove(instance)  # Weed the instance out

    # Every known instance failed; the next call reloads the list from disk.
    return (
        "**Sorry!** No search instances are available right now. "
        "Please try again later."
    )
|
||||
|
||||
async def _instance_check(self, instance: str, content: dict) -> bool:
    """Checks the quality of an instance.

    Args:
        instance: Base URL of the instance to probe.
        content: The metadata entry for that instance from the instance
            index.

    Returns:
        True only when the metadata reports no error, google is enabled,
        the network type is 'normal', the initial timing is at most 0.20,
        and a live test search returns a first result with a 'content'
        field. Any missing or malformed field counts as a failure.
    """
    import asyncio

    # Makes sure proper values exist
    if 'error' in content:
        return False
    try:
        google_enabled = content['engines']['google']['enabled']
        network_type = content['network_type']
        # float(), not int(): int() truncated every value below 1 to 0, so
        # the 0.20 cut-off further down could never reject anything under 1.
        timing = float(content['timing']['initial'])
    except (KeyError, TypeError, ValueError):
        return False

    # Makes sure google is enabled
    if not google_enabled:
        return False

    # Makes sure is not Tor
    if network_type != 'normal':
        return False

    # Only picks instances that are fast enough
    if timing > 0.20:
        return False

    # Check for Google captcha. The search logic appends 'search?...'
    # directly to the instance URL, so instance URLs carry a trailing slash;
    # strip it here to avoid requesting '//search'.
    test_search = (
        f"{instance.rstrip('/')}/search?q=test&format=json&lang=en-US"
    )
    try:
        async with self.request.get(test_search) as resp:
            response = await resp.json()
        response['results'][0]['content']
    except (aiohttp.ClientError, asyncio.TimeoutError,
            KeyError, IndexError, TypeError, ValueError):
        # A timeout or an unparsable body disqualifies just this instance
        # rather than aborting the caller's whole refresh.
        return False

    # Reached if passes all checks
    return True
|
||||
|
||||
@commands.command()
@commands.is_owner()
async def rejson(self, ctx):
    """Refreshes the list of instances for searx."""

    # Owner-only: downloads the public instance index, keeps the entries
    # that pass _instance_check, then stores them on the cog and on disk.
    status = await ctx.send(
        '<a:updating:403035325242540032> Refreshing instance list...\n\n'
        '(Due to extensive quality checks, this may take a bit.)'
    )
    vetted: List[str] = []

    async with self.request.get('https://searx.space/data/instances.json') as r:
        index = await r.json()
        candidates = index['instances']

        # NOTE(review): indentation is not visible in this view; the checks
        # are assumed to run while the index response is still open.
        for url in candidates:
            if await self._instance_check(url, candidates.get(url)):
                vetted.append(url)

    # Keep the in-memory list and the file in step.
    self.instances = vetted
    with open('searxes.txt', 'w') as f:
        f.write('\n'.join(vetted))

    await status.edit(content='Instances refreshed!')
|
||||
|
||||
async def _old_basic_search(self, ctx, query: str,
                            category: str = None):
    """Runs a search for a command and posts the outcome.

    Shows the typing indicator, works out whether the channel is NSFW,
    sends whatever `_old_search_logic` returns, and logs the exchange
    through `self.info`.
    """

    async with ctx.typing():
        # Channels without an is_nsfw() method are treated as safe-for-work.
        channel = ctx.channel
        if hasattr(channel, 'is_nsfw'):
            is_nsfw = channel.is_nsfw()
        else:
            is_nsfw = False

        msg = await self._old_search_logic(query, is_nsfw, category)
        await ctx.send(msg)

        log_body = (
            f"**{ctx.author}** searched for `{query}` "
            f"in \"{ctx.guild}\" and got this:"
            f"\n\n{msg}"
        )
        await self.info(content=log_body, name="Search Results")
|
Loading…
Reference in a new issue