searchbot-discord/extensions/search.py

253 lines
8.1 KiB
Python
Raw Normal View History

2020-02-22 21:42:46 +00:00
# -*- coding: utf-8 -*-
# Search Functionality
2020-03-28 18:29:18 +00:00
# Provides search results from Qwant
2020-02-22 21:42:46 +00:00
'''Search Cog'''
2020-04-06 16:26:15 +00:00
from typing import List
2020-02-22 21:42:46 +00:00
import discord
from discord.ext import commands
2020-03-24 23:09:01 +00:00
import html2text
2020-03-25 02:45:43 +00:00
import re
2020-04-06 16:26:15 +00:00
from urllib.parse import quote_plus
from extensions.models import SearchExceptions
2020-02-22 21:42:46 +00:00
class Search(commands.Cog, name="Basic"):
"""Searches the web for a variety of different resources."""
def __init__(self, bot):
    """Bind the bot's shared services and set up the Markdown converter.

    Args:
        bot: The running bot. This constructor reads its ``logging``
            object (``info``/``warn``/``debug``), its ``request``
            attribute, and ``config['SCRAPESTACK']``.
    """
    # Main Stuff
    self.bot = bot
    self.emoji = "\U0001F50D"

    # Logging shortcuts; they are awaited elsewhere in this cog.
    self.info = bot.logging.info
    self.warn = bot.logging.warn
    self.debug = bot.logging.debug

    # Object used to issue GET requests in _search_logic.
    self.request = bot.request

    # scrapestack access key; compared against '' before use.
    self.scrape_token = bot.config['SCRAPESTACK']

    # Markdown converter: strips links, images, tables and emphasis so
    # result titles/descriptions come out as plain text.
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    converter.ignore_images = True
    converter.ignore_tables = True
    converter.ignore_emphasis = True
    converter.body_width = 0
    self.tomd = converter
2020-03-19 18:16:22 +00:00
2020-03-23 23:26:00 +00:00
async def _search_logic(self, query: str, is_nsfw: bool = False,
                        category: str = 'web', count: int = 5) -> list:
    """Uses scrapestack and the Qwant API to find search results.

    Args:
        query: Raw search text as typed by the user.
        is_nsfw: When True, Qwant is asked with safesearch "0";
            otherwise with safesearch "2".
        category: Path segment placed after ``/search/`` in the Qwant
            URL.
        count: Value sent as the ``count`` query parameter.

    Returns:
        The value stored under ``data -> result -> items`` in the JSON
        response.

    Raises:
        SearchExceptions.SafesearchFail: If the query contains any
            blocked term.
    """
    # NSFW Filtering
    # WARNING This list includes slurs.
    nono_words = [
        'tranny', 'faggot', 'fag', 'porn', 'cock', 'dick',
        'titty', ' tit ', 'boob', 'penis', 'slut', ' cum ', 'jizz',
        'semen', 'cooch', 'coochie', 'pussy', 'penis', 'fetish',
        'bdsm', 'sexy', 'xxx', 'orgasm', 'masturbat',
        'erotic', 'creampie', 'fap', 'nude', 'orgasm',
        'squirting', 'yiff', 'e621', ' sex', 'ejaculat',
        'cunt', 'vagina', 'coom', 'troon', 'hentai', 'yaoi',
        'bukkake', 'bara', 'shota', 'loli', 'fetish', 'spunk',
        'pron', 'p0rn', 'pr0n', 'gloryhole', 'felch', 'skullfuck',
        'scat', 'pissplay', 'piss play', 'underage', 'bbw',
        'fisting', 'queef', "rimming", 'rimjob', 'bdsm',
        'cbt', 'blumpkin', 'boner', 'prostitut', 'butt plug',
        'transvestite', 'femboy', 'castrat', 'philia', 'edging',
        'edgeplay', 'enema', 'facial', 'fellat', 'femdom', 'footjob',
        'blowjob', 'titjob', 'handjob', 'frot', 'gang bang', 'gangbang',
        'glory hole', 'hermap', 'jerk off', 'jerking off', 'jack off',
        'jacking off', 'kink', 'wet dream', 'anal', 'pegging', 'precum',
        'pre-cum', 'pre cum', 'priap', 'scrotum', 'shemale', 'smegma',
        'smut', 'softcore', 'transsexual', 'voyeur', 'viagra', 'wank',
        'whore'
    ]
    # Match on a lower-cased copy so "PORN" is caught like "porn", and
    # pad with spaces so the space-delimited entries (' tit ', ' sex')
    # also match at the very start or end of the query.
    haystack = f" {query.lower()} "
    # NOTE(review): the filter also runs when is_nsfw is True, although
    # the user-facing error in on_command_error tells people to retry in
    # an NSFW channel. Left as-is; confirm the intended policy.
    if any(n in haystack for n in nono_words):
        raise SearchExceptions.SafesearchFail('Query had NSFW.')

    base: str = "https://api.qwant.com/api"

    # Safesearch
    safesearch: str = "0" if is_nsfw else "2"

    # Search URL Building
    # api.qwant.com/api/search/web?count=5&q=test&safesearch=2&...
    # The query is percent-encoded so characters such as '&', '#', '+'
    # or spaces cannot break or inject URL parameters.
    search_url = (
        f"{base}/search/{category}"
        f"?count={count}"
        f"&q={quote_plus(query)}"
        f"&safesearch={safesearch}"
        "&t=web"
        "&locale=en_US"
        "&uiv=4"
    )

    # Scrape or not: only go through scrapestack when a key is actually
    # configured (an empty or missing key means "call Qwant directly").
    if self.scrape_token:
        # The whole Qwant URL is encoded again because it travels as the
        # value of scrapestack's own `url` parameter.
        search_url = (
            "http://api.scrapestack.com/scrape"
            f"?access_key={self.scrape_token}"
            f"&url={quote_plus(search_url)}"
        )

    await self.debug(search_url, name="_search_logic")

    # Searching
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0)'
            ' Gecko/20100101 Firefox/74.0'
        )
    }
    async with self.request.get(search_url, headers=headers) as resp:
        to_parse = await resp.json()

    # Sends results
    # NOTE(review): a response without data/result/items raises KeyError
    # here; callers in this file do not catch it.
    return to_parse['data']['result']['items']
2020-02-27 19:09:44 +00:00
2020-03-24 23:09:01 +00:00
async def _basic_search(self, ctx, query: str, category: str = 'web'):
    """Basic search formatting.

    Runs ``_search_logic`` for *query*, formats the results into one
    message, logs that message through ``self.info`` and sends it to
    the invoking context.

    Args:
        ctx: Invocation context; its ``channel``, ``author``, ``guild``,
            ``typing()`` and ``send()`` are used.
        query: Raw search text as typed by the user.
        category: Passed through to ``_search_logic``.

    Returns:
        The result of ``ctx.send`` when there are no results; otherwise
        None.

    Raises:
        SearchExceptions.SafesearchFail: Propagated from
            ``_search_logic``.
    """
    # NOTE Customizable count not yet implemented.

    # Safesearch variable: channels without an is_nsfw attribute are
    # treated as not NSFW.
    is_nsfw = (
        ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
        else False
    )

    def clean(raw: str) -> str:
        """Convert result HTML to plain text and neutralise mentions."""
        text = self.tomd.handle(raw).rstrip('\n')
        # Result text comes from arbitrary web pages; without this an
        # "@everyone" in a title or description would ping the server.
        return discord.utils.escape_mentions(text)

    # Handling
    async with ctx.typing():
        # Searches
        results = await self._search_logic(query, is_nsfw, category)

        # Escapes all nasties for displaying
        query_display = discord.utils.escape_mentions(query)
        query_display = discord.utils.escape_markdown(query_display)

        # Return if no results
        if not results:
            return await ctx.send(
                f"No results found for `{query_display}`."
            )
        count = len(results)

        # Gets the first entry's stuff
        first = results[0]
        first_title = clean(first['title'])
        first_url = first['url']
        first_desc = clean(first['desc'])

        # Builds the substring for each of the other results.
        other_results: List[str] = [
            f"**{clean(r['title'])}** {r['url']}" for r in results[1:]
        ]
        other_msg: str = "\n".join(other_results)

        # Builds message
        msg = (
            f"Showing **{count}** results for `{query_display}`.\n\n"
            f"**{first_title}** {first_url}\n{first_desc}\n\n"
            f"{other_msg}\n\n_Powered by Qwant._"
        )

        # Wraps every URL in <...> in the outgoing message.
        msg = re.sub(
            r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
            r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',
            r'<\1>',
            msg
        )

        # Sends message
        await self.info(
            f"**New Search** - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
            name="New Search"
        )
        await ctx.send(msg)
@commands.command()
async def search(self, ctx, *, query: str):
    """Search online for general results."""
    # Everything after the command name arrives as one string in `query`;
    # the shared formatter is called with the 'web' category spelled out.
    await self._basic_search(ctx, query, category='web')
2020-02-22 21:42:46 +00:00
# @commands.command(aliases=['video'])
# async def videos(self, ctx, *, query: str):
# """Search online for videos."""
# await self._basic_search(ctx, query, 'videos')
# @commands.command()
# async def music(self, ctx, *, query: str):
# """Search online for music."""
# await self._basic_search(ctx, query, 'music')
# @commands.command(aliases=['file'])
# async def files(self, ctx, *, query: str):
# """Search online for files."""
# await self._basic_search(ctx, query, 'files')
# @commands.command(aliases=['image'])
# async def images(self, ctx, *, query: str):
# """Search online for images."""
# await self._basic_search(ctx, query, 'images')
# @commands.command()
# async def it(self, ctx, *, query: str):
# """Search online for IT-related information."""
# await self._basic_search(ctx, query, 'it')
# @commands.command(aliases=['map'])
# async def maps(self, ctx, *, query: str):
# """Search online for map information."""
# await self._basic_search(ctx, query, 'maps')
2020-03-21 16:37:15 +00:00
2020-02-22 21:42:46 +00:00
@commands.Cog.listener()
async def on_command_error(self, ctx, error):
    """Fall back to a web search when a command cannot be run.

    Only ``CommandNotFound`` and ``CheckFailure`` errors trigger the
    fallback; every other error is ignored by this listener.
    """
    searchable = isinstance(
        error, (commands.CommandNotFound, commands.CheckFailure)
    )
    if not searchable:
        return

    # The message text with the prefix stripped becomes the query.
    prefix_len = len(ctx.prefix)
    query = ctx.message.content[prefix_len:]

    try:
        await self._basic_search(ctx, query)
    except SearchExceptions.SafesearchFail:
        # The query hit the blocklist; tell the user instead of failing
        # silently.
        await ctx.send(
            "**Sorry!** That query included language "
            "we cannot accept in a non-NSFW channel. "
            "Please try again in an NSFW channel."
        )
2020-02-22 21:42:46 +00:00
2020-02-28 16:02:24 +00:00
2020-02-22 21:42:46 +00:00
def setup(bot):
    """Create a Search cog for *bot* and add it to the bot."""
    cog = Search(bot)
    bot.add_cog(cog)