2020-02-22 21:42:46 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# Search Functionality
|
2020-03-28 18:29:18 +00:00
|
|
|
# Provides search results from Qwant
|
2020-02-22 21:42:46 +00:00
|
|
|
|
|
|
|
'''Search Cog'''
|
|
|
|
|
2020-04-06 16:26:15 +00:00
|
|
|
from typing import List
|
|
|
|
|
2020-02-22 21:42:46 +00:00
|
|
|
import discord
|
|
|
|
from discord.ext import commands
|
2020-03-24 23:09:01 +00:00
|
|
|
import html2text
|
2020-03-25 02:45:43 +00:00
|
|
|
import re
|
2020-04-06 16:26:15 +00:00
|
|
|
from urllib.parse import quote_plus
|
|
|
|
|
2020-04-09 02:05:37 +00:00
|
|
|
from extensions.models import searchexceptions
|
2020-04-08 22:23:14 +00:00
|
|
|
from extensions.models.regex import nono_re
|
2020-02-22 21:42:46 +00:00
|
|
|
|
2020-02-29 04:14:34 +00:00
|
|
|
|
2020-03-24 00:22:37 +00:00
|
|
|
class Search(commands.Cog, name="Basic"):
    """Searches the web for a variety of different resources."""

    def __init__(self, bot):

        # Main Stuff
        self.bot = bot
        self.info = bot.logging.info
        self.warn = bot.logging.warn
        self.debug = bot.logging.debug
        self.request = bot.request
        self.emoji = "\U0001F50D"
        # Optional scrapestack proxy token; empty string disables proxying.
        self.scrape_token = bot.config['SCRAPESTACK']

        # Markdown converter — strips everything but plain text so Qwant's
        # HTML snippets can be embedded safely in a Discord message.
        self.tomd = html2text.HTML2Text()
        self.tomd.ignore_links = True
        self.tomd.ignore_images = True
        self.tomd.ignore_tables = True
        self.tomd.ignore_emphasis = True
        self.tomd.body_width = 0

    async def _search_logic(self, query: str, is_nsfw: bool = False,
                            category: str = 'web', count: int = 5) -> list:
        """Uses scrapestack and the Qwant API to find search results.

        Parameters:
            query: Raw user query string.
            is_nsfw: Whether the invoking channel allows NSFW content.
            category: Qwant search vertical (web, images, videos, ...).
            count: Number of results to request.

        Returns a list of result-item dicts from Qwant's JSON payload.

        Raises:
            searchexceptions.SafesearchFail: query matched the NSFW
                wordlist while the channel is not NSFW.
        """

        # Typing
        base: str
        safesearch: str

        # NSFW Filtering — reject blacklisted terms outside NSFW channels
        if nono_re.match(query) and not is_nsfw:
            raise searchexceptions.SafesearchFail('Query had NSFW.')

        base = "https://api.qwant.com/api"

        # Safesearch level: "0" = off (NSFW channel), "2" = strictest
        safesearch = "0" if is_nsfw else "2"

        # Search URL Building
        # api.qwant.com/api/search/web?count=5&q=test&safesearch=2&...
        # XXX Find out why quote_plus wasn't working
        query_with_plus = query.replace(' ', '+')
        search_url = (
            f"{base}/search/{category}"
            f"?count={count}"
            f"&q={query_with_plus}"
            f"&safesearch={safesearch}"
            "&t=web"
            "&locale=en_US"
            "&uiv=4"
        )

        # Route the request through scrapestack when a token is configured;
        # the original Qwant URL must itself be percent-encoded here.
        if self.scrape_token != '':
            search_url = (
                "http://api.scrapestack.com/scrape"
                f"?access_key={self.scrape_token}"
                f"&url={quote_plus(search_url)}"
            )

        self.debug(search_url, name="_search_logic")

        # Searching — spoof a desktop browser UA so the API responds normally
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0)'
                ' Gecko/20100101 Firefox/74.0'
            )
        }
        async with self.request.get(search_url, headers=headers) as resp:
            to_parse = await resp.json()

        # Sends results
        return to_parse['data']['result']['items']

    async def _basic_search(self, ctx, query: str, category: str = 'web'):
        """Basic search formatting.

        Runs a search via ``_search_logic`` and sends the formatted
        results (first hit with description, remaining hits as links)
        to the invoking channel.
        """

        # NOTE Customizable count not yet implemented.
        count: int = 5

        # Safesearch variable — DM channels have no is_nsfw(), so
        # default those to safe-search-on.
        is_nsfw = (
            ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
            else False
        )

        # Handling
        async with ctx.typing():

            # Searches
            results = await self._search_logic(query, is_nsfw, category)
            count = len(results)

            # Escapes all nasties for displaying
            query_display = discord.utils.escape_mentions(query)
            query_display = discord.utils.escape_markdown(query_display)

            # Return if no results
            if not results:
                return await ctx.send(
                    f"No results found for `{query_display}`."
                )

            # Gets the first entry's stuff
            first_title = self.tomd.handle(results[0]['title']).rstrip('\n')
            first_url = results[0]['url']
            first_desc = self.tomd.handle(results[0]['desc']).rstrip('\n')

            # Builds the substring for each of the other results.
            other_results: List[str] = []
            for r in results[1:count]:
                title = self.tomd.handle(r['title']).rstrip('\n')
                url = r['url']
                other_results.append(f"**{title}** {url}")
            other_msg: str = "\n".join(other_results)

            # Builds message
            msg = (
                f"Showing **{count}** results for `{query_display}`.\n\n"
                f"**{first_title}** {first_url}\n{first_desc}\n\n"
                f"{other_msg}\n\n_Powered by Qwant._"
            )

            # Wrap every URL in <> so Discord suppresses link previews
            msg = re.sub(
                r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
                r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',
                r'<\1>',
                msg
            )

            # Sends message
            self.info(
                f"**New Search** - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
                name="New Search"
            )
            await ctx.send(msg)

    @commands.command()
    async def search(self, ctx, *, query: str):
        """Search online for general results."""

        await self._basic_search(ctx, query)

    # @commands.command(aliases=['video'])
    # async def videos(self, ctx, *, query: str):
    #     """Search online for videos."""

    #     await self._basic_search(ctx, query, 'videos')

    # @commands.command()
    # async def music(self, ctx, *, query: str):
    #     """Search online for music."""

    #     await self._basic_search(ctx, query, 'music')

    # @commands.command(aliases=['file'])
    # async def files(self, ctx, *, query: str):
    #     """Search online for files."""

    #     await self._basic_search(ctx, query, 'files')

    # @commands.command(aliases=['image'])
    # async def images(self, ctx, *, query: str):
    #     """Search online for images."""

    #     await self._basic_search(ctx, query, 'images')

    # @commands.command()
    # async def it(self, ctx, *, query: str):
    #     """Search online for IT-related information."""

    #     await self._basic_search(ctx, query, 'it')

    # @commands.command(aliases=['map'])
    # async def maps(self, ctx, *, query: str):
    #     """Search online for map information."""

    #     await self._basic_search(ctx, query, 'maps')

    @commands.Cog.listener()
    async def on_command_error(self, ctx, error):
        """Listener makes no command fallback to searching."""

        fallback = (commands.CommandNotFound, commands.CheckFailure)

        if isinstance(error, fallback):
            try:
                # Treat the whole message (minus prefix) as a search query.
                await self._basic_search(
                    ctx, ctx.message.content[len(ctx.prefix):]
                )
            except searchexceptions.SafesearchFail:
                await ctx.send(
                    "**Sorry!** That query included language "
                    "we cannot accept in a non-NSFW channel. "
                    "Please try again in an NSFW channel."
                )
|
2020-02-22 21:42:46 +00:00
|
|
|
|
2020-02-28 16:02:24 +00:00
|
|
|
|
2020-02-22 21:42:46 +00:00
|
|
|
def setup(bot):
    """Extension entry point for discord.py; registers the Search cog."""
    bot.add_cog(Search(bot))
|