searchbot-discord/extensions/search.py

# -*- coding: utf-8 -*-

# Search Functionality
# Provides search results from Qwant

'''Search Cog'''

from typing import List

import discord
from discord.ext import commands
import html2text
import re
from urllib.parse import quote_plus

from extensions.models import searchexceptions
from extensions.models.regex import nono_re


class Search(commands.Cog, name="Basic"):
    """Searches the web for a variety of different resources."""

    # NOTE: the class docstring and the command docstrings below are left
    # untouched on purpose; extra documentation lives in `#` comments so the
    # text discord.py reads from them stays the same.

    # Matches bare http(s) URLs so they can be wrapped in <...> before a
    # message is sent. Compiled once here rather than on every search.
    _URL_RE = re.compile(
        r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
        r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)'
    )

    # Reply sent when a query is rejected by the NSFW word filter. Shared by
    # the `search` command and the command-error fallback so both entry
    # points answer identically.
    _SAFESEARCH_NOTICE = (
        "**Sorry!** That query included language "
        "we cannot accept in a non-NSFW channel. "
        "Please try again in an NSFW channel."
    )

    def __init__(self, bot):

        # Main Stuff
        self.bot = bot
        # Shortcuts to the bot-provided logging callables.
        self.info = bot.logging.info
        self.warn = bot.logging.warn
        self.debug = bot.logging.debug
        # Shared HTTP session object supplied by the bot; used via `.get()`.
        self.request = bot.request
        # Magnifying-glass emoji for this cog.
        self.emoji = "\U0001F50D"
        # Scrapestack access key; a falsy value means "query Qwant directly".
        self.scrape_token = bot.config['SCRAPESTACK']

        # Markdown converter: strips links, images, tables and emphasis from
        # result HTML; body_width = 0 turns off html2text's line wrapping.
        self.tomd = html2text.HTML2Text()
        self.tomd.ignore_links = True
        self.tomd.ignore_images = True
        self.tomd.ignore_tables = True
        self.tomd.ignore_emphasis = True
        self.tomd.body_width = 0

    def _to_text(self, html: str) -> str:
        """Converts an HTML snippet to text without trailing newlines."""

        return self.tomd.handle(html).rstrip('\n')

    async def _search_logic(self, query: str, is_nsfw: bool = False,
                            category: str = 'web', count: int = 5) -> list:
        """Uses scrapestack and the Qwant API to find search results.

        Args:
            query: Raw search text.
            is_nsfw: True when the request comes from an NSFW channel; this
                skips the word filter and sends safesearch "0" instead of
                "2".
            category: Qwant search category placed in the URL path.
            count: Number of results requested from Qwant.

        Returns:
            The ``data -> result -> items`` value of the JSON response.

        Raises:
            searchexceptions.SafesearchFail: ``nono_re`` matched the query
                and ``is_nsfw`` is False.
        """

        # NSFW Filtering (the cheap flag is tested before the regex runs)
        if not is_nsfw and nono_re.match(query):
            raise searchexceptions.SafesearchFail('Query had NSFW.')

        base: str = "https://api.qwant.com/api"

        # Safesearch
        safesearch: str = "0" if is_nsfw else "2"

        # Search URL Building
        # api.qwant.com/api/search/web?count=5&q=test&safesearch=2&...
        # XXX Find out why quote_plus wasn't working
        # NOTE(review): only spaces are escaped here, so characters such as
        # '&', '#', '+' or '%' in a query reach the URL unescaped and can
        # alter or truncate the parameters - revisit once the quote_plus
        # problem above is understood.
        query_with_plus = query.replace(' ', '+')
        qwant_url = (
            f"{base}/search/{category}"
            f"?count={count}"
            f"&q={query_with_plus}"
            f"&safesearch={safesearch}"
            "&t=web"
            "&locale=en_US"
            "&uiv=4"
        )

        # Log the plain Qwant URL: the scrapestack-wrapped URL built below
        # embeds the access key, which must not end up in the logs.
        self.debug(qwant_url, name="_search_logic")

        # Scrape or not (truthiness also covers a token of None)
        request_url = qwant_url
        if self.scrape_token:
            request_url = (
                "http://api.scrapestack.com/scrape"
                f"?access_key={self.scrape_token}"
                f"&url={quote_plus(qwant_url)}"
            )

        # Searching
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0)'
                ' Gecko/20100101 Firefox/74.0'
            )
        }
        async with self.request.get(request_url, headers=headers) as resp:
            to_parse = await resp.json()

            # Sends results
            return to_parse['data']['result']['items']

    async def _basic_search(self, ctx, query: str, category: str = 'web'):
        """Basic search formatting.

        Runs the search for ``query`` and sends one message to ``ctx``: a
        header, the first result with its description, then the remaining
        results as title/URL lines. Sends a "No results found" message when
        the search returns nothing.

        Args:
            ctx: Invocation context; its channel decides the NSFW setting
                and receives the reply.
            query: Raw search text.
            category: Category forwarded to ``_search_logic``.

        Raises:
            searchexceptions.SafesearchFail: Propagated from
                ``_search_logic`` so the caller can pick the reply.
        """

        # NOTE Customizable count not yet implemented; the count shown to
        # the user is simply the number of results that came back.

        # Safesearch variable: channels without an is_nsfw() method are
        # treated as not NSFW.
        is_nsfw = (
            ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
            else False
        )

        # Handling
        async with ctx.typing():

            # Searches
            results = await self._search_logic(query, is_nsfw, category)

            # Escapes all nasties for displaying
            query_display = discord.utils.escape_mentions(query)
            query_display = discord.utils.escape_markdown(query_display)

            # Return if no results
            if not results:
                return await ctx.send(
                    f"No results found for `{query_display}`."
                )

            count = len(results)
            first, *others = results

            # Gets the first entry's stuff
            first_title = self._to_text(first['title'])
            first_url = first['url']
            first_desc = self._to_text(first['desc'])

            # Builds the substring for each of the other results.
            other_msg = "\n".join(
                f"**{self._to_text(r['title'])}** {r['url']}"
                for r in others
            )

            # Builds message
            msg = (
                f"Showing **{count}** results for `{query_display}`.\n\n"
                f"**{first_title}** {first_url}\n{first_desc}\n\n"
                f"{other_msg}\n\n_Powered by Qwant._"
            )

            # Wrap every URL in <...>
            msg = self._URL_RE.sub(r'<\1>', msg)

            # Sends message
            self.info(
                f"**New Search** - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
                name="New Search"
            )
            await ctx.send(msg)

    @commands.command()
    async def search(self, ctx, *, query: str):
        """Search online for general results."""

        # Handle the filter rejection here, exactly like the fallback
        # listener does, so the user gets the notice instead of the
        # exception escaping the command.
        try:
            await self._basic_search(ctx, query)
        except searchexceptions.SafesearchFail:
            await ctx.send(self._SAFESEARCH_NOTICE)

    # @commands.command(aliases=['video'])
    # async def videos(self, ctx, *, query: str):
    #     """Search online for videos."""

    #     await self._basic_search(ctx, query, 'videos')

    # @commands.command()
    # async def music(self, ctx, *, query: str):
    #     """Search online for music."""

    #     await self._basic_search(ctx, query, 'music')

    # @commands.command(aliases=['file'])
    # async def files(self, ctx, *, query: str):
    #     """Search online for files."""

    #     await self._basic_search(ctx, query, 'files')

    # @commands.command(aliases=['image'])
    # async def images(self, ctx, *, query: str):
    #     """Search online for images."""

    #     await self._basic_search(ctx, query, 'images')

    # @commands.command()
    # async def it(self, ctx, *, query: str):
    #     """Search online for IT-related information."""

    #     await self._basic_search(ctx, query, 'it')

    # @commands.command(aliases=['map'])
    # async def maps(self, ctx, *, query: str):
    #     """Search online for map information."""

    #     await self._basic_search(ctx, query, 'maps')

    @commands.Cog.listener()
    async def on_command_error(self, ctx, error):
        """Listener makes no command fallback to searching.

        When the error is a CommandNotFound or CheckFailure, the message
        content after the prefix is used as a search query. Any other error
        is ignored by this listener.
        """

        fallback = (commands.CommandNotFound, commands.CheckFailure)

        if not isinstance(error, fallback):
            return

        try:
            await self._basic_search(
                ctx, ctx.message.content[len(ctx.prefix):]
            )
        except searchexceptions.SafesearchFail:
            await ctx.send(self._SAFESEARCH_NOTICE)


def setup(bot):
    """Creates a Search cog for ``bot`` and registers it via add_cog."""

    search_cog = Search(bot)
    bot.add_cog(search_cog)
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`# -- coding: utf-8 --`

			`# Search Functionality`
Some comments 2020-03-28 18:29:18 +00:00			`# Provides search results from Qwant`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
			`'''Search Cog'''`

Organized imports 2020-04-06 16:26:15 +00:00			`from typing import List`

Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`import discord`
			`from discord.ext import commands`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`import html2text`
Escape all links 2020-03-25 02:45:43 +00:00			`import re`
Organized imports 2020-04-06 16:26:15 +00:00			`from urllib.parse import quote_plus`

Uhhh changes???? 2020-04-09 02:05:37 +00:00			`from extensions.models import searchexceptions`
Performance improvements, regex filtering 2020-04-08 22:23:14 +00:00			`from extensions.models.regex import nono_re`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Complete rearranging for modularity and dev cmds 2020-02-29 04:14:34 +00:00
Split specialty code into new cog, fixed DMs issue 2020-03-24 00:22:37 +00:00			`class Search(commands.Cog, name="Basic"):`
Finalized new help, added cog description. 2020-03-02 06:12:16 +00:00			`"""Searches the web for a variety of different resources."""`

Complete rearranging for modularity and dev cmds 2020-02-29 04:14:34 +00:00			`def __init__(self, bot):`
Added Cog Emojis 2020-03-02 18:11:15 +00:00
			`# Main Stuff`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`self.bot = bot`
New get_webhook and logging features 2020-03-03 04:08:20 +00:00			`self.info = bot.logging.info`
			`self.warn = bot.logging.warn`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`self.debug = bot.logging.debug`
Fixed global session, added no prefix in DMs 2020-02-23 22:47:51 +00:00			`self.request = bot.request`
Added Cog Emojis 2020-03-02 18:11:15 +00:00			`self.emoji = "\U0001F50D"`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`self.scrape_token = bot.config['SCRAPESTACK']`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Markdown converter`
			`self.tomd = html2text.HTML2Text()`
			`self.tomd.ignore_links = True`
			`self.tomd.ignore_images = True`
			`self.tomd.ignore_tables = True`
			`self.tomd.ignore_emphasis = True`
			`self.tomd.body_width = 0`
New Features I think 2020-03-19 18:16:22 +00:00
More filtering 2020-03-23 23:26:00 +00:00			`async def _search_logic(self, query: str, is_nsfw: bool = False,`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`category: str = 'web', count: int = 5) -> list:`
			`"""Uses scrapestack and the Qwant API to find search results."""`

			`# Typing`
			`base: str`
			`safesearch: str`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00			`# NSFW Filtering`
Fix nsfw searching, first work on tests 2020-04-09 00:18:03 +00:00			`if nono_re.match(query) and not is_nsfw:`
Uhhh changes???? 2020-04-09 02:05:37 +00:00			`raise searchexceptions.SafesearchFail('Query had NSFW.')`
Moving search to Qwant 2020-03-24 23:09:01 +00:00
			`base = "https://api.qwant.com/api"`

			`# Safesearch`
			`if is_nsfw:`
			`safesearch = "0"`
			`else:`
			`safesearch = "2"`

			`# Search URL Building`
			`# api.qwant.com/api/search/web?count=5&q=test&safesearch=2&...`
Fix the biggest bug of them all in 3 lines 2020-11-10 23:39:15 +00:00			`# XXX Find out why quote_plus wasn't working`
			`query_with_plus = query.replace(' ', '+')`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`search_url = (`
			`f"{base}/search/{category}"`
			`f"?count={count}"`
Fix the biggest bug of them all in 3 lines 2020-11-10 23:39:15 +00:00			`f"&q={query_with_plus}"`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`f"&safesearch={safesearch}"`
			`"&t=web"`
			`"&locale=en_US"`
			`"&uiv=4"`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00			`)`
Feexed scrapeing 2020-03-25 02:57:54 +00:00
			`# Scrape or not`
			`if self.scrape_token != '':`
			`search_url = (`
			`"http://api.scrapestack.com/scrape"`
			`f"?access_key={self.scrape_token}"`
			`f"&url={quote_plus(search_url)}"`
			`)`

Made logging not block. 2020-04-07 23:56:01 +00:00			`self.debug(search_url, name="_search_logic")`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Searching`
			`headers = {`
			`'User-Agent': (`
			`'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0)'`
			`' Gecko/20100101 Firefox/74.0'`
			`)`
			`}`
			`async with self.request.get(search_url, headers=headers) as resp:`
			`to_parse = await resp.json()`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Sends results`
			`return to_parse['data']['result']['items']`
Robust safesearch 2020-02-27 19:09:44 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`async def _basic_search(self, ctx, query: str, category: str = 'web'):`
			`"""Basic search formatting."""`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# NOTE Customizable count not yet implemented.`
			`count: int = 5`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Safesearch variable`
			`is_nsfw = (`
			`ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')`
			`else False`
			`)`

			`# Handling`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`async with ctx.typing():`
Moving search to Qwant 2020-03-24 23:09:01 +00:00
			`# Searches`
			`results = await self._search_logic(query, is_nsfw, category)`
			`count = len(results)`

			`# Escapes all nasties for displaying`
			`query_display = discord.utils.escape_mentions(query)`
			`query_display = discord.utils.escape_markdown(query_display)`

			`# Return if no results`
			`try:`
			`results[0]`
			`except IndexError:`
			`return await ctx.send(`
			f"No results found for `{query_display}`."
			`)`

			`# Gets the first entry's stuff`
			`first_title = self.tomd.handle(results[0]['title']).rstrip('\n')`
			`first_url = results[0]['url']`
			`first_desc = self.tomd.handle(results[0]['desc']).rstrip('\n')`

			`# Builds the substring for each of the other results.`
			`other_results: List[str] = []`
			`for r in results[1:count]:`
			`title = self.tomd.handle(r['title']).rstrip('\n')`
Oh actually fix it 2020-03-24 23:25:30 +00:00			`url = r['url']`
Escape all links 2020-03-25 02:45:43 +00:00			`other_results.append(f"{title} {url}")`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`other_msg: str = "\n".join(other_results)`

			`# Builds message`
			`msg = (`
			f"Showing {count} results for `{query_display}`.\n\n"
Escape all links 2020-03-25 02:45:43 +00:00			`f"{first_title} {first_url}\n{first_desc}\n\n"`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`f"{other_msg}\n\n_Powered by Qwant._"`
Split specialty code into new cog, fixed DMs issue 2020-03-24 00:22:37 +00:00			`)`

Escape all links 2020-03-25 02:45:43 +00:00			`msg = re.sub(`
			`r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'`
			`r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',`
			`r'<\1>',`
			`msg`
			`)`


Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Sends message`
Made logging not block. 2020-04-07 23:56:01 +00:00			`self.info(`
Some log formatting 2020-03-24 23:35:25 +00:00			f"New Search - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
Even more formatting 2020-03-24 23:36:10 +00:00			`name="New Search"`
Changed to an info 2020-03-24 23:34:45 +00:00			`)`
Split specialty code into new cog, fixed DMs issue 2020-03-24 00:22:37 +00:00			`await ctx.send(msg)`

			`@commands.command()`
			`async def search(self, ctx, *, query: str):`
			`"""Search online for general results."""`

			`await self._basic_search(ctx, query)`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['video'])`
			`# async def videos(self, ctx, *, query: str):`
			`# """Search online for videos."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'videos')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command()`
			`# async def music(self, ctx, *, query: str):`
			`# """Search online for music."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'music')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['file'])`
			`# async def files(self, ctx, *, query: str):`
			`# """Search online for files."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'files')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['image'])`
			`# async def images(self, ctx, *, query: str):`
			`# """Search online for images."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'images')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command()`
			`# async def it(self, ctx, *, query: str):`
			`# """Search online for IT-related information."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'it')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['map'])`
			`# async def maps(self, ctx, *, query: str):`
			`# """Search online for map information."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'maps')`
added anime and manga search command 2020-03-21 16:37:15 +00:00
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`@commands.Cog.listener()`
			`async def on_command_error(self, ctx, error):`
			`"""Listener makes no command fallback to searching."""`

Moving search to Qwant 2020-03-24 23:09:01 +00:00			`fallback = (commands.CommandNotFound, commands.CheckFailure)`
More filtering 2020-03-23 23:26:00 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`if isinstance(error, fallback):`
			`try:`
			`await self._basic_search(`
			`ctx, ctx.message.content[len(ctx.prefix):]`
			`)`
Uhhh changes???? 2020-04-09 02:05:37 +00:00			`except searchexceptions.SafesearchFail:`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`await ctx.send(`
			`"Sorry! That query included language "`
			`"we cannot accept in a non-NSFW channel. "`
			`"Please try again in an NSFW channel."`
			`)`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Fixed developer cog_check 2020-02-28 16:02:24 +00:00
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`def setup(bot):`
Moving search to Qwant 2020-03-24 23:09:01 +00:00
Complete rearranging for modularity and dev cmds 2020-02-29 04:14:34 +00:00			`bot.add_cog(Search(bot))`