searchbot-discord/extensions/search.py

# -*- coding: utf-8 -*-

# Search Functionality
# Provides search results from Qwant

'''Search Cog'''

from typing import List

import discord
from discord.ext import commands
import html2text
import re
from urllib.parse import quote_plus

from extensions.models import SearchExceptions


class Search(commands.Cog, name="Basic"):
    """Searches the web for a variety of different resources."""

    def __init__(self, bot):
        """Bind the bot's shared services and build the HTML converter.

        Args:
            bot: The running bot; must expose ``logging``, ``request``
                and a ``config`` mapping with a ``'SCRAPESTACK'`` entry.
        """

        # Shared bot services
        self.bot = bot
        logger = bot.logging
        self.info = logger.info
        self.warn = logger.warn
        self.debug = logger.debug
        self.request = bot.request

        # Emoji attached to this cog (magnifying glass)
        self.emoji = "\U0001F50D"

        # An empty token means requests go straight to Qwant
        self.scrape_token = bot.config['SCRAPESTACK']

        # HTML -> plain-text converter for result titles/descriptions:
        # links, images, tables and emphasis markup are all dropped.
        converter = html2text.HTML2Text()
        for option in ('ignore_links', 'ignore_images',
                       'ignore_tables', 'ignore_emphasis'):
            setattr(converter, option, True)
        # NOTE(review): 0 should disable html2text's line wrapping - confirm
        converter.body_width = 0
        self.tomd = converter

    async def _search_logic(self, query: str, is_nsfw: bool = False,
                            category: str = 'web', count: int = 5) -> list:
        """Fetch raw search results for a query from the Qwant API.

        The request is routed through scrapestack when a scrapestack
        token is configured, and sent straight to Qwant otherwise.

        Args:
            query: The raw, user-supplied search terms.
            is_nsfw: Whether the search comes from an NSFW channel.
                When true, the word filter is skipped and Qwant's
                safesearch is turned off.
            category: Qwant search category, placed in the URL path.
            count: Number of results to request.

        Returns:
            The list stored under ``data -> result -> items`` in the
            JSON response.

        Raises:
            SearchExceptions.SafesearchFail: If ``is_nsfw`` is false and
                the query contains a filtered term.
        """

        # NSFW Filtering
        # WARNING This list includes slurs.
        nono_words = [
            'tranny', 'faggot', 'fag', 'porn', 'cock', 'dick',
            'titty', ' tit ', 'boob', 'penis', 'slut', ' cum ', 'jizz',
            'semen', 'cooch', 'coochie', 'pussy', 'penis', 'fetish',
            'bdsm', 'sexy', 'xxx', 'orgasm', 'masturbat',
            'erotic', 'creampie', 'fap', 'nude', 'orgasm',
            'squirting', 'yiff', 'e621', ' sex', 'ejaculat',
            'cunt', 'vagina', 'coom', 'troon', 'hentai', 'yaoi',
            'bukkake', 'bara', 'shota', 'loli', 'fetish', 'spunk',
            'pron', 'p0rn', 'pr0n', 'gloryhole', 'felch', 'skullfuck',
            'scat', 'pissplay', 'piss play', 'underage', 'bbw',
            'fisting', 'queef', "rimming", 'rimjob', 'bdsm',
            'cbt', 'blumpkin', 'boner', 'prostitut', 'butt plug',
            'transvestite', 'femboy', 'castrat', 'philia', 'edging',
            'edgeplay', 'enema', 'facial', 'fellat', 'femdom', 'footjob',
            'blowjob', 'titjob', 'handjob', 'frot', 'gang bang', 'gangbang',
            'glory hole', 'hermap', 'jerk off', 'jerking off', 'jack off',
            'jacking off', 'kink', 'wet dream', 'anal', 'pegging', 'precum',
            'pre-cum', 'pre cum', 'priap', 'scrotum', 'shemale', 'smegma',
            'smut', 'softcore', 'transsexual', 'voyeur', 'viagra', 'wank',
            'whore'
        ]

        # The filter only applies outside NSFW channels: the rejection
        # message tells users to retry in an NSFW channel, so blocking
        # there as well would make that advice impossible to follow.
        if not is_nsfw:
            # Lowercased so capitalisation cannot dodge the filter, and
            # padded so space-delimited terms (' tit ', ' sex') also match
            # at the very start or end of the query.
            haystack = f" {query.lower()} "
            if any(word in haystack for word in nono_words):
                raise SearchExceptions.SafesearchFail('Query had NSFW.')

        base = "https://api.qwant.com/api"

        # Safesearch: "0" turns it off (NSFW channels), "2" is sent otherwise
        safesearch = "0" if is_nsfw else "2"

        # Search URL Building
        # api.qwant.com/api/search/web?count=5&q=test&safesearch=2&...
        # The query is percent-encoded so characters such as '&', '#' or
        # '=' in user input cannot truncate it or inject extra parameters
        # (e.g. overriding safesearch).
        qwant_url = (
            f"{base}/search/{category}"
            f"?count={count}"
            f"&q={quote_plus(query)}"
            f"&safesearch={safesearch}"
            "&t=web"
            "&locale=en_US"
            "&uiv=4"
        )

        # Log the Qwant URL rather than the final request URL so the
        # scrapestack access key never ends up in the logs.
        await self.debug(qwant_url, name="_search_logic")

        # Scrape or not: with a token configured, scrapestack fetches the
        # Qwant URL on our behalf; it is encoded again as a parameter value.
        request_url = qwant_url
        if self.scrape_token:
            request_url = (
                "http://api.scrapestack.com/scrape"
                f"?access_key={self.scrape_token}"
                f"&url={quote_plus(qwant_url)}"
            )

        # Searching
        headers = {
            'User-Agent': (
                'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0)'
                ' Gecko/20100101 Firefox/74.0'
            )
        }
        async with self.request.get(request_url, headers=headers) as resp:
            payload = await resp.json()

        # Sends results
        return payload['data']['result']['items']

    async def _basic_search(self, ctx, query: str, category: str = 'web'):
        """Run a search and post the formatted results to the channel.

        Args:
            ctx: Invocation context; supplies the channel (for its NSFW
                flag), the typing indicator and the reply target.
            query: The raw, user-supplied search terms.
            category: Search category forwarded to ``_search_logic``.

        Raises:
            SearchExceptions.SafesearchFail: Propagated from
                ``_search_logic`` when its word filter rejects the query.
        """

        # NOTE Customizable count not yet implemented; _search_logic's
        # default result count is used.

        # Safesearch variable
        # Channels without an is_nsfw() method are treated as SFW
        # (presumably DM channels - verify against discord.py).
        is_nsfw = (
            ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
            else False
        )

        # Handling
        async with ctx.typing():

            # Searches
            results = await self._search_logic(query, is_nsfw, category)

            # Escapes all nasties for displaying
            query_display = discord.utils.escape_mentions(query)
            query_display = discord.utils.escape_markdown(query_display)

            # Return if no results
            if not results:
                return await ctx.send(
                    f"No results found for `{query_display}`."
                )

            count = len(results)

            # The first entry is shown in full: title, URL and description
            first, *rest = results
            first_title = self.tomd.handle(first['title']).rstrip('\n')
            first_url = first['url']
            first_desc = self.tomd.handle(first['desc']).rstrip('\n')

            # Every other entry is condensed to a "title url" line
            other_results: List[str] = []
            for entry in rest:
                title = self.tomd.handle(entry['title']).rstrip('\n')
                other_results.append(f"**{title}** {entry['url']}")
            other_msg = "\n".join(other_results)

            # Builds message
            msg = (
                f"Showing **{count}** results for `{query_display}`.\n\n"
                f"**{first_title}** {first_url}\n{first_desc}\n\n"
                f"{other_msg}\n\n_Powered by Qwant._"
            )

            # Wraps every URL in the message in <angle brackets>
            # (presumably to stop Discord generating link previews).
            msg = re.sub(
                r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
                r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',
                r'<\1>',
                msg
            )

            # Sends message
            await self.info(
                f"**New Search** - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
                name="New Search"
            )
            await ctx.send(msg)

    @commands.command()
    async def search(self, ctx, *, query: str):
        """Search online for general results."""

        # NOTE The docstring above is left untouched on purpose:
        # discord.py uses a command's docstring as its help text.

        # A filtered query raises SafesearchFail. The on_command_error
        # fallback in this cog only reacts to CommandNotFound and
        # CheckFailure, so the rejection is reported here; otherwise the
        # explicit command would give the user no explanation.
        try:
            await self._basic_search(ctx, query)
        except SearchExceptions.SafesearchFail:
            await ctx.send(
                "**Sorry!** That query included language "
                "we cannot accept in a non-NSFW channel. "
                "Please try again in an NSFW channel."
            )

    # @commands.command(aliases=['video'])
    # async def videos(self, ctx, *, query: str):
    #     """Search online for videos."""

    #     await self._basic_search(ctx, query, 'videos')

    # @commands.command()
    # async def music(self, ctx, *, query: str):
    #     """Search online for music."""

    #     await self._basic_search(ctx, query, 'music')

    # @commands.command(aliases=['file'])
    # async def files(self, ctx, *, query: str):
    #     """Search online for files."""

    #     await self._basic_search(ctx, query, 'files')

    # @commands.command(aliases=['image'])
    # async def images(self, ctx, *, query: str):
    #     """Search online for images."""

    #     await self._basic_search(ctx, query, 'images')

    # @commands.command()
    # async def it(self, ctx, *, query: str):
    #     """Search online for IT-related information."""

    #     await self._basic_search(ctx, query, 'it')

    # @commands.command(aliases=['map'])
    # async def maps(self, ctx, *, query: str):
    #     """Search online for map information."""

    #     await self._basic_search(ctx, query, 'maps')

    @commands.Cog.listener()
    async def on_command_error(self, ctx, error):
        """Fall back to a web search when a command cannot be run.

        Only ``CommandNotFound`` and ``CheckFailure`` errors trigger the
        fallback; every other error is ignored by this listener.
        """

        searchable = (commands.CommandNotFound, commands.CheckFailure)
        if not isinstance(error, searchable):
            return

        try:
            # Everything after the prefix is treated as the query
            text = ctx.message.content[len(ctx.prefix):]
            await self._basic_search(ctx, text)
        except SearchExceptions.SafesearchFail:
            apology = (
                "**Sorry!** That query included language "
                "we cannot accept in a non-NSFW channel. "
                "Please try again in an NSFW channel."
            )
            await ctx.send(apology)


def setup(bot):
    """Register the Search cog on ``bot``.

    Presumably invoked by discord.py's extension loader - confirm.
    """
    search_cog = Search(bot)
    bot.add_cog(search_cog)
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`# -- coding: utf-8 --`

			`# Search Functionality`
Some comments 2020-03-28 18:29:18 +00:00			`# Provides search results from Qwant`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
			`'''Search Cog'''`

Organized imports 2020-04-06 16:26:15 +00:00			`from typing import List`

Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`import discord`
			`from discord.ext import commands`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`import html2text`
Escape all links 2020-03-25 02:45:43 +00:00			`import re`
Organized imports 2020-04-06 16:26:15 +00:00			`from urllib.parse import quote_plus`

			`from extensions.models import SearchExceptions`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Complete rearranging for modularity and dev cmds 2020-02-29 04:14:34 +00:00
Split specialty code into new cog, fixed DMs issue 2020-03-24 00:22:37 +00:00			`class Search(commands.Cog, name="Basic"):`
Finalized new help, added cog description. 2020-03-02 06:12:16 +00:00			`"""Searches the web for a variety of different resources."""`

Complete rearranging for modularity and dev cmds 2020-02-29 04:14:34 +00:00			`def __init__(self, bot):`
Added Cog Emojis 2020-03-02 18:11:15 +00:00
			`# Main Stuff`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`self.bot = bot`
New get_webhook and logging features 2020-03-03 04:08:20 +00:00			`self.info = bot.logging.info`
			`self.warn = bot.logging.warn`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`self.debug = bot.logging.debug`
Fixed global session, added no prefix in DMs 2020-02-23 22:47:51 +00:00			`self.request = bot.request`
Added Cog Emojis 2020-03-02 18:11:15 +00:00			`self.emoji = "\U0001F50D"`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`self.scrape_token = bot.config['SCRAPESTACK']`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Markdown converter`
			`self.tomd = html2text.HTML2Text()`
			`self.tomd.ignore_links = True`
			`self.tomd.ignore_images = True`
			`self.tomd.ignore_tables = True`
			`self.tomd.ignore_emphasis = True`
			`self.tomd.body_width = 0`
New Features I think 2020-03-19 18:16:22 +00:00
More filtering 2020-03-23 23:26:00 +00:00			`async def _search_logic(self, query: str, is_nsfw: bool = False,`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`category: str = 'web', count: int = 5) -> list:`
			`"""Uses scrapestack and the Qwant API to find search results."""`

			`# Typing`
			`base: str`
			`safesearch: str`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00			`# NSFW Filtering`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# WARNING This list includes slurs.`
Robust safesearch 2020-02-27 19:09:44 +00:00			`nono_words = [`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`'tranny', 'faggot', 'fag', 'porn', 'cock', 'dick',`
			`'titty', ' tit ', 'boob', 'penis', 'slut', ' cum ', 'jizz',`
			`'semen', 'cooch', 'coochie', 'pussy', 'penis', 'fetish',`
			`'bdsm', 'sexy', 'xxx', 'orgasm', 'masturbat',`
			`'erotic', 'creampie', 'fap', 'nude', 'orgasm',`
			`'squirting', 'yiff', 'e621', ' sex', 'ejaculat',`
			`'cunt', 'vagina', 'coom', 'troon', 'hentai', 'yaoi',`
			`'bukkake', 'bara', 'shota', 'loli', 'fetish', 'spunk',`
			`'pron', 'p0rn', 'pr0n', 'gloryhole', 'felch', 'skullfuck',`
			`'scat', 'pissplay', 'piss play', 'underage', 'bbw',`
			`'fisting', 'queef', "rimming", 'rimjob', 'bdsm',`
			`'cbt', 'blumpkin', 'boner', 'prostitut', 'butt plug',`
			`'transvestite', 'femboy', 'castrat', 'philia', 'edging',`
			`'edgeplay', 'enema', 'facial', 'fellat', 'femdom', 'footjob',`
			`'blowjob', 'titjob', 'handjob', 'frot', 'gang bang', 'gangbang',`
			`'glory hole', 'hermap', 'jerk off', 'jerking off', 'jack off',`
			`'jacking off', 'kink', 'wet dream', 'anal', 'pegging', 'precum',`
			`'pre-cum', 'pre cum', 'priap', 'scrotum', 'shemale', 'smegma',`
			`'smut', 'softcore', 'transsexual', 'voyeur', 'viagra', 'wank',`
			`'whore'`
Robust safesearch 2020-02-27 19:09:44 +00:00			`]`

Moving search to Qwant 2020-03-24 23:09:01 +00:00			`if any(n in query for n in nono_words):`
			`raise SearchExceptions.SafesearchFail('Query had NSFW.')`

			`base = "https://api.qwant.com/api"`

			`# Safesearch`
			`if is_nsfw:`
			`safesearch = "0"`
			`else:`
			`safesearch = "2"`

			`# Search URL Building`
			`# api.qwant.com/api/search/web?count=5&q=test&safesearch=2&...`
			`search_url = (`
			`f"{base}/search/{category}"`
			`f"?count={count}"`
			`f"&q={query}"`
			`f"&safesearch={safesearch}"`
			`"&t=web"`
			`"&locale=en_US"`
			`"&uiv=4"`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00			`)`
Feexed scrapeing 2020-03-25 02:57:54 +00:00
			`# Scrape or not`
			`if self.scrape_token != '':`
			`search_url = (`
			`"http://api.scrapestack.com/scrape"`
			`f"?access_key={self.scrape_token}"`
			`f"&url={quote_plus(search_url)}"`
			`)`

Moving search to Qwant 2020-03-24 23:09:01 +00:00			`await self.debug(search_url, name="_search_logic")`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Searching`
			`headers = {`
			`'User-Agent': (`
			`'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:74.0)'`
			`' Gecko/20100101 Firefox/74.0'`
			`)`
			`}`
			`async with self.request.get(search_url, headers=headers) as resp:`
			`to_parse = await resp.json()`
Escape all links 2020-03-25 02:45:43 +00:00			`print(to_parse)`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Sends results`
			`return to_parse['data']['result']['items']`
Robust safesearch 2020-02-27 19:09:44 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`async def _basic_search(self, ctx, query: str, category: str = 'web'):`
			`"""Basic search formatting."""`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# NOTE Customizable count not yet implemented.`
			`count: int = 5`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Safesearch variable`
			`is_nsfw = (`
			`ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')`
			`else False`
			`)`

			`# Handling`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`async with ctx.typing():`
Moving search to Qwant 2020-03-24 23:09:01 +00:00
			`# Searches`
			`results = await self._search_logic(query, is_nsfw, category)`
			`count = len(results)`

			`# Escapes all nasties for displaying`
			`query_display = discord.utils.escape_mentions(query)`
			`query_display = discord.utils.escape_markdown(query_display)`

			`# Return if no results`
			`try:`
			`results[0]`
			`except IndexError:`
			`return await ctx.send(`
			f"No results found for `{query_display}`."
			`)`

			`# Gets the first entry's stuff`
			`first_title = self.tomd.handle(results[0]['title']).rstrip('\n')`
			`first_url = results[0]['url']`
			`first_desc = self.tomd.handle(results[0]['desc']).rstrip('\n')`

			`# Builds the substring for each of the other results.`
			`other_results: List[str] = []`
			`for r in results[1:count]:`
			`title = self.tomd.handle(r['title']).rstrip('\n')`
Oh actually fix it 2020-03-24 23:25:30 +00:00			`url = r['url']`
Escape all links 2020-03-25 02:45:43 +00:00			`other_results.append(f"{title} {url}")`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`other_msg: str = "\n".join(other_results)`

			`# Builds message`
			`msg = (`
			f"Showing {count} results for `{query_display}`.\n\n"
Escape all links 2020-03-25 02:45:43 +00:00			`f"{first_title} {first_url}\n{first_desc}\n\n"`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`f"{other_msg}\n\n_Powered by Qwant._"`
Split specialty code into new cog, fixed DMs issue 2020-03-24 00:22:37 +00:00			`)`

Escape all links 2020-03-25 02:45:43 +00:00			`print(msg)`

			`msg = re.sub(`
			`r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'`
			`r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',`
			`r'<\1>',`
			`msg`
			`)`


Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# Sends message`
Changed to an info 2020-03-24 23:34:45 +00:00			`await self.info(`
Some log formatting 2020-03-24 23:35:25 +00:00			f"New Search - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
Even more formatting 2020-03-24 23:36:10 +00:00			`name="New Search"`
Changed to an info 2020-03-24 23:34:45 +00:00			`)`
Split specialty code into new cog, fixed DMs issue 2020-03-24 00:22:37 +00:00			`await ctx.send(msg)`

			`@commands.command()`
			`async def search(self, ctx, *, query: str):`
			`"""Search online for general results."""`

			`await self._basic_search(ctx, query)`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['video'])`
			`# async def videos(self, ctx, *, query: str):`
			`# """Search online for videos."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'videos')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command()`
			`# async def music(self, ctx, *, query: str):`
			`# """Search online for music."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'music')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['file'])`
			`# async def files(self, ctx, *, query: str):`
			`# """Search online for files."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'files')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['image'])`
			`# async def images(self, ctx, *, query: str):`
			`# """Search online for images."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'images')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command()`
			`# async def it(self, ctx, *, query: str):`
			`# """Search online for IT-related information."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'it')`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# @commands.command(aliases=['map'])`
			`# async def maps(self, ctx, *, query: str):`
			`# """Search online for map information."""`
Vote, config-example, search changes - Added vote command to botlist cog - Added aliases for categories to search cog - Added blank tokens to config example 2020-03-02 02:50:49 +00:00
Commenting out other commands until i know 2020-03-25 04:52:38 +00:00			`# await self._basic_search(ctx, query, 'maps')`
added anime and manga search command 2020-03-21 16:37:15 +00:00
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`@commands.Cog.listener()`
			`async def on_command_error(self, ctx, error):`
			`"""Listener makes no command fallback to searching."""`

Moving search to Qwant 2020-03-24 23:09:01 +00:00			`fallback = (commands.CommandNotFound, commands.CheckFailure)`
More filtering 2020-03-23 23:26:00 +00:00
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`if isinstance(error, fallback):`
			`try:`
			`await self._basic_search(`
			`ctx, ctx.message.content[len(ctx.prefix):]`
			`)`
			`except SearchExceptions.SafesearchFail:`
			`await ctx.send(`
			`"Sorry! That query included language "`
			`"we cannot accept in a non-NSFW channel. "`
			`"Please try again in an NSFW channel."`
			`)`
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00
Fixed developer cog_check 2020-02-28 16:02:24 +00:00
Full Rejigging of Structure 2020-02-22 21:42:46 +00:00			`def setup(bot):`
Moving search to Qwant 2020-03-24 23:09:01 +00:00
Complete rearranging for modularity and dev cmds 2020-02-29 04:14:34 +00:00			`bot.add_cog(Search(bot))`