searchbot-discord/old_search.py

# This is the old search logic for reference purposes 
 
    async def _old_search_logic(self, query: str, is_nsfw: bool = False,
                                category: str = None) -> str:
        """Provides search logic for all search commands.

        Picks a random instance from ``self.instances`` (loading
        ``searxes.txt`` when the list is empty), queries its JSON search
        endpoint and formats up to five results as a message.

        Args:
            query: The raw search terms typed by the user.
            is_nsfw: Whether the search may return NSFW content. When
                False, the query is screened against a word list, safe
                search is requested and results from blocked sites are
                dropped.
            category: Optional search category appended to the request
                (``'videos'`` additionally pins the engines used).

        Returns:
            The formatted result message, a refusal message when the
            query is not acceptable outside an NSFW channel, or an error
            message when no usable response could be obtained.
        """

        # Imported locally so this method does not depend on a file-level
        # import that may not exist.
        from urllib.parse import quote_plus

        # NSFW Filtering
        # WARNING - This list includes slurs.
        nono_words = [
            'tranny', 'faggot', 'fag',
            'porn', 'cock', 'dick',
            'titty', 'boob', 'penis',
            'slut', 'cum', 'jizz',
            'semen', 'cooch', 'coochie',
            'pussy', 'penis', 'fetish',
            'bdsm', 'sexy', 'xxx',
            'orgasm', 'masturbation',
            'erotic', 'creampie',
            'fap', 'nude', 'orgasm',
            'squirting', 'yiff',
            'e621'
        ]
        nono_sites = [
            'xvideos', 'pornhub',
            'xhamster', 'xnxx',
            'youporn', 'xxx',
            'freexcafe', 'sex.com',
            'e621', 'nhentai'
        ]

        if not is_nsfw:
            # Spaces are stripped and case is folded so that "p o r n" and
            # "PORN" are caught as well as the plain spelling.
            squashed = query.replace(" ", "").lower()
            if any(word in squashed for word in nono_words):
                return (
                    "**Sorry!** That query included language "
                    "we cannot accept in a non-NSFW channel. "
                    "Please try again in an NSFW channel."
                )

        # Choose an instance
        if not self.instances:
            with open('searxes.txt') as f:
                # Skip blank lines (e.g. a trailing newline) so an empty
                # string is never picked as an instance URL.
                self.instances = [
                    line.strip() for line in f if line.strip()
                ]
        if not self.instances:
            return (
                "**Sorry!** No search instances are available right now. "
                "Please try again later."
            )
        instance = random.choice(self.instances)

        # Error Template
        error_msg = (
            "**An error occured!**\n\n"
            f"There was a problem with `{instance}`. Please try again later.\n"
            f"_If problems with this instance persist, "
            f"contact`{self.bot.appinfo.owner}` to have it removed._"
        )

        # Create the URL to make an API call to.
        # The query is percent-encoded so characters such as '&' or '#'
        # cannot alter the request, and the base URL is normalised to end
        # with exactly one slash.
        base = instance if instance.endswith('/') else instance + '/'
        call = (
            f'{base}search?q={quote_plus(query)}'
            '&format=json&language=en-US'
        )

        # If a type is provided, add that type to the call URL
        if category:
            call += f'&categories={category}'

        call += '&safesearch=0' if is_nsfw else '&safesearch=1'

        # Figure out engines for different categories to get decent results.
        if category == 'videos':
            call += '&engines=bing+videos,google+videos'

        # Make said API call
        try:
            async with self.request.get(call) as resp:
                response = await resp.json()
        except (aiohttp.ClientError, ValueError):
            # ValueError covers a body that is not valid JSON.
            return error_msg

        # Escape stuff. The raw query is kept untouched so that a retry
        # searches for what the user typed, not the escaped text.
        safe_query = discord.utils.escape_mentions(query)
        safe_query = discord.utils.escape_markdown(safe_query)

        # Create message with results
        try:
            # infoboxes = response['infoboxes']
            results = response['results']

            # Remove no-no sites. Every result is checked (not only the
            # first few) so a blocked site cannot slide into the top five,
            # and each entry is dropped at most once.
            if not is_nsfw:
                results = [
                    r for r in results
                    if not any(site in r['url'] for site in nono_sites)
                ]

            # Handle tiny result count (counted after filtering)
            amt = min(len(results), 5)

            # Header
            msg = f"Showing **{amt}** results for `{safe_query}`. \n\n"
            # Expanded Result
            msg += (
                f"**{results[0]['title']}** <{results[0]['url']}>\n"
                f"{results[0]['content']}\n\n")
            # Other Results
            msg += "\n".join(
                [f"**{entry['title']}** <{entry['url']}>"
                 for entry in results[1:5]])
            # Instance Info
            msg += f"\n\n_Results retrieved from instance `{instance}`._"

            return msg

        # Reached if error with returned results
        except (KeyError, IndexError, TypeError) as e:
            # Logging
            self.warn(
                f"A user encountered a(n) `{e}` with <{instance}> when searching for `{safe_query}`. "
                "Consider removing it or looking into it.",
                name="Failed Instance"
            )

            # Weed the instance out. Another search running at the same
            # time may already have removed it.
            if instance in self.instances:
                self.instances.remove(instance)

            # Stop once every known instance has failed; otherwise the
            # list would be reloaded from disk and retried without end.
            if not self.instances:
                return error_msg

            # Recurse until good response, keeping the same category
            return await self._old_search_logic(query, is_nsfw, category)

    async def _instance_check(self, instance: str, content: dict) -> bool:
        """Checks the quality of an instance.

        Args:
            instance: Base URL of the instance, used to build a test
                search request.
            content: Metadata describing the instance. The keys read here
                are ``error``, ``engines``, ``timing`` and
                ``network_type``.

        Returns:
            True only if the metadata has no ``error`` entry, the google
            engine is listed and enabled, the network type is
            ``'normal'``, the initial timing is at most 0.20 and a live
            test search returns a first result with a ``content`` field.
            Missing or malformed metadata yields False rather than an
            exception.
        """

        # Imported locally so this method does not depend on a file-level
        # import that may not exist.
        import asyncio

        # Makes sure proper values exist
        if not isinstance(content, dict) or 'error' in content:
            return False
        engines = content.get('engines')
        timing = content.get('timing')
        if not (isinstance(engines, dict) and isinstance(timing, dict)):
            return False
        if 'initial' not in timing:
            return False

        # Makes sure google is listed and enabled
        google = engines.get('google')
        if not (isinstance(google, dict) and google.get('enabled')):
            return False

        # Makes sure is not Tor
        if content.get('network_type') != 'normal':
            return False

        # Only picks instances that are fast enough.
        # float() rather than int(): truncating to an integer would let
        # every value below 1.0 pass the 0.20 threshold.
        try:
            initial = float(timing['initial'])
        except (TypeError, ValueError):
            return False
        if initial > 0.20:
            return False

        # Check for Google captcha
        test_search = (
            f"{instance.rstrip('/')}/search"
            "?q=test&format=json&language=en-US"
        )
        try:
            async with self.request.get(test_search) as resp:
                response = await resp.json()
            response['results'][0]['content']
        except (aiohttp.ClientError, asyncio.TimeoutError,
                KeyError, IndexError, TypeError, ValueError):
            # Unreachable, timed out, not JSON, or no usable first result.
            return False

        # Reached if passes all checks
        return True

            @commands.command()
    @commands.is_owner()
    async def rejson(self, ctx):
        """Refreshes the list of instances for searx."""

        msg = await ctx.send('<a:updating:403035325242540032> Refreshing instance list...\n\n'
                             '(Due to extensive quality checks, this may take a bit.)')
        plausible: List[str] = []

        # Get, parse, and quality check all instances
        async with self.request.get('https://searx.space/data/instances.json') as r:
            # Parsing
            searx_json = await r.json()
            instances = searx_json['instances']

            # Quality Check
            for i in instances:
                content = instances.get(i)
                is_good: bool = await self._instance_check(i, content)
                if is_good:
                    plausible.append(i)

        # Save new list
        self.instances = plausible
        with open('searxes.txt', 'w') as f:
            f.write('\n'.join(plausible))

        await msg.edit(content='Instances refreshed!')

            async def _old_basic_search(self, ctx, query: str,
                                category: str = None):
        """Base search message generation."""

        async with ctx.typing():
            is_nsfw = (
                ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
                else False
            )

            msg = await self._old_search_logic(query, is_nsfw, category)
            await ctx.send(msg)

            self.info(
                content=(
                    f"**{ctx.author}** searched for `{query}` "
                    f"in \"{ctx.guild}\" and got this:"
                    f"\n\n{msg}"
                ),
                name="Search Results"
            )
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`# This is the old search logic for reference purposes`

			`async def _old_search_logic(self, query: str, is_nsfw: bool = False,`
			`category: str = None) -> str:`
			`"""Provides search logic for all search commands."""`

			`# NSFW Filtering`
			`# WARNING - This list includes slurs.`
			`nono_words = [`
			`'tranny', 'faggot', 'fag',`
			`'porn', 'cock', 'dick',`
			`'titty', 'boob', 'penis',`
			`'slut', 'cum', 'jizz',`
			`'semen', 'cooch', 'coochie',`
			`'pussy', 'penis', 'fetish',`
			`'bdsm', 'sexy', 'xxx',`
			`'orgasm', 'masturbation',`
			`'erotic', 'creampie',`
			`'fap', 'nude', 'orgasm',`
			`'squirting', 'yiff',`
			`'e621'`
			`]`
			`nono_sites = [`
			`'xvideos', 'pornhub',`
			`'xhamster', 'xnxx',`
			`'youporn', 'xxx',`
			`'freexcafe', 'sex.com',`
			`'e621', 'nhentai'`
			`]`

			`if not is_nsfw:`
			`for i in nono_words:`
			`if i in query.replace(" ", ""):`
			`return (`
			`"Sorry! That query included language "`
			`"we cannot accept in a non-NSFW channel. "`
			`"Please try again in an NSFW channel."`
			`)`

			`# Choose an instance`
			`if self.instances == []:`
			`with open('searxes.txt') as f:`
			`self.instances = f.read().split('\n')`
			`instance = random.sample(self.instances, k=1)[0]`

			`# Error Template`
			`error_msg = (`
			`"An error occured!\n\n"`
			f"There was a problem with `{instance}`. Please try again later.\n"
			`f"_If problems with this instance persist, "`
			f"contact`{self.bot.appinfo.owner}` to have it removed._"
			`)`

			`# Create the URL to make an API call to`
			`call = f'{instance}search?q={query}&format=json&language=en-US'`

			`# If a type is provided, add that type to the call URL`
			`if category:`
			`call += f'&categories={category}'`

			`if is_nsfw:`
			`call += '&safesearch=0'`
			`else:`
			`call += '&safesearch=1'`

			`# Figure out engines for different categories to get decent results.`
			`if category == 'videos':`
			`call += '&engines=bing+videos,google+videos'`
			`# Make said API call`
			`try:`
			`async with self.request.get(call) as resp:`
			`response = await resp.json()`
			`except aiohttp.ClientError:`
			`return error_msg`

			`# Split our response data up for parsing`
			`# infoboxes = response['infoboxes']`
			`results = response['results']`

			`# Create message with results`
			`try:`
			`# Handle tiny result count`
			`if len(results) > 5:`
			`amt = 5`
			`else:`
			`amt = len(results)`

			`# Remove no-no sites`
			`if not is_nsfw:`
			`for r in results[0:7]:`
			`for n in nono_sites:`
			`if n in r['url']:`
			`results.remove(r)`

			`# Escape stuff`
			`query = discord.utils.escape_mentions(query)`
			`query = discord.utils.escape_markdown(query)`

			`# Header`
			msg = f"Showing {amt} results for `{query}`. \n\n"
			`# Expanded Result`
			`msg += (`
			`f"{results[0]['title']} <{results[0]['url']}>\n"`
			`f"{results[0]['content']}\n\n")`
			`# Other Results`
			`msg += "\n".join(`
			`[f"{entry['title']} <{entry['url']}>" for entry in results[1:5]])`
			`# Instance Info`
			msg += f"\n\n_Results retrieved from instance `{instance}`._"

			`return msg`

			`# Reached if error with returned results`
			`except (KeyError, IndexError) as e:`
			`# Logging`
Made logging not block. 2020-04-07 23:56:01 +00:00			`self.warn(`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			f"A user encountered a(n) `{e}` with <{instance}> when searching for `{query}`. "
			`"Consider removing it or looking into it.",`
			`name="Failed Instance"`
			`)`

			`self.instances.remove(instance) # Weed the instance out`
			`# Recurse until good response`
			`return await self._old_search_logic(query, is_nsfw)`

			`async def _instance_check(self, instance: str, content: dict) -> bool:`
			`"""Checks the quality of an instance."""`

			`# Makes sure proper values exist`
			`if 'error' in content:`
			`return False`
			`if not ('engines' in content and 'initial' in content['timing']):`
			`return False`
			`if not ('google' in content['engines'] and 'enabled' in content['engines']['google']):`
			`return False`

			`# Makes sure google is enabled`
			`if not content['engines']['google']['enabled']:`
			`return False`

			`# Makes sure is not Tor`
			`if content['network_type'] != 'normal':`
			`return False`

			`# Only picks instances that are fast enough`
			`timing = int(content['timing']['initial'])`
			`if timing > 0.20:`
			`return False`

			`# Check for Google captcha`
			`test_search = f'{instance}/search?q=test&format=json&lang=en-US'`
			`try:`
			`async with self.request.get(test_search) as resp:`
			`response = await resp.json()`
			`response['results'][0]['content']`
			`except (aiohttp.ClientError, KeyError, IndexError):`
			`return False`

			`# Reached if passes all checks`
			`return True`

			`@commands.command()`
			`@commands.is_owner()`
			`async def rejson(self, ctx):`
			`"""Refreshes the list of instances for searx."""`

			`msg = await ctx.send('<a:updating:403035325242540032> Refreshing instance list...\n\n'`
			`'(Due to extensive quality checks, this may take a bit.)')`
			`plausible: List[str] = []`

			`# Get, parse, and quality check all instances`
			`async with self.request.get('https://searx.space/data/instances.json') as r:`
			`# Parsing`
			`searx_json = await r.json()`
			`instances = searx_json['instances']`

			`# Quality Check`
			`for i in instances:`
			`content = instances.get(i)`
			`is_good: bool = await self._instance_check(i, content)`
			`if is_good:`
			`plausible.append(i)`

			`# Save new list`
			`self.instances = plausible`
			`with open('searxes.txt', 'w') as f:`
			`f.write('\n'.join(plausible))`

			`await msg.edit(content='Instances refreshed!')`

			`async def _old_basic_search(self, ctx, query: str,`
			`category: str = None):`
			`"""Base search message generation."""`

			`async with ctx.typing():`
			`is_nsfw = (`
			`ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')`
			`else False`
			`)`

			`msg = await self._old_search_logic(query, is_nsfw, category)`
			`await ctx.send(msg)`

Made logging not block. 2020-04-07 23:56:01 +00:00			`self.info(`
Moving search to Qwant 2020-03-24 23:09:01 +00:00			`content=(`
			f"{ctx.author} searched for `{query}` "
			`f"in \"{ctx.guild}\" and got this:"`
			`f"\n\n{msg}"`
			`),`
			`name="Search Results"`
			`)`