Initial work on pagination

Adriene Hutchins 2020-04-07 17:40:42 -04:00
parent c64e52b865
commit cbea2c6440
4 changed files with 442 additions and 194 deletions

View File

@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-

# search source
# Provides paginator sources for the search cog.

"""Search Source File"""

from typing import Any, Callable, List, Optional, Tuple

import discord
from discord.ext import menus
import html2text
import re

# Type aliases for fetcher coroutines and their extra arguments
FetcherArgs = Tuple[Any, ...]
Fetcher = Callable[..., List]

# Markdown converter - strips links, images, tables and emphasis
# so titles and descriptions render as plain text
tomd = html2text.HTML2Text()
tomd.ignore_links = True
tomd.ignore_images = True
tomd.ignore_tables = True
tomd.ignore_emphasis = True
tomd.body_width = 0
# TODO: Change around value names, make it general
class Result:
    """A class that holds the general data for a search result.

    Parameters:
        title (str): Title of the content.
        url (str): The direct link to the content.
        desc (str): The content's description.
        source (Optional[str]): The source site. Defaults to None.
        image (Optional[str]): The content's image.
    """

    def __init__(self, title: str, url: str,
                 desc: str = "No description provided.",
                 source: Optional[str] = None, image: Optional[str] = None):
        self.url = url
        if title in [None, ""]:
            self.title = "Unknown"
        else:
            self.title = title
        self.desc = desc
        self.source = source
        self.image = image

    def __repr__(self):
        return f'<Result url={self.url} title={self.title} source={self.source}>'
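    # Example (illustrative, not part of this commit):
    #   Result("Qwant", "https://qwant.com", desc="A search engine.")
    #   repr() -> <Result url=https://qwant.com title=Qwant source=None>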
class NormalSource(menus.AsyncIteratorPageSource):
    def __init__(self, query: str, fetcher: Fetcher, per_page: int,
                 header: str = "", footer: str = ""):
        self.header = header
        self.footer = footer
        self.query = query
        super().__init__(self._generate(fetcher), per_page=per_page)

    async def _generate(self, fetcher: Fetcher):
        """Yields results from the fetcher, requesting more as needed."""
        offset = 0
        per_request = 10

        # TODO: put the generation in the fetcher itself
        # Qwant: image - media, source - url, title - title
        while results := await fetcher(
            offset, per_request, self.query
        ):
            for r in results:
                yield r
            offset += per_request
    async def format_page(self, menu, entries):
        start = menu.current_page * self.per_page

        # Escapes all nasties for displaying
        query_display = discord.utils.escape_mentions(self.query)
        query_display = discord.utils.escape_markdown(query_display)

        # Return if no results
        try:
            entries[0]
        except IndexError:
            return f"No results found for `{query_display}`."

        # Gets the first entry's stuff
        first_title = tomd.handle(entries[0].title).rstrip('\n')
        first_url = entries[0].url
        if start == 0:
            first_desc = tomd.handle(entries[0].desc).rstrip('\n')
            first = f"**{first_title}** {first_url}\n{first_desc}\n\n"
        else:
            first = f"**{first_title}** {first_url}\n"

        # Builds the substring for each of the other results.
        other_results: List[str] = []
        for e in entries[1:5]:
            title = tomd.handle(e.title).rstrip('\n')
            url = e.url
            other_results.append(f"**{title}** {url}")
        other_msg = "\n".join(other_results)

        # Builds message, wrapping bare URLs in <> to suppress embeds
        msg = f"{first}{other_msg}"
        msg = re.sub(
            r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
            r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',
            r'<\1>',
            msg
        )

        content = (
            f"{self.header}\n\n"
            f"Showing results *{start + 1} - {start + len(entries)}* "
            f"for `{query_display}`.\n\n"
            f"{msg}\n\n"
            f"{self.footer}"
        )
        return content
class ImageSource(menus.AsyncIteratorPageSource):
    def __init__(self, query: str, fetcher: Fetcher, args: FetcherArgs,
                 header: str = "", footer: str = ""):
        self.header = header
        self.footer = footer
        self.query = query
        super().__init__(self._generate(fetcher, args), per_page=1)

    async def _generate(self, fetcher: Fetcher, fetch_args: FetcherArgs):
        """Yields image results from the fetcher, requesting more as needed."""
        offset = 0
        per_request = 10

        # TODO: put the generation in the fetcher itself
        # Qwant: image - media, source - url, title - title
        while results := await fetcher(
            offset, per_request, self.query, *fetch_args
        ):
            for r in results:
                yield r
            offset += per_request
    async def format_page(self, menu, entry):
        start = menu.current_page * self.per_page

        content = (
            f"{self.header}\n\n"
            f"Showing image result `{start + 1}` for `{self.query}`.\n\n"
            f"<{entry.image}>\n\n"
            f"{self.footer}"
        )
        embed = discord.Embed(
            title=entry.title,
            url=entry.image,
            description=entry.source
        )
        embed.set_image(url=entry.image)

        return {
            "content": content,
            "embed": embed
        }
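
For reference, a minimal sketch of the contract these sources expect (the fetcher below is hypothetical, not part of this commit): a fetcher is a coroutine taking `(offset, per_request, query, *args)` and returning a list of `Result`s, and an empty list ends the iterator.

# Hypothetical fetcher - illustrates the contract NormalSource expects.
async def dummy_fetcher(offset, per_request, query):
    if offset >= 30:          # pretend the backend runs dry after 30 hits
        return []             # an empty list stops _generate's while loop
    return [
        Result(f"{query} result {offset + i}",
               f"https://example.com/{offset + i}")
        for i in range(per_request)
    ]

# Wiring it up inside a command (sketch):
#   pages = menus.MenuPages(
#       source=NormalSource(query, dummy_fetcher, 5),
#       clear_reactions_after=True)
#   await pages.start(ctx)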

View File

@@ -8,12 +8,13 @@
from typing import List

import discord
-from discord.ext import commands
+from discord.ext import commands, menus
import html2text
import re
from urllib.parse import quote_plus

from extensions.models import SearchExceptions
+from extensions.models.search_source import Result, NormalSource, ImageSource


class Search(commands.Cog, name="Basic"):
@@ -39,7 +40,8 @@ class Search(commands.Cog, name="Basic"):
        self.tomd.body_width = 0

    async def _search_logic(self, query: str, is_nsfw: bool = False,
-                            category: str = 'web', count: int = 5) -> list:
+                            category: str = 'web', count: int = 5,
+                            offset: int = 0) -> list:
        """Uses scrapestack and the Qwant API to find search results."""

        # Typing
@@ -87,9 +89,12 @@ class Search(commands.Cog, name="Basic"):
        search_url = (
            f"{base}/search/{category}"
            f"?count={count}"
+            f"&offset={offset}"
            f"&q={query}"
            f"&safesearch={safesearch}"
-            "&t=web"
+            f"&t={category}"
+            "&extensionDisabled=true"
+            "&device=tablet"
            "&locale=en_US"
            "&uiv=4"
        )
@@ -113,11 +118,47 @@ class Search(commands.Cog, name="Basic"):
        }

        async with self.request.get(search_url, headers=headers) as resp:
            to_parse = await resp.json()
-            print(to_parse)

            # Sends results
            return to_parse['data']['result']['items']

+    async def _page_search(self, ctx, query: str, count: int = 5,
+                           category: str = 'web'):
+        """Basic search formatting - this time with pages!"""
+
+        is_nsfw = (
+            ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
+            else False
+        )
+
+        async def fetcher(offset, per_request, q):
+            result_objects = []
+            results = await self._search_logic(
+                query, is_nsfw, category, per_request, offset
+            )
+            for r in results:
+                result = Result(
+                    title=r["title"],
+                    url=r["url"],
+                    desc=r["desc"],
+                    source=r["source"]
+                )
+                result_objects.append(result)
+            return result_objects
+
+        pages = menus.MenuPages(
+            source=NormalSource(
+                query, fetcher, count,
+                footer="_Powered by Qwant._"
+            ),
+            clear_reactions_after=True,
+        )
+        await pages.start(ctx)
+
    async def _basic_search(self, ctx, query: str, category: str = 'web'):
        """Basic search formatting."""
@@ -169,8 +210,6 @@ class Search(commands.Cog, name="Basic"):
            f"{other_msg}\n\n_Powered by Qwant._"
        )

-        print(msg)
-
        msg = re.sub(
            r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
            r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',
@@ -178,7 +217,6 @@ class Search(commands.Cog, name="Basic"):
            msg
        )

        # Sends message
        await self.info(
            f"**New Search** - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
@@ -186,6 +224,32 @@ class Search(commands.Cog, name="Basic"):
        )

        await ctx.send(msg)

+    @commands.command()
+    async def paginated_search(self, ctx, *, query: str):
+        async with ctx.typing():
+            await self._page_search(ctx, query)
+
+        # async def fetcher(offset, per_request, q, *args):
+        #     result_objects = []
+        #     results = await self._search_logic(
+        #         q, False, "images", per_request, offset)
+        #     for r in results:
+        #         image = Result(
+        #             title=r["title"],
+        #             url=r["media"],
+        #             source=r["url"],
+        #             image=r["media"]
+        #         )
+        #         result_objects.append(image)
+        #     return result_objects
+
+        # pages = menus.MenuPages(
+        #     source=ImageSource(query, fetcher, (None,)),
+        #     clear_reactions_after=True)
+        # await pages.start(ctx)

    @commands.command()
    async def search(self, ctx, *, query: str):
        """Search online for general results."""

View File

@@ -5,7 +5,8 @@
# Used and modified with permission.
# See LICENSE for license information.

-'''Main File'''
+"""Main File"""

import json
import os
@@ -15,13 +16,14 @@
from typing import List

import aiohttp
import discord
from discord.ext import commands
+from discord.ext.menus import CannotAddReactions
import rethinkdb

from extensions.models import SearchExceptions


class Bot(commands.Bot):
-    """Custom Bot Class that subclasses the commands.ext one"""
+    """Custom Bot Class that subclasses the commands.ext one."""

    def __init__(self, **options):
        """Initializes the main parts of the bot."""
@@ -271,6 +273,9 @@ async def on_command_error(ctx, error):
            "Please try again in an NSFW channel."
        )

+    elif isinstance(error, CannotAddReactions):
+        await ctx.send("**I cannot add reactions for pagination here!**")
+
    # Provides a very pretty embed if something's actually a dev's fault.
    elif isinstance(error, commands.CommandInvokeError):
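
For context, a minimal sketch of where this exception originates, assuming standard discord-ext-menus behavior (the helper below is illustrative, not part of this commit):

from discord.ext import menus

# Menu.start() verifies channel permissions before sending anything,
# so a missing Add Reactions permission surfaces eagerly as
# menus.CannotAddReactions and lands in the handler above.
async def start_pages(ctx, source: menus.PageSource):
    pages = menus.MenuPages(source=source, clear_reactions_after=True)
    await pages.start(ctx)  # may raise CannotAddReactions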

View File

@@ -1,211 +1,214 @@
# This is the old search logic for reference purposes

async def _old_search_logic(self, query: str, is_nsfw: bool = False,
                            category: str = None) -> str:
    """Provides search logic for all search commands."""

    # NSFW Filtering
    # WARNING - This list includes slurs.
    nono_words = [
        'tranny', 'faggot', 'fag',
        'porn', 'cock', 'dick',
        'titty', 'boob', 'penis',
        'slut', 'cum', 'jizz',
        'semen', 'cooch', 'coochie',
        'pussy', 'penis', 'fetish',
        'bdsm', 'sexy', 'xxx',
        'orgasm', 'masturbation',
        'erotic', 'creampie',
        'fap', 'nude', 'orgasm',
        'squirting', 'yiff',
        'e621'
    ]
    nono_sites = [
        'xvideos', 'pornhub',
        'xhamster', 'xnxx',
        'youporn', 'xxx',
        'freexcafe', 'sex.com',
        'e621', 'nhentai'
    ]

    if not is_nsfw:
        for i in nono_words:
            if i in query.replace(" ", ""):
                return (
                    "**Sorry!** That query included language "
                    "we cannot accept in a non-NSFW channel. "
                    "Please try again in an NSFW channel."
                )

    # Choose an instance
    if self.instances == []:
        with open('searxes.txt') as f:
            self.instances = f.read().split('\n')
    instance = random.sample(self.instances, k=1)[0]

    # Error Template
    error_msg = (
        "**An error occurred!**\n\n"
        f"There was a problem with `{instance}`. Please try again later.\n"
        f"_If problems with this instance persist, "
        f"contact `{self.bot.appinfo.owner}` to have it removed._"
    )

    # Create the URL to make an API call to
    call = f'{instance}search?q={query}&format=json&language=en-US'

    # If a type is provided, add that type to the call URL
    if category:
        call += f'&categories={category}'

    if is_nsfw:
        call += '&safesearch=0'
    else:
        call += '&safesearch=1'

    # Figure out engines for different categories to get decent results.
    if category == 'videos':
        call += '&engines=bing+videos,google+videos'

    # Make said API call
    try:
        async with self.request.get(call) as resp:
            response = await resp.json()
    except aiohttp.ClientError:
        return error_msg

    # Split our response data up for parsing
    # infoboxes = response['infoboxes']
    results = response['results']

    # Create message with results
    try:
        # Handle tiny result count
        if len(results) > 5:
            amt = 5
        else:
            amt = len(results)

        # Remove no-no sites
        if not is_nsfw:
            for r in results[0:7]:
                for n in nono_sites:
                    if n in r['url']:
                        results.remove(r)

        # Escape stuff
        query = discord.utils.escape_mentions(query)
        query = discord.utils.escape_markdown(query)

        # Header
        msg = f"Showing **{amt}** results for `{query}`. \n\n"

        # Expanded Result
        msg += (
            f"**{results[0]['title']}** <{results[0]['url']}>\n"
            f"{results[0]['content']}\n\n")

        # Other Results
        msg += "\n".join(
            [f"**{entry['title']}** <{entry['url']}>" for entry in results[1:5]])

        # Instance Info
        msg += f"\n\n_Results retrieved from instance `{instance}`._"

        return msg

    # Reached if error with returned results
    except (KeyError, IndexError) as e:
        # Logging
        await self.warn(
            f"A user encountered a(n) `{e}` with <{instance}> when searching "
            f"for `{query}`. Consider removing it or looking into it.",
            name="Failed Instance"
        )
        self.instances.remove(instance)  # Weed the instance out
        # Recurse until good response
        return await self._old_search_logic(query, is_nsfw)


async def _instance_check(self, instance: str, content: dict) -> bool:
    """Checks the quality of an instance."""

    # Makes sure proper values exist
    if 'error' in content:
        return False
    if not ('engines' in content and 'initial' in content['timing']):
        return False
    if not ('google' in content['engines'] and 'enabled' in content['engines']['google']):
        return False

    # Makes sure google is enabled
    if not content['engines']['google']['enabled']:
        return False

    # Makes sure is not Tor
    if content['network_type'] != 'normal':
        return False

    # Only picks instances that are fast enough
    timing = float(content['timing']['initial'])
    if timing > 0.20:
        return False

    # Check for Google captcha
    test_search = f'{instance}/search?q=test&format=json&lang=en-US'
    try:
        async with self.request.get(test_search) as resp:
            response = await resp.json()
        response['results'][0]['content']
    except (aiohttp.ClientError, KeyError, IndexError):
        return False

    # Reached if passes all checks
    return True


@commands.command()
@commands.is_owner()
async def rejson(self, ctx):
    """Refreshes the list of instances for searx."""

    msg = await ctx.send(
        '<a:updating:403035325242540032> Refreshing instance list...\n\n'
        '(Due to extensive quality checks, this may take a bit.)')
    plausible: List[str] = []

    # Get, parse, and quality check all instances
    async with self.request.get('https://searx.space/data/instances.json') as r:
        # Parsing
        searx_json = await r.json()
        instances = searx_json['instances']

        # Quality Check
        for i in instances:
            content = instances.get(i)
            is_good: bool = await self._instance_check(i, content)
            if is_good:
                plausible.append(i)

    # Save new list
    self.instances = plausible
    with open('searxes.txt', 'w') as f:
        f.write('\n'.join(plausible))

    await msg.edit(content='Instances refreshed!')


async def _old_basic_search(self, ctx, query: str,
                            category: str = None):
    """Base search message generation."""

    async with ctx.typing():
        is_nsfw = (
            ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
            else False
        )

        msg = await self._old_search_logic(query, is_nsfw, category)
        await ctx.send(msg)

        await self.info(
            content=(
                f"**{ctx.author}** searched for `{query}` "
                f"in \"{ctx.guild}\" and got this:"
                f"\n\n{msg}"
            ),
            name="Search Results"
        )
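
For reference, a minimal sketch of the instance metadata shape that `_instance_check` reads (field values illustrative; the real searx.space schema carries many more keys):

# Illustrative instance entry - only the keys _instance_check inspects.
sample_content = {
    'network_type': 'normal',          # must not be a Tor/hidden service
    'timing': {'initial': 0.12},       # initial response time in seconds
    'engines': {
        'google': {'enabled': True},   # Google engine must be enabled
    },
}
# await self._instance_check('https://searx.example.org/', sample_content)
# passes every offline quality gate above; the final captcha probe still
# requires a live request to the instance before it can return True.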