Initial work on pagination

Adriene Hutchins 2020-04-07 17:40:42 -04:00
parent c64e52b865
commit cbea2c6440
4 changed files with 442 additions and 194 deletions

View File

@@ -0,0 +1,176 @@
# -*- coding: utf-8 -*-

# search source
# Provides paginator sources for the search cog.

"""Search Source File"""

from typing import Callable, List, Tuple, Optional, Any

import discord
from discord.ext import menus
import html2text
import re

# Type aliases for the fetcher coroutines handed to the page sources
FetcherArgs = Tuple[Any, ...]
Fetcher = Callable[..., List]

# Markdown converter
tomd = html2text.HTML2Text()
tomd.ignore_links = True
tomd.ignore_images = True
tomd.ignore_tables = True
tomd.ignore_emphasis = True
tomd.body_width = 0
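
# Illustration of the conversion (a sketch; exact spacing depends on the
# html2text version):
#
#     tomd.handle("<p><b>Qwant</b> is a search engine</p>").rstrip('\n')
#     # -> 'Qwant is a search engine'
#
# With the ignore_* flags above the markup is stripped outright, and
# body_width=0 disables html2text's line wrapping.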
# TODO: Change around value names, make it general
class Result:
    """A class that holds the general data for a search result.

    Parameters:
        title (str): Title of the content.
        url (str): The direct link to the content.
        desc (str): The content's description.
        source (Optional[str]): The source site. Defaults to url.
        image (Optional[str]): The content's image.
    """

    def __init__(self, title: str, url: str,
                 desc: str = "No description provided.",
                 source: Optional[str] = None, image: Optional[str] = None):
        self.url = url
        # Falls back to a placeholder when no usable title is given
        if title in (None, ""):
            self.title = "Unknown"
        else:
            self.title = title
        self.desc = desc
        # Defaults to the result URL when no source site is given,
        # as documented above
        self.source = source if source is not None else url
        self.image = image

    def __repr__(self):
        return f'<Result url={self.url} title={self.title} source={self.source}>'
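
# Example construction (hypothetical values):
#
#     r = Result(title="", url="https://example.com")
#     r.title   # 'Unknown' - empty titles fall back to the placeholder
#     r.source  # 'https://example.com' - source defaults to the URL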
class NormalSource(menus.AsyncIteratorPageSource):
    def __init__(self, query: str, fetcher: Fetcher, per_page: int,
                 header: str = "", footer: str = ""):
        self.header = header
        self.footer = footer
        self.query = query
        super().__init__(self._generate(fetcher), per_page=per_page)

    async def _generate(self, fetcher: Fetcher):
        offset = 0
        per_request = 10

        # TODO: put the generation in the fetcher itself
        # Qwant: image - media, source - url, title - title
        while results := await fetcher(
            offset, per_request, self.query
        ):
            for r in results:
                yield r
            offset += per_request

    async def format_page(self, menu, entries):
        start = menu.current_page * self.per_page

        # Escapes all nasties for displaying
        query_display = discord.utils.escape_mentions(self.query)
        query_display = discord.utils.escape_markdown(query_display)

        # Return if no results
        if not entries:
            return f"No results found for `{query_display}`."

        # Gets the first entry's data
        first_title = tomd.handle(entries[0].title).rstrip('\n')
        first_url = entries[0].url
        if start == 0:
            first_desc = tomd.handle(entries[0].desc).rstrip('\n')
            first = f"**{first_title}** {first_url}\n{first_desc}\n\n"
        else:
            first = f"**{first_title}** {first_url}\n"

        # Builds the substring for each of the other results
        other_results: List[str] = []
        for e in entries[1:5]:
            title = tomd.handle(e.title).rstrip('\n')
            other_results.append(f"**{title}** {e.url}")
        other_msg = "\n".join(other_results)

        # Builds the message, wrapping bare URLs in <> to suppress embeds
        msg = f"{first}{other_msg}"
        msg = re.sub(
            r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
            r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',
            r'<\1>',
            msg
        )

        content = (
            f"{self.header}\n\n"
            f"Showing results *{start} - {start + 5}* "
            f"for `{query_display}`.\n\n"
            f"{msg}\n\n"
            f"{self.footer}"
        )
        return content
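
# Illustration of the URL wrapping above (hypothetical input):
#
#     "**Example** https://example.com/page"
#     # becomes: "**Example** <https://example.com/page>"
#
# Angle brackets suppress Discord's automatic link previews, keeping the
# result list compact.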
class ImageSource(menus.AsyncIteratorPageSource):
    def __init__(self, query: str, fetcher: Fetcher, args: FetcherArgs,
                 header: str = "", footer: str = ""):
        self.header = header
        self.footer = footer
        self.query = query
        super().__init__(self._generate(fetcher, args), per_page=1)

    async def _generate(self, fetcher: Fetcher, fetch_args: FetcherArgs):
        offset = 0
        per_request = 10

        # TODO: put the generation in the fetcher itself
        # Qwant: image - media, source - url, title - title
        while results := await fetcher(
            offset, per_request, self.query, *fetch_args
        ):
            for r in results:
                yield r
            offset += per_request

    async def format_page(self, menu, entry):
        start = menu.current_page * self.per_page
        content = (
            f"{self.header}\n\n"
            f"Showing image result `{start}` for `{self.query}`.\n\n"
            f"<{entry.image}>\n\n"
            f"{self.footer}"
        )
        embed = discord.Embed(
            title=entry.title,
            url=entry.image,
            description=entry.source
        )
        embed.set_image(url=entry.image)
        return {
            "content": content,
            "embed": embed
        }
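
How these sources are driven, as a minimal sketch (the fetcher body and the backend call here are illustrative; the real fetcher is defined in the search cog below):

    async def fetcher(offset: int, per_request: int, query: str) -> List[Result]:
        # One batch per call; returning an empty list ends the async iterator.
        raw = await some_backend(query, count=per_request, offset=offset)  # hypothetical backend
        return [Result(title=r["title"], url=r["url"], desc=r["desc"]) for r in raw]

    pages = menus.MenuPages(
        source=NormalSource("query text", fetcher, 5, footer="_Powered by Qwant._"),
        clear_reactions_after=True,
    )
    await pages.start(ctx)  # ctx is a commands.Context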

View File

@@ -8,12 +8,13 @@
from typing import List
import discord
-from discord.ext import commands
+from discord.ext import commands, menus
import html2text
import re
from urllib.parse import quote_plus

from extensions.models import SearchExceptions
+from extensions.models.search_source import Result, NormalSource, ImageSource


class Search(commands.Cog, name="Basic"):
@@ -39,7 +40,8 @@ class Search(commands.Cog, name="Basic"):
        self.tomd.body_width = 0

    async def _search_logic(self, query: str, is_nsfw: bool = False,
-                            category: str = 'web', count: int = 5) -> list:
+                            category: str = 'web', count: int = 5,
+                            offset: int = 0) -> list:
        """Uses scrapestack and the Qwant API to find search results."""

        # Typing
@@ -87,9 +89,12 @@ class Search(commands.Cog, name="Basic"):
        search_url = (
            f"{base}/search/{category}"
            f"?count={count}"
+            f"&offset={offset}"
            f"&q={query}"
            f"&safesearch={safesearch}"
-            "&t=web"
+            f"&t={category}"
+            "&extensionDisabled=true"
+            "&device=tablet"
            "&locale=en_US"
            "&uiv=4"
        )
@@ -113,11 +118,47 @@ class Search(commands.Cog, name="Basic"):
        }

        async with self.request.get(search_url, headers=headers) as resp:
            to_parse = await resp.json()
+            print(to_parse)

        # Sends results
        return to_parse['data']['result']['items']
+    async def _page_search(self, ctx, query: str, count: int = 5,
+                           category: str = 'web'):
+        """Basic search formatting - this time with pages!"""
+
+        is_nsfw = (
+            ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
+            else False
+        )
+
+        # NOTE: q is required by the fetcher signature but unused here;
+        # the closure captures query directly.
+        async def fetcher(offset, per_request, q):
+            result_objects = []
+            results = await self._search_logic(
+                query, is_nsfw, category, per_request, offset
+            )
+            for r in results:
+                result = Result(
+                    title=r["title"],
+                    url=r["url"],
+                    desc=r["desc"],
+                    source=r["source"]
+                )
+                result_objects.append(result)
+            return result_objects
+
+        pages = menus.MenuPages(
+            source=NormalSource(
+                query, fetcher, count,
+                footer="_Powered by Qwant._"
+            ),
+            clear_reactions_after=True,
+        )
+        await pages.start(ctx)
    async def _basic_search(self, ctx, query: str, category: str = 'web'):
        """Basic search formatting."""
@@ -169,8 +210,6 @@ class Search(commands.Cog, name="Basic"):
            f"{other_msg}\n\n_Powered by Qwant._"
        )

-        print(msg)
-
        msg = re.sub(
            r'(https?://(?:www\.)?[-a-zA-Z0-9@:%._+~#=]+\.'
            r'[a-zA-Z0-9()]+\b[-a-zA-Z0-9()@:%_+.~#?&/=]*)',
@@ -178,7 +217,6 @@ class Search(commands.Cog, name="Basic"):
            msg
        )

        # Sends message
        await self.info(
            f"**New Search** - `{ctx.author}` in `{ctx.guild}`\n\n{msg}",
+    @commands.command()
+    async def paginated_search(self, ctx, *, query: str):
+        async with ctx.typing():
+            await self._page_search(ctx, query)
+
+        # async def fetcher(offset, per_request, q, *args):
+        #     result_objects = []
+        #     results = await self._search_logic(
+        #         q, False, "images", per_request, offset)
+        #     for r in results:
+        #         image = Result(
+        #             title=r["title"],
+        #             url=r["media"],
+        #             source=r["url"],
+        #             image=r["media"]
+        #         )
+        #         result_objects.append(image)
+        #     return result_objects
+
+        # pages = menus.MenuPages(
+        #     source=ImageSource(query, fetcher, (None,)),
+        #     clear_reactions_after=True)
+        # await pages.start(ctx)
    @commands.command()
    async def search(self, ctx, *, query: str):
        """Search online for general results."""

View File

@@ -5,7 +5,8 @@
# Used and modified with permission.
# See LICENSE for license information.

-'''Main File'''
+"""Main File"""

import json
import os
@@ -15,13 +16,14 @@ from typing import List
import aiohttp
import discord
from discord.ext import commands
+from discord.ext.menus import CannotAddReactions
import rethinkdb

from extensions.models import SearchExceptions


class Bot(commands.Bot):
-    """Custom Bot Class that subclasses the commands.ext one"""
+    """Custom Bot Class that subclasses the commands.ext one."""

    def __init__(self, **options):
        """Initializes the main parts of the bot."""
@@ -271,6 +273,9 @@ async def on_command_error(ctx, error):
            "Please try again in an NSFW channel."
        )

+    elif isinstance(error, CannotAddReactions):
+        await ctx.send("**I cannot add reactions for pagination here!**")
+
    # Provides a very pretty embed if something's actually a dev's fault.
    elif isinstance(error, commands.CommandInvokeError):
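
The reaction-driven menus need the Add Reactions permission; menus.MenuPages.start raises CannotAddReactions when it is missing, which the handler above reports. A command could also pre-check before starting a menu (a sketch, not part of this commit):

    perms = ctx.channel.permissions_for(ctx.me)
    if not perms.add_reactions:
        return await ctx.send("**I cannot add reactions for pagination here!**")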

View File

@@ -1,211 +1,214 @@
# This is the old search logic for reference purposes

async def _old_search_logic(self, query: str, is_nsfw: bool = False,
                            category: str = None) -> str:
    """Provides search logic for all search commands."""

    # NSFW Filtering
    # WARNING - This list includes slurs.
    nono_words = [
        'tranny', 'faggot', 'fag',
        'porn', 'cock', 'dick',
        'titty', 'boob', 'penis',
        'slut', 'cum', 'jizz',
        'semen', 'cooch', 'coochie',
        'pussy', 'penis', 'fetish',
        'bdsm', 'sexy', 'xxx',
        'orgasm', 'masturbation',
        'erotic', 'creampie',
        'fap', 'nude', 'orgasm',
        'squirting', 'yiff',
        'e621'
    ]
    nono_sites = [
        'xvideos', 'pornhub',
        'xhamster', 'xnxx',
        'youporn', 'xxx',
        'freexcafe', 'sex.com',
        'e621', 'nhentai'
    ]

    if not is_nsfw:
        for i in nono_words:
            if i in query.replace(" ", ""):
                return (
                    "**Sorry!** That query included language "
                    "we cannot accept in a non-NSFW channel. "
                    "Please try again in an NSFW channel."
                )

    # Choose an instance
    if self.instances == []:
        with open('searxes.txt') as f:
            self.instances = f.read().split('\n')
    instance = random.sample(self.instances, k=1)[0]

    # Error Template
    error_msg = (
        "**An error occurred!**\n\n"
        f"There was a problem with `{instance}`. Please try again later.\n"
        f"_If problems with this instance persist, "
        f"contact `{self.bot.appinfo.owner}` to have it removed._"
    )

    # Create the URL to make an API call to
    call = f'{instance}search?q={query}&format=json&language=en-US'

    # If a type is provided, add that type to the call URL
    if category:
        call += f'&categories={category}'

    if is_nsfw:
        call += '&safesearch=0'
    else:
        call += '&safesearch=1'

    # Figure out engines for different categories to get decent results.
    if category == 'videos':
        call += '&engines=bing+videos,google+videos'

    # Make said API call
    try:
        async with self.request.get(call) as resp:
            response = await resp.json()
    except aiohttp.ClientError:
        return error_msg

    # Split our response data up for parsing
    # infoboxes = response['infoboxes']
    results = response['results']

    # Create message with results
    try:
        # Handle tiny result count
        if len(results) > 5:
            amt = 5
        else:
            amt = len(results)

        # Remove no-no sites
        if not is_nsfw:
            for r in results[0:7]:
                for n in nono_sites:
                    if n in r['url']:
                        results.remove(r)

        # Escape stuff
        query = discord.utils.escape_mentions(query)
        query = discord.utils.escape_markdown(query)

        # Header
        msg = f"Showing **{amt}** results for `{query}`. \n\n"
        # Expanded Result
        msg += (
            f"**{results[0]['title']}** <{results[0]['url']}>\n"
            f"{results[0]['content']}\n\n")
        # Other Results
        msg += "\n".join(
            [f"**{entry['title']}** <{entry['url']}>" for entry in results[1:5]])
        # Instance Info
        msg += f"\n\n_Results retrieved from instance `{instance}`._"

        return msg

    # Reached if error with returned results
    except (KeyError, IndexError) as e:
        # Logging
        await self.warn(
            f"A user encountered a(n) `{e}` with <{instance}> when searching for `{query}`. "
            "Consider removing it or looking into it.",
            name="Failed Instance"
        )

        self.instances.remove(instance)  # Weed the instance out
        # Recurse until good response
        return await self._old_search_logic(query, is_nsfw)


async def _instance_check(self, instance: str, content: dict) -> bool:
    """Checks the quality of an instance."""

    # Makes sure proper values exist
    if 'error' in content:
        return False
    if not ('engines' in content and 'initial' in content['timing']):
        return False
    if not ('google' in content['engines'] and 'enabled' in content['engines']['google']):
        return False

    # Makes sure google is enabled
    if not content['engines']['google']['enabled']:
        return False

    # Makes sure is not Tor
    if content['network_type'] != 'normal':
        return False

    # Only picks instances that are fast enough
    timing = int(content['timing']['initial'])
    if timing > 0.20:
        return False

    # Check for Google captcha
    test_search = f'{instance}/search?q=test&format=json&lang=en-US'
    try:
        async with self.request.get(test_search) as resp:
            response = await resp.json()
        response['results'][0]['content']
    except (aiohttp.ClientError, KeyError, IndexError):
        return False

    # Reached if passes all checks
    return True


@commands.command()
@commands.is_owner()
async def rejson(self, ctx):
    """Refreshes the list of instances for searx."""

    msg = await ctx.send('<a:updating:403035325242540032> Refreshing instance list...\n\n'
                         '(Due to extensive quality checks, this may take a bit.)')
    plausible: List[str] = []

    # Get, parse, and quality check all instances
    async with self.request.get('https://searx.space/data/instances.json') as r:
        # Parsing
        searx_json = await r.json()
        instances = searx_json['instances']

    # Quality Check
    for i in instances:
        content = instances.get(i)
        is_good: bool = await self._instance_check(i, content)
        if is_good:
            plausible.append(i)

    # Save new list
    self.instances = plausible
    with open('searxes.txt', 'w') as f:
        f.write('\n'.join(plausible))

    await msg.edit(content='Instances refreshed!')


async def _old_basic_search(self, ctx, query: str,
                            category: str = None):
    """Base search message generation."""

    async with ctx.typing():
        is_nsfw = (
            ctx.channel.is_nsfw() if hasattr(ctx.channel, 'is_nsfw')
            else False
        )

        msg = await self._old_search_logic(query, is_nsfw, category)
        await ctx.send(msg)

        await self.info(
            content=(
                f"**{ctx.author}** searched for `{query}` "
                f"in \"{ctx.guild}\" and got this:"
                f"\n\n{msg}"
            ),
            name="Search Results"
        )