skip latest pages

they probably don't have good ground truth to begin with
This commit is contained in:
Luna 2023-06-10 15:25:45 -03:00
parent 5a51c67003
commit d8a4f6aaaf
1 changed file with 9 additions and 4 deletions

13
main.py
View File

@@ -157,13 +157,13 @@ class Danbooru(Booru):
title = "Danbooru" title = "Danbooru"
base_url = "https://danbooru.donmai.us" base_url = "https://danbooru.donmai.us"
async def posts(self, tag_query: str, limit): async def posts(self, tag_query: str, limit, page: int):
log.info("%s: submit %r", self.title, tag_query) log.info("%s: submit %r", self.title, tag_query)
async with self.limiter: async with self.limiter:
log.info("%s: submit upstream %r", self.title, tag_query) log.info("%s: submit upstream %r", self.title, tag_query)
async with self.session.get( async with self.session.get(
f"{self.base_url}/posts.json", f"{self.base_url}/posts.json",
params={"tags": tag_query, "limit": limit}, params={"tags": tag_query, "limit": limit, "page": page},
) as resp: ) as resp:
assert resp.status == 200 assert resp.status == 200
rjson = await resp.json() rjson = await resp.json()
@@ -190,7 +190,12 @@ async def download_images(ctx):
try: try:
limit = int(sys.argv[3]) limit = int(sys.argv[3])
except IndexError: except IndexError:
limit = 10 limit = 30
try:
pageskip = int(sys.argv[4])
except IndexError:
pageskip = 150
danbooru = Danbooru( danbooru = Danbooru(
ctx.session, ctx.session,
@@ -198,7 +203,7 @@ async def download_images(ctx):
AsyncLimiter(1, 3), AsyncLimiter(1, 3),
) )
posts = await danbooru.posts(tagquery, limit) posts = await danbooru.posts(tagquery, limit, pageskip)
for post in posts: for post in posts:
if "md5" not in post: if "md5" not in post:
continue continue