skip latest pages

they probably don't have good ground truth to begin with
This commit is contained in:
Luna 2023-06-10 15:25:45 -03:00
parent 5a51c67003
commit d8a4f6aaaf
1 changed file with 9 additions and 4 deletions

13
main.py
View File

@ -157,13 +157,13 @@ class Danbooru(Booru):
title = "Danbooru"
base_url = "https://danbooru.donmai.us"
async def posts(self, tag_query: str, limit):
async def posts(self, tag_query: str, limit, page: int):
log.info("%s: submit %r", self.title, tag_query)
async with self.limiter:
log.info("%s: submit upstream %r", self.title, tag_query)
async with self.session.get(
f"{self.base_url}/posts.json",
params={"tags": tag_query, "limit": limit},
params={"tags": tag_query, "limit": limit, "page": page},
) as resp:
assert resp.status == 200
rjson = await resp.json()
@ -190,7 +190,12 @@ async def download_images(ctx):
try:
limit = int(sys.argv[3])
except IndexError:
limit = 10
limit = 30
try:
pageskip = int(sys.argv[4])
except IndexError:
pageskip = 150
danbooru = Danbooru(
ctx.session,
@ -198,7 +203,7 @@ async def download_images(ctx):
AsyncLimiter(1, 3),
)
posts = await danbooru.posts(tagquery, limit)
posts = await danbooru.posts(tagquery, limit, pageskip)
for post in posts:
if "md5" not in post:
continue