|
|
|
@ -10,6 +10,10 @@ import asyncio
|
|
|
|
|
import aiosqlite
|
|
|
|
|
import base64
|
|
|
|
|
import logging
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import plotly.express as px
|
|
|
|
|
import plotly.graph_objs as go
|
|
|
|
|
import plotly.io as pio
|
|
|
|
|
from collections import defaultdict, Counter
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
@ -63,7 +67,7 @@ class DDInterrogator(Interrogator):
|
|
|
|
|
else:
|
|
|
|
|
original_danbooru_tag = tag
|
|
|
|
|
if original_danbooru_tag == "safe":
|
|
|
|
|
continue
|
|
|
|
|
original_danbooru_tag = "general"
|
|
|
|
|
new_lst.append(original_danbooru_tag)
|
|
|
|
|
return new_lst
|
|
|
|
|
|
|
|
|
@ -157,13 +161,13 @@ class Danbooru(Booru):
|
|
|
|
|
title = "Danbooru"
|
|
|
|
|
base_url = "https://danbooru.donmai.us"
|
|
|
|
|
|
|
|
|
|
async def posts(self, tag_query: str, limit):
|
|
|
|
|
async def posts(self, tag_query: str, limit, page: int):
|
|
|
|
|
log.info("%s: submit %r", self.title, tag_query)
|
|
|
|
|
async with self.limiter:
|
|
|
|
|
log.info("%s: submit upstream %r", self.title, tag_query)
|
|
|
|
|
async with self.session.get(
|
|
|
|
|
f"{self.base_url}/posts.json",
|
|
|
|
|
params={"tags": tag_query, "limit": limit},
|
|
|
|
|
params={"tags": tag_query, "limit": limit, "page": page},
|
|
|
|
|
) as resp:
|
|
|
|
|
assert resp.status == 200
|
|
|
|
|
rjson = await resp.json()
|
|
|
|
@ -190,7 +194,12 @@ async def download_images(ctx):
|
|
|
|
|
try:
|
|
|
|
|
limit = int(sys.argv[3])
|
|
|
|
|
except IndexError:
|
|
|
|
|
limit = 10
|
|
|
|
|
limit = 30
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
pageskip = int(sys.argv[4])
|
|
|
|
|
except IndexError:
|
|
|
|
|
pageskip = 150
|
|
|
|
|
|
|
|
|
|
danbooru = Danbooru(
|
|
|
|
|
ctx.session,
|
|
|
|
@ -198,7 +207,7 @@ async def download_images(ctx):
|
|
|
|
|
AsyncLimiter(1, 3),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
posts = await danbooru.posts(tagquery, limit)
|
|
|
|
|
posts = await danbooru.posts(tagquery, limit, pageskip)
|
|
|
|
|
for post in posts:
|
|
|
|
|
if "md5" not in post:
|
|
|
|
|
continue
|
|
|
|
@ -399,6 +408,101 @@ async def scores(ctx):
|
|
|
|
|
print("]")
|
|
|
|
|
print("most incorrect tags", incorrect_tags_counters[model].most_common(5))
|
|
|
|
|
|
|
|
|
|
PLOTS = Path.cwd() / "plots"
|
|
|
|
|
PLOTS.mkdir(exist_ok=True)
|
|
|
|
|
|
|
|
|
|
log.info("plotting score histogram...")
|
|
|
|
|
|
|
|
|
|
data_for_df = {}
|
|
|
|
|
data_for_df["scores"] = []
|
|
|
|
|
data_for_df["model"] = []
|
|
|
|
|
|
|
|
|
|
for model in sorted(
|
|
|
|
|
normalized_scores.keys(),
|
|
|
|
|
key=lambda model: normalized_scores[model],
|
|
|
|
|
reverse=True,
|
|
|
|
|
):
|
|
|
|
|
for post_score in (d["score"] for d in model_scores[model].values()):
|
|
|
|
|
data_for_df["scores"].append(post_score)
|
|
|
|
|
data_for_df["model"].append(model)
|
|
|
|
|
|
|
|
|
|
df = pd.DataFrame(data_for_df)
|
|
|
|
|
fig = px.histogram(
|
|
|
|
|
df,
|
|
|
|
|
x="scores",
|
|
|
|
|
color="model",
|
|
|
|
|
histfunc="count",
|
|
|
|
|
marginal="rug",
|
|
|
|
|
histnorm="probability",
|
|
|
|
|
)
|
|
|
|
|
pio.write_image(fig, PLOTS / "score_histogram.png", width=1024, height=800)
|
|
|
|
|
|
|
|
|
|
log.info("plotting positive histogram...")
|
|
|
|
|
plot2(PLOTS / "positive_score_histogram.png", normalized_scores, model_scores)
|
|
|
|
|
log.info("plotting error rates...")
|
|
|
|
|
plot3(PLOTS / "error_rate.png", normalized_scores, model_scores)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def plot2(output_path, normalized_scores, model_scores):
    """Render a per-model histogram of the non-negative post scores.

    Models are visited from highest to lowest ``normalized_scores`` value.
    For each model, every entry of ``model_scores[model]`` contributes its
    ``"score"`` unless that score is below zero. The resulting figure is
    written to ``output_path`` as a 1024x800 image.

    Args:
        output_path: destination of the rendered image.
        normalized_scores: mapping of model name to the value used for
            ordering the models.
        model_scores: mapping of model name to a mapping whose values are
            dicts carrying a ``"score"`` entry (presumably keyed by post;
            verify against the caller).
    """
    ranked_models = sorted(
        normalized_scores.keys(),
        key=lambda name: normalized_scores[name],
        reverse=True,
    )

    kept_scores = []
    kept_models = []
    for name in ranked_models:
        # Only strictly negative scores are dropped; zero is kept.
        non_negative = [
            value
            for value in (entry["score"] for entry in model_scores[name].values())
            if not value < 0
        ]
        kept_scores.extend(non_negative)
        kept_models.extend([name] * len(non_negative))

    frame = pd.DataFrame({"scores": kept_scores, "model": kept_models})
    histogram = px.histogram(
        frame,
        x="scores",
        color="model",
        histfunc="count",
        marginal="rug",
        histnorm="probability",
    )
    pio.write_image(histogram, output_path, width=1024, height=800)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def plot3(output_path, normalized_scores, model_scores):
    """Render a bar chart of tag and rating error counts for each model.

    Models are visited from highest to lowest ``normalized_scores`` value.
    For every model two totals are accumulated over the entries of
    ``model_scores[model]``: the number of items in ``"incorrect_tags"``,
    and how many of the four rating names appear in ``"incorrect_tags"``.
    The figure is written to ``output_path`` as a 1024x800 image.

    Args:
        output_path: destination of the rendered image.
        normalized_scores: mapping of model name to the value used for
            ordering the models.
        model_scores: mapping of model name to a mapping whose values are
            dicts carrying an ``"incorrect_tags"`` collection.
    """
    rating_names = ("general", "sensitive", "questionable", "explicit")

    ranked_models = sorted(
        normalized_scores.keys(),
        key=lambda name: normalized_scores[name],
        reverse=True,
    )

    columns = {"model": [], "errors": [], "rating_errors": []}
    for name in ranked_models:
        tag_error_count = 0
        rating_error_count = 0
        for entry in model_scores[name].values():
            wrong_tags = entry["incorrect_tags"]
            tag_error_count += len(wrong_tags)
            # A rating name listed among the incorrect tags is one rating error.
            rating_error_count += len(
                [rating for rating in rating_names if rating in wrong_tags]
            )

        columns["errors"].append(tag_error_count)
        columns["rating_errors"].append(rating_error_count)
        columns["model"].append(name)

    frame = pd.DataFrame(columns)

    bars = [
        go.Bar(name="incorrect tags", x=frame["model"], y=frame["errors"]),
        go.Bar(name="incorrect ratings", x=frame["model"], y=frame["rating_errors"]),
    ]
    chart = go.Figure(data=bars)
    pio.write_image(chart, output_path, width=1024, height=800)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def realmain(ctx):
|
|
|
|
|
await ctx.db.executescript(
|
|
|
|
|