Compare commits

...

3 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Luna | 511ce6af16 | add note about working with larger datasets | 2023-06-10 18:37:03 -03:00 |
| Luna | c213987859 | add usage notes to readme | 2023-06-10 18:36:18 -03:00 |
| Luna | faeb1fd7b3 | add practical error column | 2023-06-10 18:36:12 -03:00 |
2 changed files with 39 additions and 2 deletions


@@ -12,8 +12,37 @@ score formula:
then average for all posts
system dependencies:
- python3
- [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) with the [tagger extension](https://github.com/toriato/stable-diffusion-webui-wd14-tagger)
- [hydrus-dd](https://gitgud.io/koto/hydrus-dd)
```sh
python3 -m venv env
env/bin/pip install -Ur ./requirements.txt
env/bin/python3 ./main.py
# by default, downloads 30 images at page 150 of the default empty query
env/bin/python3 ./main.py download_images
# gets 40 images at page 150 from tag 'rating:questionable'
# you should add more tags to diversify the dataset before calculating scores
env/bin/python3 ./main.py download_images 'rating:questionable' 40 150
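# e.g. a second pull under another tag (illustrative values, same
# query/count/page argument order as above):
env/bin/python3 ./main.py download_images 'rating:general' 40 150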
# configure interrogators / tagger models
# set sd_webui_address to your stable diffusion webui's address
# set dd_address to hydrus-dd's address
# and set dd_model_name to be something identifiable about the model
# i set it to the md5sum of my model file, so that if the file ever changes
# on koto's end, i know my numbers may differ
cp config.example.json config.json
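# for reference, config.json ends up along these lines (the addresses and
# model name below are illustrative assumptions, not shipped defaults):
# {
#   "sd_webui_address": "http://127.0.0.1:7860",
#   "dd_address": "http://127.0.0.1:4443",
#   "dd_model_name": "<md5sum of your hydrus-dd model file>"
# }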
# fight mode -- run all interrogators against the dataset you've downloaded
env/bin/python3 ./main.py fight
# score mode -- crunch the final numbers, generating graphs under the plots/ folder
env/bin/python3 ./main.py score
# keep in mind that you can download more images, run fight mode, and then
# run score mode! the commands are aware of work that's already been done and
# will only run the tagger models for the new files (sketched below)
```
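The incremental behavior mentioned at the end of the block amounts to skipping files that already have stored tagger output. A minimal sketch of that pattern, assuming results live in a JSON object keyed by image filename (hypothetical layout and helper name, not the actual main.py code):

```python
import json
from pathlib import Path

def files_needing_fight(image_dir: Path, results_path: Path) -> list[Path]:
    """Return only the images that have no stored tagger output yet."""
    done: set[str] = set()
    if results_path.exists():
        # assume results are stored as a JSON object keyed by image filename
        done = set(json.loads(results_path.read_text()).keys())
    return [p for p in sorted(image_dir.iterdir()) if p.name not in done]
```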

main.py

@@ -472,7 +472,12 @@ def plot2(output_path, normalized_scores, model_scores):
 def plot3(output_path, normalized_scores, model_scores):
-    data_for_df = {"model": [], "errors": [], "rating_errors": []}
+    data_for_df = {
+        "model": [],
+        "errors": [],
+        "rating_errors": [],
+        "practical_errors": [],
+    }
     for model in sorted(
         normalized_scores.keys(),
@@ -488,9 +493,11 @@ def plot3(output_path, normalized_scores, model_scores):
             for rating in ["general", "sensitive", "questionable", "explicit"]
             if rating in score_data["incorrect_tags"]
         )
+        practical_absolute_error = total_incorrect_tags - total_rating_errors
         data_for_df["errors"].append(total_incorrect_tags)
         data_for_df["rating_errors"].append(total_rating_errors)
+        data_for_df["practical_errors"].append(practical_absolute_error)
         data_for_df["model"].append(model)
     df = pd.DataFrame(data_for_df)
@@ -499,6 +506,7 @@ def plot3(output_path, normalized_scores, model_scores):
         data=[
             go.Bar(name="incorrect tags", x=df.model, y=df.errors),
             go.Bar(name="incorrect ratings", x=df.model, y=df.rating_errors),
+            go.Bar(name="practical error", x=df.model, y=df.practical_errors),
         ]
     )
     pio.write_image(fig, output_path, width=1024, height=800)
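The new "practical error" bar is just the incorrect-tag count minus the rating errors, i.e. the mistakes presumably left after discounting rating tags. A quick illustration with made-up numbers:

```python
# illustrative figures, not output from a real run
total_incorrect_tags = 120  # every tag the model got wrong
total_rating_errors = 15    # the subset that were rating mistakes
practical_absolute_error = total_incorrect_tags - total_rating_errors
print(practical_absolute_error)  # 105
```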