Compare commits

...

3 Commits

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Luna | 511ce6af16 | add note about working with larger datasets | 2023-06-10 18:37:03 -03:00 |
| Luna | c213987859 | add usage notes to readme | 2023-06-10 18:36:18 -03:00 |
| Luna | faeb1fd7b3 | add practical error column | 2023-06-10 18:36:12 -03:00 |
2 changed files with 39 additions and 2 deletions


@@ -12,8 +12,37 @@ score formula:
then average for all posts
system dependencies:
- python3
- [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) with the [tagger extension](https://github.com/toriato/stable-diffusion-webui-wd14-tagger)
- [hydrus-dd](https://gitgud.io/koto/hydrus-dd)
```sh
python3 -m venv env
env/bin/pip install -Ur ./requirements.txt
env/bin/python3 ./main.py
# by default, downloads 30 images at page 150 of the default empty query
env/bin/python3 ./main.py download_images
# gets 40 images at page 150 from tag 'rating:questionable'
# you should add more tags to diversify the dataset before calculating scores
env/bin/python3 ./main.py download_images 'rating:questionable' 40 150
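# e.g. a second pull under another tag (illustrative values, same
# query/count/page argument order as above):
env/bin/python3 ./main.py download_images 'rating:general' 40 150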
# configure interrogators / tagger models
# set sd_webui_address to your stable diffusion webui's address
# set dd_address to hydrus-dd's address
# and set dd_model_name to be something identifiable about the model
# i set it to the md5sum of my model file, so that if the file ever changes
# on koto's end, i know my numbers may differ
cp config.example.json config.json
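# for reference, config.json ends up along these lines (the addresses and
# model name below are illustrative assumptions, not shipped defaults):
# {
#   "sd_webui_address": "http://127.0.0.1:7860",
#   "dd_address": "http://127.0.0.1:4443",
#   "dd_model_name": "<md5sum of your hydrus-dd model file>"
# }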
# fight mode -- run all interrogators against the dataset you've downloaded
env/bin/python3 ./main.py fight
# score mode -- crunch the final numbers, generating graphs under the plots/ folder
env/bin/python3 ./main.py score
# keep in mind that you can download more images, run fight mode, and then
# run score mode! the commands are aware of work that's already been done and
# will only run the tagger models for the new files (sketched below)
```
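The incremental behavior mentioned at the end of the block amounts to skipping files that already have stored tagger output. A minimal sketch of that pattern, assuming results live in a JSON object keyed by image filename (hypothetical layout and helper name, not the actual main.py code):

```python
import json
from pathlib import Path

def files_needing_fight(image_dir: Path, results_path: Path) -> list[Path]:
    """Return only the images that have no stored tagger output yet."""
    done: set[str] = set()
    if results_path.exists():
        # assume results are stored as a JSON object keyed by image filename
        done = set(json.loads(results_path.read_text()).keys())
    return [p for p in sorted(image_dir.iterdir()) if p.name not in done]
```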

main.py

@@ -472,7 +472,12 @@ def plot2(output_path, normalized_scores, model_scores):
 def plot3(output_path, normalized_scores, model_scores):
-    data_for_df = {"model": [], "errors": [], "rating_errors": []}
+    data_for_df = {
+        "model": [],
+        "errors": [],
+        "rating_errors": [],
+        "practical_errors": [],
+    }
     for model in sorted(
         normalized_scores.keys(),
@@ -488,9 +493,11 @@ def plot3(output_path, normalized_scores, model_scores):
             for rating in ["general", "sensitive", "questionable", "explicit"]
             if rating in score_data["incorrect_tags"]
         )
+        practical_absolute_error = total_incorrect_tags - total_rating_errors
         data_for_df["errors"].append(total_incorrect_tags)
         data_for_df["rating_errors"].append(total_rating_errors)
+        data_for_df["practical_errors"].append(practical_absolute_error)
         data_for_df["model"].append(model)
     df = pd.DataFrame(data_for_df)
@@ -499,6 +506,7 @@ def plot3(output_path, normalized_scores, model_scores):
         data=[
             go.Bar(name="incorrect tags", x=df.model, y=df.errors),
             go.Bar(name="incorrect ratings", x=df.model, y=df.rating_errors),
+            go.Bar(name="practical error", x=df.model, y=df.practical_errors),
         ]
     )
     pio.write_image(fig, output_path, width=1024, height=800)
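The new "practical error" bar is just the incorrect-tag count minus the rating errors, i.e. the mistakes presumably left after discounting rating tags. A quick illustration with made-up numbers:

```python
# illustrative figures, not output from a real run
total_incorrect_tags = 120  # every tag the model got wrong
total_rating_errors = 15    # the subset that were rating mistakes
practical_absolute_error = total_incorrect_tags - total_rating_errors
print(practical_absolute_error)  # 105
```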