Compare commits

..

No commits in common. "5a857da5d4417949c6f18e20ed38296e21216187" and "21e05ba5464b4ced424e1057dbd429449d0bf28f" have entirely different histories.

5 changed files with 77 additions and 139 deletions

View File

@ -1,5 +0,0 @@
*.csv.gz
*.csv
*.db
env/
authfile.example

View File

@ -1,8 +0,0 @@
# Image that serves the e621_api_cloner app with hypercorn on port 1337.
FROM python:3.10-alpine
# --no-cache keeps the apk package index out of the image layer.
RUN apk add --no-cache sqlite
# COPY is preferred over ADD for plain local files: no implicit archive
# extraction or remote-URL handling.
COPY . ./e621_api_cloner
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir -Ur ./e621_api_cloner/requirements.txt
# Create the database file if the build context did not provide one.
RUN touch ./e621_api_cloner/e621.db
EXPOSE 1337
WORKDIR /e621_api_cloner
CMD ["hypercorn", "--access-log", "-", "-b", "0.0.0.0:1337", "e621_api_cloner:app"]

View File

@ -1,2 +0,0 @@
5a8649fb426e13c080e118332f44b3adb70cc9c677fba58c3d018313c4b0ad67 my silly tool account 1
504054a7c35af520ab0ddd14e1ad257633fe75ba8601915541abba6cec1a81f7 my silly tool account 2

View File

@ -196,9 +196,6 @@ async def main_with_ctx(ctx, wanted_date):
await ctx.db.commit() await ctx.db.commit()
log.info("going to process posts") log.info("going to process posts")
post_count_rows = await ctx.db.execute_fetchall("select count(*) from posts")
post_count = post_count_rows[0][0]
log.info("already have %d posts", post_count)
with output_uncompressed_paths["posts"].open( with output_uncompressed_paths["posts"].open(
mode="r", encoding="utf-8" mode="r", encoding="utf-8"
@ -210,87 +207,80 @@ async def main_with_ctx(ctx, wanted_date):
line_count -= 1 # remove header line_count -= 1 # remove header
log.info("%d posts to import", line_count) log.info("%d posts to import", line_count)
if line_count == post_count: posts_csv_fd.seek(0)
log.info("already imported everything, skipping") posts_reader = csv.DictReader(posts_csv_fd)
else:
posts_csv_fd.seek(0)
posts_reader = csv.DictReader(posts_csv_fd)
processed_count = 0 processed_count = 0
processed_ratio = 0.0 processed_ratio = 0.0
for row in posts_reader: for row in posts_reader:
created_at_str = row["created_at"] created_at_str = row["created_at"]
created_at = datetime.strptime( created_at = datetime.strptime(
created_at_str[: created_at_str.find(".")], "%Y-%m-%d %H:%M:%S" created_at_str[: created_at_str.find(".")], "%Y-%m-%d %H:%M:%S"
) )
post = Post( post = Post(
id=int(row["id"]), id=int(row["id"]),
uploader_id=int(row["uploader_id"]), uploader_id=int(row["uploader_id"]),
created_at=int(created_at.timestamp()), created_at=int(created_at.timestamp()),
md5=row["md5"], md5=row["md5"],
source=row["source"], source=row["source"],
rating=row["rating"], rating=row["rating"],
tag_string=row["tag_string"], tag_string=row["tag_string"],
is_deleted=e621_bool(row["is_deleted"]), is_deleted=e621_bool(row["is_deleted"]),
is_pending=e621_bool(row["is_pending"]), is_pending=e621_bool(row["is_pending"]),
is_flagged=e621_bool(row["is_flagged"]), is_flagged=e621_bool(row["is_flagged"]),
score=int(row["score"]), score=int(row["score"]),
up_score=int(row["up_score"]), up_score=int(row["up_score"]),
down_score=int(row["down_score"]), down_score=int(row["down_score"]),
is_rating_locked=e621_bool(row["is_rating_locked"]), is_rating_locked=e621_bool(row["is_rating_locked"]),
) )
await ctx.db.execute( await ctx.db.execute(
""" """
insert into posts ( insert into posts (
id, id,
uploader_id, uploader_id,
created_at, created_at,
md5, md5,
source, source,
rating, rating,
tag_string, tag_string,
is_deleted, is_deleted,
is_pending, is_pending,
is_flagged, is_flagged,
score, score,
up_score, up_score,
down_score, down_score,
is_rating_locked is_rating_locked
) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?) ) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
""", """,
( (
post.id, post.id,
post.uploader_id, post.uploader_id,
post.created_at, post.created_at,
post.md5, post.md5,
post.source, post.source,
post.rating, post.rating,
post.tag_string, post.tag_string,
post.is_deleted, post.is_deleted,
post.is_pending, post.is_pending,
post.is_flagged, post.is_flagged,
post.score, post.score,
post.up_score, post.up_score,
post.down_score, post.down_score,
post.is_rating_locked, post.is_rating_locked,
), ),
) )
processed_count += 1 processed_count += 1
new_processed_ratio = round((processed_count / line_count) * 100, 2) new_processed_ratio = round((processed_count / line_count) * 100, 2)
if str(new_processed_ratio) != str(processed_ratio): if str(new_processed_ratio) != str(processed_ratio):
log.info("posts processed at %.2f%%", processed_ratio) log.info("posts processed at %.2f%%", processed_ratio)
processed_ratio = new_processed_ratio processed_ratio = new_processed_ratio
log.info("posts done") log.info("posts done")
await ctx.db.commit() await ctx.db.commit()
log.info("vacuuming db...")
await ctx.db.execute("vacuum")
log.info("database built")
async def main(): async def main():

View File

@ -7,7 +7,6 @@ import sqlite3
import sys import sys
import os import os
import enum import enum
from pathlib import Path
from datetime import datetime from datetime import datetime
from dataclasses import dataclass, asdict from dataclasses import dataclass, asdict
from hypercorn.asyncio import serve, Config from hypercorn.asyncio import serve, Config
@ -20,32 +19,13 @@ app = Quart(__name__)
@app.before_serving
async def app_before_serving():
    """Configure logging, open the database and load optional API keys.

    Reads these environment variables:
        DEBUG: when set to a non-empty value, log at DEBUG instead of INFO.
        DBPATH: path handed to ``aiosqlite.connect`` (required).
        AUTHFILE: optional path to a file with one ``<api_key> <user name>``
            entry per line.

    Raises:
        KeyError: if DBPATH is not set.
    """
    logging.basicConfig(
        level=logging.DEBUG if os.environ.get("DEBUG") else logging.INFO
    )
    db_path = os.environ["DBPATH"]
    app.db = await aiosqlite.connect(db_path)
    app.db.row_factory = sqlite3.Row

    # contains api keys
    maybe_authfile_path = os.environ.get("AUTHFILE")
    app.maybe_authfile = Path(maybe_authfile_path) if maybe_authfile_path else None

    # Always define the mapping so it exists even when auth is disabled.
    app.apikeys = {}
    if app.maybe_authfile:
        log.info("loading auth with api keys")
        with app.maybe_authfile.open(mode="r", encoding="utf-8") as fd:
            for line in fd:
                # Strip first so a key with no user name does not keep its
                # trailing newline, then split on the first space only.
                api_key, _, user_name = line.strip().partition(" ")
                if not api_key:
                    # Blank line: do not register an empty key.
                    continue
                app.apikeys[api_key] = user_name.strip()
@app.after_serving
async def app_after_serving():
    """Run SQLite's optimizer pragmas, then close the database connection.

    The connection is closed in ``finally`` so that a failing PRAGMA does
    not leave it open; the PRAGMA error still propagates to the caller.
    """
    try:
        log.info("possibly optimizing database")
        await app.db.execute("PRAGMA analysis_limit=400")
        await app.db.execute("PRAGMA optimize")
    finally:
        log.info("closing connection")
        await app.db.close()
@ -116,34 +96,8 @@ class TagCategory(enum.IntEnum):
}[self] }[self]
async def maybe_do_authentication():
    """Check the request's Bearer API key when an auth file is configured.

    Returns:
        None when the request may proceed (no auth file configured, or the
        presented key is known); otherwise a ``(message, 400)`` tuple that
        the route handler returns as its response.
    """
    if not app.maybe_authfile:
        return None

    auth_line = request.headers.get("authorization")
    if not auth_line:
        return "authorization header required", 400

    # partition() never raises, unlike unpacking split(" "), which failed
    # with ValueError on a header with no space or more than one.
    auth_type, _, auth_data = auth_line.partition(" ")
    if auth_type != "Bearer":
        log.warning("invalid auth type")
        return "invalid auth token type (must be Bearer)", 400

    # An empty token is never a valid key, whatever the key table contains.
    auth_name = app.apikeys.get(auth_data) if auth_data else None
    if auth_name is None:
        log.warning("invalid auth value")
        return "invalid auth token (unknown api key)", 400

    log.info("logged in as %r", auth_name)
    return None
@app.route("/posts.json") @app.route("/posts.json")
async def posts_json(): async def posts_json():
res = await maybe_do_authentication()
if res:
return res
tag_str = request.args["tags"] tag_str = request.args["tags"]
tags = tag_str.split(" ") tags = tag_str.split(" ")
if len(tags) != 1: if len(tags) != 1:
@ -176,3 +130,12 @@ async def posts_json():
post_json["tags"][category_str].append(tag) post_json["tags"][category_str].append(tag)
return {"posts": [post_json]} return {"posts": [post_json]}
if __name__ == "__main__":
    # Script entry point: configure logging from the DEBUG environment
    # variable, then serve the app with hypercorn on 0.0.0.0:1334.
    log_level = logging.INFO
    if os.environ.get("DEBUG"):
        log_level = logging.DEBUG
    logging.basicConfig(level=log_level)

    server_config = Config()
    server_config.bind = ["0.0.0.0:1334"]
    asyncio.run(serve(app, server_config))