diff --git a/build_database.py b/build_database.py index 0dc06b6..c724fe6 100644 --- a/build_database.py +++ b/build_database.py @@ -1,4 +1,5 @@ import asyncio +import gzip import logging import shutil import tempfile @@ -56,6 +57,8 @@ async def main(): log.info("download at %d%%", download_ratio) download_ratio = new_download_ratio + temp_fd.seek(0) + # write to output log.info("copying temp to output") with output_path.open(mode="wb") as output_fd: @@ -69,7 +72,10 @@ async def main(): original_name, original_extension, _gz = parsed_path.name.split(".") output_path = Path.cwd() / f"{original_name}.{original_extension}" if output_path.exists(): + log.info("decompressed file %s already exists, ignoring", output_path) continue + + log.info("decompressing %s into %s", input_path.name, output_path.name) with gzip.open(input_path, "rb") as in_fd: with output_path.open(mode="wb") as out_fd: shutil.copyfileobj(in_fd, out_fd)