Use wc for line count

This commit is contained in:
Luna 2022-08-28 16:17:17 -03:00
parent 521da51092
commit 75926b5b8c

View file

@ -194,13 +194,16 @@ async def main_with_ctx(ctx, wanted_date):
await ctx.db.commit()
log.info("going to process posts")
line_count_str = subprocess.check_output(
["wc", "-l", output_compressed_paths["posts"]], stdout=subprocess.PIPE
)
line_count = int(line_count_str)
with output_uncompressed_paths["posts"].open(
mode="r", encoding="utf-8"
) as posts_csv_fd:
line_count = 0
for line in posts_csv_fd:
line_count += 1
line_count -= 1 # remove header
log.info("%d posts to import", line_count)
@ -208,7 +211,7 @@ async def main_with_ctx(ctx, wanted_date):
posts_reader = csv.DictReader(posts_csv_fd)
processed_count = 0
processed_ratio = 0
processed_ratio = 0.0
for row in posts_reader:
created_at_str = row["created_at"]
@ -270,9 +273,9 @@ async def main_with_ctx(ctx, wanted_date):
),
)
processed_count += 1
new_processed_ratio = round((processed_count / line_count) * 100)
if new_processed_ratio != processed_ratio:
log.info("posts processed at %d%%", processed_ratio)
new_processed_ratio = round((processed_count / line_count) * 100, 2)
if str(new_processed_ratio) != str(processed_ratio):
log.info("posts processed at %.2f%%", processed_ratio)
processed_ratio = new_processed_ratio
log.info("posts done")