decompress csv files
This commit is contained in:
parent
ed0696721f
commit
2c333ee00b
|
@ -31,7 +31,10 @@ async def main():
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(url)
|
||||||
parsed_path = Path(parsed.path)
|
parsed_path = Path(parsed.path)
|
||||||
output_path = Path.cwd() / parsed_path.name
|
output_path = Path.cwd() / parsed_path.name
|
||||||
if not output_path.exists():
|
if output_path.exists():
|
||||||
|
log.info("file %s already exists, ignoring", output_path)
|
||||||
|
continue
|
||||||
|
|
||||||
log.info("downloading %r into %s", url, output_path)
|
log.info("downloading %r into %s", url, output_path)
|
||||||
async with ctx.session.get(url) as resp:
|
async with ctx.session.get(url) as resp:
|
||||||
assert resp.status == 200
|
assert resp.status == 200
|
||||||
|
@ -57,8 +60,19 @@ async def main():
|
||||||
log.info("copying temp to output")
|
log.info("copying temp to output")
|
||||||
with output_path.open(mode="wb") as output_fd:
|
with output_path.open(mode="wb") as output_fd:
|
||||||
shutil.copyfileobj(temp_fd, output_fd)
|
shutil.copyfileobj(temp_fd, output_fd)
|
||||||
else:
|
|
||||||
log.info("file %s already exists, ignoring", output_path)
|
# decompress
|
||||||
|
for url in urls:
|
||||||
|
parsed = urlparse(url)
|
||||||
|
parsed_path = Path(parsed.path)
|
||||||
|
input_path = Path.cwd() / parsed_path.name
|
||||||
|
original_name, original_extension, _gz = parsed_path.name.split(".")
|
||||||
|
output_path = Path.cwd() / f"{original_name}.{original_extension}"
|
||||||
|
if output_path.exists():
|
||||||
|
continue
|
||||||
|
with gzip.open(input_path, "rb") as in_fd:
|
||||||
|
with output_path.open(mode="wb") as out_fd:
|
||||||
|
shutil.copyfileobj(in_fd, out_fd)
|
||||||
|
|
||||||
# now that everything's downloaded, compile the db
|
# now that everything's downloaded, compile the db
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue