[scripts/leveldb-dump] support setting separate key and value encodings

This commit is contained in:
Dmytro Meleshko 2021-04-23 21:38:37 +03:00
parent 44ba053b7c
commit 660316fb14
1 changed file with 19 additions and 13 deletions

View File

@ -8,36 +8,42 @@ from sys import stdout
import base64
# Command-line interface.
#
# ``--encoding`` / ``-e`` selects the output encoding for both keys and values.
# ``--key-encoding`` and ``--value-encoding`` default to None so that the code
# consuming ``cli_args`` can tell "not given" apart from an explicit choice.
#
# Only one option may own the ``-e`` short flag; registering a second option
# with the same flag makes argparse fail with a "conflicting option string"
# error. The earlier ``--encode`` spelling is still accepted on the command
# line through argparse's default unambiguous-prefix matching of long options.
parser = argparse.ArgumentParser()

# Every encoding name accepted by the three encoding options.
encoding_names = ["utf8", "base16", "base32", "base64", "base85"]

parser.add_argument(
    "--encoding",
    "-e",
    choices=encoding_names,
    default="utf8",
    help="encoding used for both keys and values (default: %(default)s)",
)
parser.add_argument(
    "--key-encoding",
    choices=encoding_names,
    default=None,
    help="encoding used for keys only (default: same as --encoding)",
)
parser.add_argument(
    "--value-encoding",
    choices=encoding_names,
    default=None,
    help="encoding used for values only (default: same as --encoding)",
)
parser.add_argument("db_path", type=Path, help="path to the LevelDB directory")

cli_args = parser.parse_args()
def bytes_to_json(b: bytes, encoding: str) -> Union[str, list[int]]:
    """Convert raw bytes into a JSON-serializable value.

    Args:
        b: The raw bytes to convert.
        encoding: One of "utf8", "base16", "base32", "base64" or "base85".

    Returns:
        For "utf8": the decoded string, or, when the bytes are not valid
        UTF-8, a list of the individual byte values so that no data is lost.
        For the base-N encodings: the encoded ASCII text.

    Raises:
        AssertionError: If ``encoding`` is not one of the supported names.
    """
    if encoding == "utf8":
        try:
            return b.decode("utf8")
        except UnicodeDecodeError:
            # Not valid UTF-8: fall back to a lossless list of integers.
            return list(b)

    encoders = {
        "base16": base64.b16encode,
        "base32": base64.b32encode,
        "base64": base64.b64encode,
        "base85": base64.b85encode,
    }
    encoder = encoders.get(encoding)
    if encoder is None:
        # Raised explicitly instead of using ``assert False``: assert
        # statements are removed under ``python -O``, which would make the
        # function silently return None for an unknown encoding.
        raise AssertionError(f"unsupported encoding: {encoding!r}")
    return encoder(b).decode("ascii")
# Resolve the effective encodings: a specific --key-encoding/--value-encoding
# wins, otherwise the shared --encoding applies.
key_encoding: str = cli_args.key_encoding or cli_args.encoding
value_encoding: str = cli_args.value_encoding or cli_args.encoding

# create_if_missing=False: dumping must never create a database as a side
# effect of pointing the script at a wrong path.
db = plyvel.DB(str(cli_args.db_path), create_if_missing=False)
try:
    with db.iterator() as iterator:
        # Emit one JSON object per line for every key/value pair.
        for key, value in iterator:
            json.dump(
                {
                    "key": bytes_to_json(key, key_encoding),
                    "value": bytes_to_json(value, value_encoding),
                },
                stdout,
            )
            stdout.write("\n")
finally:
    # Release the database handle even if dumping fails part-way through.
    db.close()