ScrapHacks/tools/analyze_read_trace.py

76 lines
2.3 KiB
Python
Raw Permalink Normal View History

import msgpack as mp
import sys
import os
from tqdm import tqdm
import struct
import binascii
import string
import re
from binascii import hexlify
def gen():
with open(sys.argv[1], "rb") as fh:
size = os.stat(sys.argv[1]).st_size
progbar = tqdm(
total=size, unit="bytes", unit_scale=True, unit_divisor=1024, leave=False
)
pos = 0
for entry in mp.Unpacker(fh, raw=True):
progbar.update(fh.tell() - pos)
pos = fh.tell()
for k in entry.copy():
k_s = str(k, "utf8")
if k_s not in ["data", "stack", "timestamp"]:
entry[k] = str(entry.pop(k), "utf8")
entry[k_s] = entry.pop(k)
entry["stack"] = "|".join(
["{:08X}".format(int(str(v, "utf8"), 16)) for v in entry["stack"][::-1]]
)
yield entry
def strdump(data):
printable_chars = set(bytes(string.printable, "ascii")) - set(b"\n\r\t\x0b\x0c")
return "".join(chr(c) if c in printable_chars else "." for c in data)
# best=sorted(tqdm(gen(),ascii=True),key=lambda v:len(v['data']),reverse=True)
# def score(entry):
# return len(entry['data'])
# def analyze(entry):
# data=entry['data']
# entry['infos'] = {
# 'len':len(data),
# }
# for bo in "><":
# for t in "hHiIlLqQefd":
# fmt="{}{}".format(bo,t)
# if len(data)%struct.calcsize(fmt)==0:
# entry['infos'][fmt]=[v[0] for v in struct.iter_unpack(fmt,data)]
# return entry
filters = sys.argv[2:]
with open("all.log", "w") as of:
for entry in gen():
fm = any(
all(s.lower() in entry["filename"].lower() for s in f.split("|"))
for f in filters
)
if filters and not fm:
continue
is_magic = bytes(entry["block_id"], "utf8").ljust(4, b"\0") == entry["data"]
entry["data_len"] = len(entry["data"])
entry["str"] = strdump(entry["data"])
entry["data"] = entry["data"].hex().upper()
if is_magic:
print("#" * 50, file=of)
print(
"{timestamp} {block_id} {filename} {data_len:08X} {data} {str}".format(
**entry
),
file=of,
)