Choggbuster is a set of Python scripts for automated preprocessing of video DVDs for archival and filtering.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

430 lines
12 KiB

import itertools as ITT
import json
import os
import subprocess as SP
import sys
from pprint import pprint
from fractions import Fraction
from glob import glob
from collections import Counter
from tqdm import tqdm
from time import perf_counter
def pulldown(fields_per_second, frames_per_second):
    """Form the exact field-rate to frame-rate ratio.

    The ratio is built as a ``Fraction`` but is neither stored nor returned,
    so the call always evaluates to ``None``.

    Raises:
        ZeroDivisionError: if ``frames_per_second`` is zero.
    """
    ratio = Fraction(fields_per_second, frames_per_second)
# Lookup from a stream's "color_space" name to the integer that make_line
# writes into the matrix column of each data line. No name maps to 3.
# NOTE(review): names presumably mirror ffprobe's color_space strings - confirm.
colorspace = {
    "gbr": 0,
    "bt709": 1,
    "unknown": 2,
    "fcc": 4,
    "bt470bg": 5,
    "smpte170m": 6,
    "smpte240m": 7,
    "ycgco": 8,
    "bt2020nc": 9,
    "bt2020c": 10,
    "smpte2085": 11,
    "chroma-derived-nc": 12,
    "chroma-derived-c": 13,
    "ictcp": 14,
}

# Two-bit picture-type codes; make_flags shifts them into bits 4-5 of a flag.
pict_types = {
    "I": 1,
    "P": 2,
    "B": 3,
}
def make_info(frames):
    """Build the info bit field that prefixes one data line.

    Bit layout of the returned integer:
      * bit 11 - always set
      * bit 10 - left clear (0=Closed GOP, 1=Open GOP)
      * bit 9  - set when no frame in the group is flagged interlaced
      * bit 8  - set when the first frame has a "timecode" entry under "tags"

    Args:
        frames: non-empty sequence of frame dicts. "interlaced_frame" may be
            an int or a numeric string such as "0"/"1".

    Returns:
        The bit field as an int.

    Raises:
        IndexError: if ``frames`` is empty.
    """
    # Frame values parsed from ffprobe's compact output are strings, and the
    # string "0" is truthy; convert to int before testing (as make_flags does)
    # so progressive material is not reported as interlaced.
    has_interlaced = any(int(frame["interlaced_frame"]) for frame in frames)
    new_gop = "timecode" in frames[0].get("tags", {})

    info = 1 << 11  # always 1
    info |= 0 << 10  # 0=Closed GOP, 1=Open GOP
    info |= (not has_interlaced) << 9  # Progressive
    info |= new_gop << 8
    return info
def make_flags(frames):
    """Encode every frame as a two-digit hexadecimal flag string.

    Bit layout of each flag:
      * bit 7    - always set (no frame is marked as needing its predecessor)
      * bit 6    - set when "interlaced_frame" is zero
      * bits 4-5 - picture type code looked up in ``pict_types``
      * bits 2-3 - reserved, left clear
      * bit 1    - "top_field_first"
      * bit 0    - "repeat_pict"

    Args:
        frames: iterable of frame dicts with the keys named above and
            "pict_type".

    Returns:
        A list with one lowercase hex string per frame.
    """
    encoded = []
    for entry in frames:
        is_progressive = int(entry["interlaced_frame"]) == 0
        picture_code = pict_types[entry["pict_type"]]
        top_first = int(entry["top_field_first"])
        repeat_first = int(entry["repeat_pict"])
        value = (
            0x80
            | (is_progressive << 6)
            | (picture_code << 4)
            | (top_first << 1)
            | repeat_first
        )
        encoded.append(format(value, "02x"))
    return encoded
def make_line(frames, stream):
    """Render one space-separated data line for a group of frames.

    The line consists of the three-digit hex info field, the matrix code for
    the stream's "color_space" (``"unknown"`` when absent), a file index of 0,
    the "pkt_pos" of the first frame, three zero fields (skip, vob, cell) and
    finally one flag per frame.

    Args:
        frames: non-empty sequence of frame dicts.
        stream: stream dict, optionally carrying "color_space".

    Returns:
        The assembled line as a string, without a trailing newline.
    """
    info_field = format(make_info(frames), "03x")
    matrix_code = colorspace[stream.get("color_space", "unknown")]
    start_offset = frames[0]["pkt_pos"]
    # Column order: info, matrix, file, position, skip, vob, cell, flags...
    columns = [info_field, matrix_code, 0, start_offset, 0, 0, 0]
    columns.extend(make_flags(frames))
    return " ".join(str(column) for column in columns)
def __make_dict(line):
    """Parse one ``|``-separated ``key=value`` record into a nested dict.

    The first field is the record type. Every following ``key=value`` field is
    stored under that type; dots in the key create nested dicts, and a
    ``prefix:name`` component on an intermediate level is reduced to ``name``.
    Fields without ``=`` are ignored. All values are kept as strings.

    Args:
        line: one line of text, e.g. ``"frame|pkt_pos=12|tag:a.b=1"``.

    Returns:
        ``{record_type: parsed_fields}``.
    """
    parsed = {}
    fields = line.strip().split("|")
    record_type = fields[0]
    for field in fields[1:]:
        # Split on the first "=" only so values that themselves contain "="
        # are kept intact instead of raising on unpacking.
        dotted_key, separator, text = field.partition("=")
        if not separator:
            continue
        key_path = dotted_key.split(".")
        node = parsed
        for key in key_path[:-1]:
            if ":" in key:
                key = key.split(":")[1]
            node = node.setdefault(key, {})
        node[key_path[-1]] = text
    return {record_type: parsed}
def judge(info, num_frames):
    """Classify a video as progressive, interlaced or telecined.

    Reads the accumulated ``idet`` counters found at
    ``info["frame"]["lavfi"]["idet"]``, prints a short report and returns the
    numeric counters together with the verdict.

    Args:
        info: parsed record dict containing the idet counters under the
            "repeated", "single" and "multiple" groups.
        num_frames: total number of frames, used to relate the
            "repeated.neither" counter to the 2:3 pulldown expectation.

    Returns:
        A dict with the groups "repeat", "single" and "multiple" (numeric
        entries only). When at least one frame was classified it also holds
        "num_frames", "interlaced", "progressive" and "vid_type".

    Raises:
        ZeroDivisionError: if ``num_frames`` is zero.
    """
    min_num_frames = 250  # minimal number of frames

    raw = info["frame"]["lavfi"]["idet"]
    idet = {}
    for source_key, target_key in (
        ("repeated", "repeat"),
        ("single", "single"),
        ("multiple", "multiple"),
    ):
        counters = {}
        for name, text in raw[source_key].items():
            try:
                counters[name] = int(text)
            except ValueError:
                try:
                    counters[name] = float(text)
                except ValueError:
                    # Non-numeric entries are not counters; leave them out.
                    pass
        idet[target_key] = counters

    repeat_err = abs(
        (idet["repeat"]["neither"] / num_frames) - 0.8
    )  # 2:3 pulldown,4 frames @ ~24 FPS to ~30 FPS = 20% repeated fields
    print(f"Derivation from 2:3 Pulldown: {repeat_err:.2%}")

    tff = idet["multiple"]["tff"]
    bff = idet["multiple"]["bff"]
    progressive = idet["multiple"]["progressive"]
    interlaced = tff + bff
    determined = interlaced + progressive
    print(f"Determined: {determined}")

    # Bail out before any percentage is computed: with no classified frames
    # every ratio below would divide by zero.
    if determined == 0:
        return idet

    if interlaced:
        print(f"Interlaced: {interlaced} (TFF: {tff/interlaced:.2%}, BFF: {bff/interlaced:.2%}) = {interlaced/determined:.2%}")
    else:
        print(f"Interlaced: {interlaced} = {interlaced/determined:.2%}")
    print(f"Progressive: {progressive} = {progressive/determined:.2%}")

    idet["num_frames"] = num_frames
    idet["interlaced"] = interlaced
    idet["progressive"] = progressive
    if determined < min_num_frames:
        print("/!\\ Not enough information to determine interlacing type reliably, results may be inacurate /!\\")

    # NOTE(review): repeat_err is the distance of a ratio from 0.8, so for a
    # ratio within [0, 1] it cannot reach 1.0 and the plain "Interlaced
    # TFF/BFF" verdicts are never chosen. The limit is kept as found - confirm
    # the intended threshold.
    if interlaced > progressive:
        if tff > bff:
            if repeat_err < 1.0:
                idet["vid_type"] = "Telecined TFF"
            else:
                idet["vid_type"] = "Interlaced TFF"
        elif bff > tff:
            if repeat_err < 1.0:
                idet["vid_type"] = "Telecined BFF"
            else:
                idet["vid_type"] = "Interlaced BFF"
        else:
            idet["vid_type"] = "Interlaced?"
    else:
        idet["vid_type"] = "Progressive"
    print(f"Result: {idet['vid_type']}")
    return idet
def get_meta_interlacing(path):
    """Run ffprobe with cropdetect and idet filters and summarise the result.

    ffprobe is started on a lavfi graph (``movie`` source, ``cropdetect``,
    ``idet``); its compact output is parsed line by line while a progress bar
    follows the packet position. The counters of the last frame are then
    judged and the crop margins are derived from the last cropdetect values.

    Args:
        path: path of the video file; backslashes are replaced by slashes.

    Returns:
        ``{"interlacing": <result of judge>, "crop": (left, right, top,
        bottom)}`` where the crop entries are pixel margins.

    Raises:
        SystemExit: with ffprobe's exit status when it is non-zero.
    """
    path = path.replace("\\", "/")
    filtergraph = [
        f"movie=\\'{path}\\'",
        "cropdetect=limit=0.5:round=2",
        "idet",
    ]
    command = [
        "ffprobe",
        "-loglevel",
        "fatal",
        "-probesize",
        str(0x7FFFFFFF),
        "-analyzeduration",
        str(0x7FFFFFFF),
        "-f",
        "lavfi",
        "-i",
        ",".join(filtergraph),
        "-select_streams",
        "v",
        "-show_frames",
        "-show_streams",
        "-print_format",
        "compact",
    ]
    data = {}
    frame_num = 0
    # The context managers close the pipe, reap the child and close the
    # progress bar even when parsing fails part-way through.
    with SP.Popen(
        command,
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    ) as proc:
        total_size = int(get_streams(path)[1]["size"])
        with tqdm(
            total=total_size,
            desc="Analyzing video",
            unit_divisor=1024,
            unit_scale=True,
            unit="iB",
            leave=False,
        ) as pbar:
            for line in proc.stdout:
                record = __make_dict(line)
                # Later records replace earlier ones of the same type, so
                # ``data["frame"]`` ends up holding the last frame seen.
                data.update(record)
                if "frame" in record:
                    frame_num += 1
                    pbar.n = max(pbar.n, min(total_size, int(record["frame"]["pkt_pos"])))
                    dt = pbar._time() - pbar.start_t
                    if dt:
                        pbar.set_postfix(frame=frame_num, fps=f"{frame_num / dt:.2f}")
                    pbar.update(0)
        ret = proc.wait()
    if ret != 0:
        # sys.exit rather than the site-provided ``exit`` helper, which is
        # not guaranteed to exist outside interactive sessions.
        sys.exit(ret)

    stream = data["stream"]
    frame_num = int(stream["nb_read_frames"])
    cropdetect = data["frame"]["lavfi"]["cropdetect"]
    idet = judge(data, frame_num)
    # Margins: left, right, top, bottom.
    crop = (
        int(cropdetect["x"]),
        (int(stream["width"]) - int(cropdetect["w"]) - int(cropdetect["x"])),
        int(cropdetect["y"]),
        (int(stream["height"]) - int(cropdetect["h"]) - int(cropdetect["y"])),
    )
    print(f"Cropping: {crop}")
    return {"interlacing": idet, "crop": crop}
def get_frames(path):
    """Yield the parsed frame records of the first video stream.

    ffprobe is run with ``-show_frames`` in compact format and every output
    line is passed through ``__make_dict``.

    Args:
        path: path of the video file; backslashes are replaced by slashes.

    Yields:
        One ``{record_type: fields}`` dict per output line.

    Raises:
        SystemExit: with ffprobe's exit status when it is non-zero.
    """
    path = path.replace("\\", "/")
    command = [
        "ffprobe",
        "-probesize",
        str(0x7FFFFFFF),
        "-analyzeduration",
        str(0x7FFFFFFF),
        "-v",
        "fatal",
        "-i",
        path,
        "-select_streams",
        "v:0",
        "-show_frames",
        "-print_format",
        "compact",
    ]
    # Using Popen as a context manager closes the pipe and reaps the child
    # even if the consumer abandons the generator or an error is raised.
    with SP.Popen(
        command,
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    ) as proc:
        for line in proc.stdout:
            yield __make_dict(line)
        ret = proc.wait()
    if ret != 0:
        sys.exit(ret)
def get_streams(path):
    """Query ffprobe for stream and container information as JSON.

    Args:
        path: path of the video file.

    Returns:
        A ``(streams, format)`` tuple taken from ffprobe's JSON output;
        ``streams`` is limited to the first video stream by
        ``-select_streams v:0``.

    Raises:
        SystemExit: with ffprobe's exit status when it is non-zero.
    """
    result = SP.run(
        [
            "ffprobe",
            "-probesize",
            str(0x7FFFFFFF),
            "-analyzeduration",
            str(0x7FFFFFFF),
            "-v",
            "fatal",
            "-i",
            path,
            "-select_streams",
            "v:0",
            "-show_streams",
            "-show_format",
            "-print_format",
            "json",
        ],
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
    )
    # Check the exit status before parsing: a failed run may produce empty or
    # partial output, which would otherwise surface as a JSON decoding error
    # instead of ffprobe's own status.
    if result.returncode != 0:
        sys.exit(result.returncode)
    data = json.loads(result.stdout)
    return data["streams"], data["format"]
def make_header(file):
    """Return the three header lines of the index file.

    Args:
        file: path of the indexed source file.

    Returns:
        A list holding the format identifier, the file count ("1") and the
        absolute path of ``file``.
    """
    absolute_source = os.path.abspath(file)
    return ["DGIndexProjectFile16", "1", absolute_source]
def make_settings(stream):
    """Yield the ``key=value`` settings lines for a video stream.

    Args:
        stream: stream dict with "width", "height", "r_frame_rate" (as
            ``"num/den"``) and "display_aspect_ratio"; "color_range" is
            optional.

    Yields:
        One ``"Key=value"`` string per setting, in a fixed order.
    """
    pict_size = "x".join(map(str, [stream["width"], stream["height"]]))
    numerator, denominator = map(int, stream["r_frame_rate"].split("/")[:2])
    # Frame rate in thousandths of a frame per second, rounded down.
    frame_rate = f"{(numerator * 1000) // denominator} ({stream['r_frame_rate']})"
    # "color_range" may be absent from the stream dict; treat a missing value
    # like "tv" instead of failing, mirroring how make_line defaults
    # "color_space".
    color_range = stream.get("color_range", "tv")
    header = [
        ("Stream_Type", 0),  # Elementary Stream
        ("MPEG_Type", 2),  # MPEG-2
        ("iDCT_Algorithm", 5),  # 64-bit IEEE-1180 Reference
        ("YUVRGB_Scale", int(color_range != "tv")),
        ("Luminance_Filter", "0,0"),
        ("Clipping", "0,0,0,0"),
        ("Aspect_Ratio", stream["display_aspect_ratio"]),
        ("Picture_Size", pict_size),
        ("Field_Operation", 0),  # Honor Pulldown Flags
        ("Frame_Rate", frame_rate),
        ("Location", "0,0,0,0"),
    ]
    for key, value in header:
        yield f"{key}={value}"
def gen_d2v(path):
    """Generate the lines of the index file for ``path``.

    Yields the header, a blank line, the settings, another blank line and then
    one data line per group of frames, where a new group starts at every frame
    whose picture type is "I". A final ``None`` marks the end of the data so
    the consumer can recognise the last line.

    Args:
        path: path of the video file to index.

    Yields:
        Line strings without trailing newline, followed by ``None``.
    """
    yield from make_header(path)
    yield ""

    streams, fmt = get_streams(path)
    video = [candidate for candidate in streams if candidate["codec_type"] == "video"][0]
    # Parsed frame records carry the stream index as text, so compare as text.
    video["index"] = str(video["index"])
    yield from make_settings(video)
    yield ""

    total_bytes = int(fmt["size"])
    pending = []
    progress = tqdm(
        get_frames(path),
        total=total_bytes,
        unit_divisor=1024,
        unit_scale=True,
        unit="iB",
        desc="Writing d2v",
        leave=False,
    )
    seen = 0
    started = perf_counter()
    for record in progress:
        if "frame" not in record:
            continue
        frame = record["frame"]
        if frame["stream_index"] != video["index"]:
            continue
        # The bar tracks the byte position in the file, never moving backwards
        # and never exceeding the file size.
        progress.n = min(max(progress.n, int(frame["pkt_pos"])), total_bytes)
        rate = seen / (perf_counter() - started)
        progress.set_postfix(frame=seen, fps=f"{rate:.02f}")
        progress.update(0)
        seen += 1
        starts_group = frame["pict_type"] == "I"
        if starts_group and pending:
            yield make_line(pending, video)
            pending.clear()
        pending.append(frame)
    if pending:
        yield make_line(pending, video)
    progress.n = total_bytes
    progress.update(0)
    progress.close()
    yield None
def make_meta(path):
    """Analyse ``path`` and store the result next to it as ``<name>.info.json``.

    Nothing is done when the output file already exists. Otherwise the
    interlacing/crop analysis is combined with the aspect ratios of the first
    stream and written as indented JSON.

    Args:
        path: path of the video file to analyse.
    """
    outdir = os.path.dirname(path)
    stem = os.path.splitext(os.path.basename(path))[0]
    outfile = os.path.join(outdir, os.path.extsep.join([stem, "info", "json"]))
    outfile_tmp = os.path.extsep.join([outfile, "tmp"])
    if os.path.isfile(outfile):
        print(path,"already analyzed, skipping")
        return
    print("Analyzing", path)
    meta = get_meta_interlacing(path)
    streams, fmt = get_streams(path)
    stream = streams[0]
    var = Fraction(int(stream["width"]), int(stream["height"]))
    dar = Fraction(*map(int, stream["display_aspect_ratio"].split(":")))
    sar = Fraction(*map(int, stream["sample_aspect_ratio"].split(":")))
    # NOTE(review): "par" is the product sar * dar and "sar" is reported as
    # "Screen" below - confirm these labels are what consumers expect.
    par = sar * dar
    meta.update(
        {
            "par": [par.numerator, par.denominator],
            "dar": [dar.numerator, dar.denominator],
            "sar": [sar.numerator, sar.denominator],
            "var": [var.numerator, var.denominator],
        }
    )
    print("Aspect ratios:")
    print(f" Pixel {par}")
    print(f" Display {dar}")
    print(f" Screen {sar}")
    print(f" Video {var}")
    # Write to a temporary file and rename afterwards (as make_d2v does) so an
    # interrupted run cannot leave a truncated file that a later run would
    # mistake for a finished analysis.
    with open(outfile_tmp, "w") as fh:
        json.dump(meta, fh, indent=4)
    os.replace(outfile_tmp, outfile)
def make_d2v(path):
    """Index ``path`` and store the result next to it as ``<name>.d2v``.

    Nothing is done when the output file already exists. The lines are first
    written to ``<name>.d2v.tmp`` and the file is renamed once complete. The
    last line gets an `` ff`` end marker appended.

    Args:
        path: path of the video file to index.
    """
    outdir = os.path.dirname(path)
    stem = os.path.splitext(os.path.basename(path))[0]
    outfile = os.path.join(outdir, os.path.extsep.join([stem, "d2v"]))
    outfile_tmp = os.path.extsep.join([outfile, "tmp"])
    if os.path.isfile(outfile):
        print(path,"already indexed, skipping")
        return
    print("Indexing", path)
    lines = gen_d2v(path)
    # Keep one line of look-ahead: a line is written only once its successor
    # is known, because a following ``None`` marks it as the last line.
    current = next(lines)
    with open(outfile_tmp, "w") as fh:
        for upcoming in lines:
            fh.write(current)
            if upcoming is None:  # last line, append end marker
                fh.write(" ff")
            fh.write("\n")
            current = upcoming
    os.rename(outfile_tmp,outfile)
if __name__ == "__main__":
    # Every command-line argument is a glob pattern; each matching file is
    # analysed first and indexed afterwards.
    for pattern in sys.argv[1:]:
        for file in glob(pattern):
            make_meta(file)
            make_d2v(file)