"""Index video files with ffprobe.

For every file matched on the command line this script writes two sidecar
files next to the input:

* ``<name>.info.json`` - interlacing statistics (from the ``idet`` filter),
  crop borders (from ``cropdetect``) and aspect ratios.
* ``<name>.d2v`` - a ``DGIndexProjectFile16`` style index with one line per
  group of frames starting at an I frame.
"""

import itertools as ITT
import json
import os
import subprocess as SP
import sys
from collections import Counter
from fractions import Fraction
from glob import glob
from pprint import pprint
from time import perf_counter

try:
    from tqdm import tqdm
except ImportError:  # the progress bar is cosmetic; keep working without it

    class tqdm:
        """No-op stand-in exposing the small part of tqdm this module uses."""

        def __init__(self, iterable=None, total=None, **_options):
            self.iterable = iterable
            self.total = total
            self.n = 0

        def __iter__(self):
            return iter(self.iterable)

        def set_postfix(self, **_fields):
            pass

        def update(self, n=1):
            self.n += n

        def close(self):
            pass


# Value passed to ffprobe's -probesize / -analyzeduration options.
_PROBE_LIMIT = str(0x7FFFFFFF)


def pulldown(fields_per_second, frames_per_second):
    """Compute the fields-per-frame ratio (unfinished stub).

    The ratio is computed and discarded, so the function returns ``None``.
    Nothing in this file calls it.
    """
    f = Fraction(fields_per_second, frames_per_second)


# Numeric matrix code written to each index line, keyed by the stream's
# ``color_space`` name.
colorspace = {
    "gbr": 0,
    "bt709": 1,
    "unknown": 2,
    "fcc": 4,
    "bt470bg": 5,
    "smpte170m": 6,
    "smpte240m": 7,
    "ycgco": 8,
    "bt2020nc": 9,
    "bt2020c": 10,
    "smpte2085": 11,
    "chroma-derived-nc": 12,
    "chroma-derived-c": 13,
    "ictcp": 14,
}

# Two-bit picture type code stored in bits 4-5 of every per-frame flag byte.
pict_types = {"I": 0b01, "P": 0b10, "B": 0b11}


def _to_int(value, default=0):
    """Return ``int(value)``, or *default* when *value* is not a number."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def make_info(frames):
    """Build the 12-bit info field for one index line.

    Bit layout: bit 11 is always set, bit 10 is the open-GOP flag (always 0
    here), bit 9 marks the group as progressive and bit 8 marks a new GOP.

    Args:
        frames: Non-empty list of frame dicts as produced by ``get_frames``.

    Returns:
        The info field as an integer.
    """
    # The frame values are strings, so they must go through int(): the
    # string "0" is truthy and would mark every group as interlaced.
    has_interlaced = any(int(frame["interlaced_frame"]) for frame in frames)
    first = frames[0]
    # The compact-format parser in this file stores frame tags as flat
    # "tag:<name>" keys; a nested "tags" dict is still honoured.
    new_gop = "timecode" in first.get("tags", {}) or "tag:timecode" in first

    info = 1 << 11  # always 1
    info |= 0 << 10  # 0=Closed GOP, 1=Open GOP
    info |= (not has_interlaced) << 9  # Progressive
    info |= new_gop << 8
    return info


def make_flags(frames):
    """Return one two-digit hex flag byte per frame.

    Bit layout: bit 7 "does not need the previous GOP" (always set here),
    bit 6 progressive, bits 4-5 picture type, bits 2-3 reserved, bit 1 top
    field first, bit 0 repeat first field.

    Raises:
        KeyError: If a frame has a picture type other than I, P or B.
    """
    flags = []
    for frame in frames:
        progressive = not int(frame["interlaced_frame"])
        picture_type = pict_types[frame["pict_type"]]
        tff = int(frame["top_field_first"])
        # repeat_pict may be larger than 1; collapse it to a single bit so
        # it cannot spill into the neighbouring TFF/reserved bits.
        rff = 1 if int(frame["repeat_pict"]) else 0

        flag = 1 << 7  # needs_prev is always False
        flag |= progressive << 6
        flag |= picture_type << 4
        flag |= tff << 1
        flag |= rff
        flags.append(f"{flag:02x}")
    return flags


def make_line(frames, stream):
    """Format the index line for one group of frames.

    Fields, space separated: info, matrix, file, position, skip, vob, cell,
    followed by one flag byte per frame. Color space names missing from
    ``colorspace`` fall back to the "unknown" code.
    """
    info = f"{make_info(frames):03x}"
    matrix = colorspace.get(
        stream.get("color_space", "unknown"), colorspace["unknown"]
    )
    position = frames[0]["pkt_pos"]
    # file, skip, vob and cell are always written as 0.
    fields = [info, matrix, 0, position, 0, 0, 0, *make_flags(frames)]
    return " ".join(map(str, fields))


def __make_dict(line):
    """Parse one line of ffprobe ``compact`` output into a nested dict.

    ``"frame|a=1|tag:lavfi.idet.x=2"`` becomes
    ``{"frame": {"a": "1", "lavfi": {"idet": {"x": "2"}}}}``. Dotted keys
    create nested dicts; a ``prefix:`` on an intermediate key is dropped,
    while the final key component is stored unchanged. All values stay
    strings.
    """
    section, *fields = line.strip().split("|")
    parsed = {}
    for field in fields:
        if "=" not in field:
            continue
        # Split on the first "=" only: the value may itself contain "=".
        key_path, value = field.split("=", 1)
        *parents, leaf = key_path.split(".")
        entry = parsed
        for key in parents:
            if ":" in key:
                key = key.split(":")[1]
            entry = entry.setdefault(key, {})
        entry[leaf] = value
    return {section: parsed}


def _numeric_items(values):
    """Return the entries of *values* that parse as int or float."""
    numbers = {}
    for key, text in values.items():
        for convert in (int, float):
            try:
                numbers[key] = convert(text)
            except ValueError:
                continue
            break
    return numbers


def judge(info, num_frames):
    """Classify a video from the idet statistics of its last frame.

    Args:
        info: Parsed ffprobe data; reads ``info["frame"]["lavfi"]["idet"]``.
        num_frames: Number of frames that were analyzed.

    Returns:
        A dict with the numeric ``repeat``/``single``/``multiple`` counters.
        When at least one frame was classified it also holds ``num_frames``,
        ``interlaced``, ``progressive`` and ``vid_type``.
    """
    min_num_frames = 250  # minimal number of frames

    raw = info["frame"]["lavfi"]["idet"]
    idet = {
        "repeat": _numeric_items(raw["repeated"]),
        "single": _numeric_items(raw["single"]),
        "multiple": _numeric_items(raw["multiple"]),
    }

    # 2:3 pulldown, 4 frames @ ~24 FPS to ~30 FPS = 20% repeated fields
    repeat_ratio = idet["repeat"]["neither"] / num_frames if num_frames else 0.0
    repeat_err = abs(repeat_ratio - 0.8)
    print(f"Derivation from 2:3 Pulldown: {repeat_err:.2%}")

    tff = idet["multiple"]["tff"]
    bff = idet["multiple"]["bff"]
    progressive = idet["multiple"]["progressive"]
    interlaced = tff + bff
    determined = interlaced + progressive
    print(f"Determined: {determined}")

    # Bail out before any percentage is computed: nothing was classified.
    if determined == 0:
        return idet

    if interlaced:
        print(f"Interlaced: {interlaced} (TFF: {tff/interlaced:.2%}, BFF: {bff/interlaced:.2%}) = {interlaced/determined:.2%}")
    else:
        print(f"Interlaced: {interlaced} = {interlaced/determined:.2%}")
    print(f"Progressive: {progressive} = {progressive/determined:.2%}")

    idet["num_frames"] = num_frames
    idet["interlaced"] = interlaced
    idet["progressive"] = progressive
    if determined < min_num_frames:
        print("/!\\ Not enough information to determine interlacing type reliably, results may be inacurate /!\\")

    # NOTE(review): repeat_err is a fraction, so "< 1.0" holds for almost any
    # input and interlaced material is nearly always reported as telecined.
    # The intended tolerance is not visible here - confirm before changing.
    kind = "Telecined" if repeat_err < 1.0 else "Interlaced"
    if interlaced <= progressive:
        idet["vid_type"] = "Progressive"
    elif tff > bff:
        idet["vid_type"] = f"{kind} TFF"
    elif bff > tff:
        idet["vid_type"] = f"{kind} BFF"
    else:
        idet["vid_type"] = "Interlaced?"
    print(f"Result: {idet['vid_type']}")
    return idet


def _report_progress(bar, pkt_pos, total_size, frame_num, started):
    """Move *bar* to byte offset *pkt_pos* and refresh its frame/fps postfix."""
    # The bar never moves backwards and never passes the file size; an
    # unparseable offset leaves it where it is.
    position = _to_int(pkt_pos, default=bar.n)
    bar.n = max(bar.n, min(total_size, position))
    elapsed = perf_counter() - started
    if elapsed > 0:
        bar.set_postfix(frame=frame_num, fps=f"{frame_num / elapsed:.2f}")
    bar.update(0)


def get_meta_interlacing(path):
    """Run ``cropdetect`` and ``idet`` over *path* and summarize the result.

    Returns:
        ``{"interlacing": <judge() result>, "crop": (left, right, top,
        bottom)}``, where the crop values are computed from the cropdetect
        data of the last frame and the stream's width and height.

    Exits the interpreter with ffprobe's status if ffprobe fails.
    """
    path = path.replace("\\", "/")
    filtergraph = [
        f"movie=\\'{path}\\'",
        "cropdetect=limit=0.5:round=2",
        "idet",
    ]
    command = [
        "ffprobe",
        "-loglevel", "fatal",
        "-probesize", _PROBE_LIMIT,
        "-analyzeduration", _PROBE_LIMIT,
        "-f", "lavfi",
        "-i", ",".join(filtergraph),
        "-select_streams", "v",
        "-show_frames",
        "-show_streams",
        "-print_format", "compact",
    ]
    with SP.Popen(
        command,
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    ) as proc:
        total_size = int(get_streams(path)[1]["size"])
        # Keeps only the most recent record of each section; the last
        # "frame" record is the one read after the loop.
        data = {}
        pbar = tqdm(
            total=total_size,
            desc="Analyzing video",
            unit_divisor=1024,
            unit_scale=True,
            unit="iB",
            leave=False,
        )
        frame_num = 0
        started = perf_counter()
        try:
            for raw_line in proc.stdout:
                record = __make_dict(raw_line)
                data.update(record)
                if "frame" in record:
                    frame_num += 1
                    _report_progress(
                        pbar,
                        record["frame"].get("pkt_pos"),
                        total_size,
                        frame_num,
                        started,
                    )
        finally:
            pbar.close()
        ret = proc.wait()
    if ret != 0:
        sys.exit(ret)

    stream = data["stream"]
    frame_num = int(stream["nb_read_frames"])
    cropdetect = data["frame"]["lavfi"]["cropdetect"]
    idet = judge(data, frame_num)
    crop = (
        int(cropdetect["x"]),
        (int(stream["width"]) - int(cropdetect["w"]) - int(cropdetect["x"])),
        int(cropdetect["y"]),
        (int(stream["height"]) - int(cropdetect["h"]) - int(cropdetect["y"])),
    )
    print(f"Cropping: {crop}")
    return {"interlacing": idet, "crop": crop}


def get_frames(path):
    """Yield one parsed record per line of ``ffprobe -show_frames`` output.

    Only the first video stream is selected. Exits the interpreter with
    ffprobe's status if ffprobe fails.
    """
    path = path.replace("\\", "/")
    command = [
        "ffprobe",
        "-probesize", _PROBE_LIMIT,
        "-analyzeduration", _PROBE_LIMIT,
        "-v", "fatal",
        "-i", path,
        "-select_streams", "v:0",
        "-show_frames",
        "-print_format", "compact",
    ]
    with SP.Popen(
        command,
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    ) as proc:
        for raw_line in proc.stdout:
            yield __make_dict(raw_line)
        ret = proc.wait()
    if ret != 0:
        sys.exit(ret)


def get_streams(path):
    """Return ``(streams, format)`` from ffprobe's JSON report for *path*.

    Only the first video stream is selected. Exits the interpreter with
    ffprobe's status if ffprobe fails.
    """
    command = [
        "ffprobe",
        "-probesize", _PROBE_LIMIT,
        "-analyzeduration", _PROBE_LIMIT,
        "-v", "fatal",
        "-i", path,
        "-select_streams", "v:0",
        "-show_streams",
        "-show_format",
        "-print_format", "json",
    ]
    result = SP.run(command, stdout=SP.PIPE, stdin=SP.DEVNULL)
    # Check the status before parsing: a failed run may print no JSON.
    if result.returncode != 0:
        sys.exit(result.returncode)
    data = json.loads(result.stdout)
    return data["streams"], data["format"]


def make_header(file):
    """Return the three header lines of the index file for *file*."""
    return ["DGIndexProjectFile16", "1", os.path.abspath(file)]


def make_settings(stream):
    """Yield the ``key=value`` settings lines for *stream*.

    ``Frame_Rate`` is the rate multiplied by 1000 and truncated, followed by
    the original fraction in parentheses. A missing ``color_range`` is
    treated like ``"tv"``.
    """
    pict_size = "x".join(map(str, [stream["width"], stream["height"]]))
    numerator, denominator = map(int, stream["r_frame_rate"].split("/"))
    frame_rate = f"{(numerator * 1000) // denominator} ({stream['r_frame_rate']})"
    header = [
        ("Stream_Type", 0),  # Elementary Stream
        ("MPEG_Type", 2),  # MPEG-2
        ("iDCT_Algorithm", 5),  # 64-bit IEEE-1180 Reference
        ("YUVRGB_Scale", int(stream.get("color_range", "tv") != "tv")),
        ("Luminance_Filter", "0,0"),
        ("Clipping", "0,0,0,0"),
        ("Aspect_Ratio", stream["display_aspect_ratio"]),
        ("Picture_Size", pict_size),
        ("Field_Operation", 0),  # Honor Pulldown Flags
        ("Frame_Rate", frame_rate),
        ("Location", "0,0,0,0"),
    ]
    for key, value in header:
        yield f"{key}={value}"


def gen_d2v(path):
    """Yield the lines of the index file for *path*, then ``None``.

    Order: header lines, an empty line, settings lines, an empty line, then
    one index line per group of frames, where a new group starts at every
    I frame. The trailing ``None`` tells the consumer that the previously
    yielded line was the last one.
    """
    yield from make_header(path)
    yield ""
    streams, fmt = get_streams(path)
    stream = [s for s in streams if s["codec_type"] == "video"][0]
    # Frame records carry the stream index as a string.
    stream["index"] = str(stream["index"])
    yield from make_settings(stream)
    yield ""

    total_size = int(fmt["size"])
    # The bar tracks byte offsets, so it is driven by hand and does not wrap
    # the frame iterator (which would also count iterations).
    prog_bar = tqdm(
        total=total_size,
        unit_divisor=1024,
        unit_scale=True,
        unit="iB",
        desc="Writing d2v",
        leave=False,
    )
    group = []
    frame_num = 0
    started = perf_counter()
    try:
        for record in get_frames(path):
            if "frame" not in record:
                continue
            frame = record["frame"]
            if frame["stream_index"] != stream["index"]:
                continue
            frame_num += 1
            _report_progress(
                prog_bar, frame.get("pkt_pos"), total_size, frame_num, started
            )
            if frame["pict_type"] == "I" and group:
                yield make_line(group, stream)
                group.clear()
            group.append(frame)
        if group:
            yield make_line(group, stream)
        prog_bar.n = total_size
        prog_bar.update(0)
    finally:
        prog_bar.close()
    yield None


def make_meta(path):
    """Analyze *path* and write ``<name>.info.json`` next to it.

    Does nothing if the output file already exists.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    outfile = os.path.join(
        os.path.dirname(path), os.path.extsep.join([stem, "info", "json"])
    )
    if os.path.isfile(outfile):
        print(path, "already analyzed, skipping")
        return
    print("Analyzing", path)
    meta = get_meta_interlacing(path)
    streams, fmt = get_streams(path)
    stream = streams[0]
    var = Fraction(int(stream["width"]), int(stream["height"]))
    dar = Fraction(*map(int, stream["display_aspect_ratio"].split(":")))
    sar = Fraction(*map(int, stream["sample_aspect_ratio"].split(":")))
    # NOTE(review): "par" is the product of the sample and display aspect
    # ratios - confirm this is the quantity consumers of the JSON expect.
    par = sar * dar
    meta.update(
        {
            "par": [par.numerator, par.denominator],
            "dar": [dar.numerator, dar.denominator],
            "sar": [sar.numerator, sar.denominator],
            "var": [var.numerator, var.denominator],
        }
    )
    print(f"Aspect ratios:")
    print(f" Pixel {par}")
    print(f" Display {dar}")
    print(f" Screen {sar}")
    print(f" Video {var}")
    with open(outfile, "w") as fh:
        json.dump(meta, fh, indent=4)


def make_d2v(path):
    """Index *path* and write ``<name>.d2v`` next to it.

    The file is written under a ``.tmp`` name and moved into place once
    complete. Does nothing if the output file already exists.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    outfile = os.path.join(os.path.dirname(path), os.path.extsep.join([stem, "d2v"]))
    outfile_tmp = os.path.extsep.join([outfile, "tmp"])
    if os.path.isfile(outfile):
        print(path, "already indexed, skipping")
        return
    print("Indexing", path)
    # Pair every line with its successor so the last real line (the one
    # followed by the None sentinel) can be spotted while streaming.
    lines, lookahead = ITT.tee(gen_d2v(path))
    next(lookahead)
    with open(outfile_tmp, "w") as fh:
        for line, next_line in zip(lines, lookahead):
            fh.write(line)
            if next_line is None:  # last line, append end marker
                fh.write(" ff")
            fh.write("\n")
    os.replace(outfile_tmp, outfile)


def main(argv=None):
    """Analyze and index every file matching the glob patterns in *argv*.

    *argv* defaults to ``sys.argv[1:]``.
    """
    patterns = sys.argv[1:] if argv is None else argv
    for file in ITT.chain.from_iterable(map(glob, patterns)):
        make_meta(file)
        make_d2v(file)


if __name__ == "__main__":
    main()