# Choggbuster/ff_d2v.py

import itertools as ITT
import json
import os
import subprocess as SP
import sys
from pprint import pprint
from fractions import Fraction
from glob import glob
from collections import Counter
from tqdm import tqdm
from time import perf_counter


def pulldown(fields_per_second, frames_per_second):
    """Build the exact fields-per-frame ratio for the given rates.

    The ratio is constructed as a ``Fraction`` but is neither stored nor
    returned, so this function currently always returns ``None``.
    Constructing the fraction still raises ``ZeroDivisionError`` when
    ``frames_per_second`` is zero.
    """
    _ = Fraction(fields_per_second, frames_per_second)
    return None


# Colour-space name -> numeric matrix code.  ``make_line`` looks the stream's
# ``color_space`` value up here and writes the code into each d2v data line.
# Code 3 is intentionally absent from the table.
colorspace = dict(
    [
        ("gbr", 0),
        ("bt709", 1),
        ("unknown", 2),
        ("fcc", 4),
        ("bt470bg", 5),
        ("smpte170m", 6),
        ("smpte240m", 7),
        ("ycgco", 8),
        ("bt2020nc", 9),
        ("bt2020c", 10),
        ("smpte2085", 11),
        ("chroma-derived-nc", 12),
        ("chroma-derived-c", 13),
        ("ictcp", 14),
    ]
)

# Two-bit picture-type codes; ``make_flags`` shifts them into bits 4-5 of
# every per-frame flag byte.
pict_types = dict(I=0b01, P=0b10, B=0b11)


def make_info(frames):
    """Build the ``info`` bit field for one d2v data line.

    Args:
        frames: Non-empty sequence of frame dicts.  Each needs an
            ``"interlaced_frame"`` entry that ``int()`` accepts (ffprobe's
            compact output delivers the strings ``"0"``/``"1"``).

    Returns:
        An int with bit 11 always set, bit 10 always clear (closed GOP),
        bit 9 set when no frame in ``frames`` is interlaced, and bit 8 set
        when the first frame carries a timecode tag.

    Raises:
        IndexError: If ``frames`` is empty.
    """
    # Convert before testing: the string "0" is truthy, so checking the raw
    # value would report every frame as interlaced.
    has_interlaced = any(int(frame["interlaced_frame"]) for frame in frames)
    first = frames[0]
    # Accept both a nested "tags" dict and the flat "tag:timecode" key that
    # results when a compact-format "tag:timecode=..." field is stored under
    # its full name.
    # NOTE(review): assumes ffprobe emits "tag:timecode" for frames - confirm.
    new_gop = "timecode" in first.get("tags", {}) or "tag:timecode" in first
    info = 0x000
    info |= 1 << 11  # always 1
    info |= 0 << 10  # 0=Closed GOP, 1=Open GOP
    info |= int(not has_interlaced) << 9  # Progressive
    info |= int(new_gop) << 8
    return info


def make_flags(frames):
    """Encode every frame as a two-digit (or longer) lowercase hex flag.

    Bit layout of each flag value:
        bit 7     always set (the frame never needs the previous GOP)
        bit 6     set when the frame is not interlaced
        bits 4-5  picture type code from ``pict_types``
        bits 2-3  reserved, always zero
        bit 1     top-field-first value
        bit 0     repeat value (``repeat_pict``)

    Args:
        frames: Iterable of frame dicts with ``interlaced_frame``,
            ``pict_type``, ``top_field_first`` and ``repeat_pict`` entries.

    Returns:
        List of hex strings, one per frame, in input order.
    """

    def encode(frame):
        value = (
            0x80
            | ((not int(frame["interlaced_frame"])) << 6)
            | (pict_types[frame["pict_type"]] << 4)
            | (int(frame["top_field_first"]) << 1)
            | int(frame["repeat_pict"])
        )
        return f"{value:02x}"

    return [encode(frame) for frame in frames]


def make_line(frames, stream):
    """Format one d2v data line for a group of frames.

    The line consists of space-separated fields: the three-digit hex info
    word, the colour matrix code, file index, byte position of the first
    frame, skip, vob and cell (all fixed at 0 here) followed by one flag
    per frame.

    Args:
        frames: Non-empty sequence of frame dicts; the first one supplies
            ``pkt_pos``.
        stream: Stream dict; ``color_space`` defaults to ``"unknown"``.

    Returns:
        The assembled line without a trailing newline.
    """
    fields = [
        f"{make_info(frames):03x}",
        colorspace[stream.get("color_space", "unknown")],
        0,  # file
        frames[0]["pkt_pos"],  # position
        0,  # skip
        0,  # vob
        0,  # cell
        *make_flags(frames),
    ]
    return " ".join(str(field) for field in fields)


def __make_dict(line):
    """Parse one ``|``-separated ``key=value`` line into a nested dict.

    The first field is the record type.  Every following ``key=value``
    field is stored under it; dots in the key create nested dicts, and an
    intermediate path component of the form ``prefix:name`` is reduced to
    ``name``.  The final path component is stored verbatim.  Fields without
    ``=`` are ignored.

    Args:
        line: One text line, e.g. ``"frame|pkt_pos=42|a.b=1"``.

    Returns:
        ``{record_type: {...parsed fields...}}`` with all values as strings.
    """
    record_type, *fields = line.strip().split("|")
    parsed = {}
    for field in fields:
        # Split on the first "=" only so values that themselves contain "="
        # are kept intact instead of raising on unpacking.
        key_path, sep, value = field.partition("=")
        if not sep:
            continue
        *parents, leaf = key_path.split(".")
        entry = parsed
        for key in parents:
            if ":" in key:
                key = key.split(":")[1]
            entry = entry.setdefault(key, {})
        entry[leaf] = value
    return {record_type: parsed}


def judge(info, num_frames):
    """Classify a video from accumulated ``idet`` statistics.

    Args:
        info: Parsed data where ``info["frame"]["lavfi"]["idet"]`` holds the
            ``"repeated"``, ``"single"`` and ``"multiple"`` sub-dicts.
            Values that parse as int or float are kept; everything else
            (e.g. textual entries) is dropped.
        num_frames: Number of analysed frames; must be non-zero.

    Returns:
        A dict with the numeric statistics under ``"repeat"``, ``"single"``
        and ``"multiple"``.  When at least one frame was classified as
        interlaced or progressive it additionally contains ``"num_frames"``,
        ``"interlaced"``, ``"progressive"`` and the verdict ``"vid_type"``.
        When nothing was classified only the statistics are returned.
    """
    min_num_frames = 250  # minimal number of frames
    raw = info["frame"]["lavfi"]["idet"]
    idet = {}
    # "repeated" is exposed under the shorter key "repeat" in the result.
    for source, target in (
        ("repeated", "repeat"),
        ("single", "single"),
        ("multiple", "multiple"),
    ):
        stats = {}
        for k, v in raw[source].items():
            try:
                stats[k] = int(v)
            except ValueError:
                try:
                    stats[k] = float(v)
                except ValueError:
                    pass  # non-numeric entry, not a statistic
        idet[target] = stats
    repeat_err = abs(
        (idet["repeat"]["neither"] / num_frames) - 0.8
    )  # 2:3 pulldown,4 frames @ ~24 FPS to ~30 FPS = 20% repeated fields
    print(f"Derivation from 2:3 Pulldown: {repeat_err:.2%}")
    tff = idet["multiple"]["tff"]
    bff = idet["multiple"]["bff"]
    progressive = idet["multiple"]["progressive"]
    interlaced = tff + bff
    determined = interlaced + progressive
    print(f"Determined: {determined}")
    if determined == 0:
        # Bail out before computing any percentage: every ratio below
        # divides by ``determined``.
        return idet
    if interlaced:
        print(f"Interlaced: {interlaced} (TFF: {tff/interlaced:.2%}, BFF: {bff/interlaced:.2%}) = {interlaced/determined:.2%}")
    else:
        print(f"Interlaced: {interlaced} = {interlaced/determined:.2%}")
    print(f"Progressive: {progressive} = {progressive/determined:.2%}")
    idet["num_frames"] = num_frames
    idet["interlaced"] = interlaced
    idet["progressive"] = progressive
    if determined < min_num_frames:
        print("/!\\ Not enough information to determine interlacing type reliably, results may be inacurate /!\\")
    if interlaced > progressive:
        # NOTE(review): repeat_err is |ratio - 0.8|, which stays below 1.0
        # for any ratio between 0 and 1, so the "Interlaced TFF/BFF"
        # branches look unreachable in practice - confirm the intended
        # threshold.
        if tff > bff:
            if repeat_err < 1.0:
                idet["vid_type"] = "Telecined TFF"
            else:
                idet["vid_type"] = "Interlaced TFF"
        elif bff > tff:
            if repeat_err < 1.0:
                idet["vid_type"] = "Telecined BFF"
            else:
                idet["vid_type"] = "Interlaced BFF"
        else:
            idet["vid_type"] = "Interlaced?"
    else:
        idet["vid_type"] = "Progressive"
    print(f"Result: {idet['vid_type']}")
    return idet


def get_meta_interlacing(path):
    """Run ffprobe with ``cropdetect`` and ``idet`` over *path* and summarise.

    ffprobe reads the file through a lavfi graph (``movie`` ->
    ``cropdetect`` -> ``idet``) and prints frames and streams in compact
    format.  Each output line is parsed and merged into one dict, so only
    the most recently seen record of every type survives; the values of the
    last frame line are what feed the result.  A progress bar tracks the
    reported ``pkt_pos`` against the file size.

    Args:
        path: Path of the video file; backslashes are turned into forward
            slashes before it is embedded in the filter graph.

    Returns:
        ``{"interlacing": <result of judge()>, "crop": (left, right, top,
        bottom)}`` where the crop values are the margins around the
        detected picture area.

    Raises:
        SystemExit: With ffprobe's return code when ffprobe fails.
    """
    path = path.replace("\\", "/")
    filtergraph = [
        f"movie=\\'{path}\\'",
        "cropdetect=limit=0.5:round=2",
        "idet",
    ]
    proc = SP.Popen(
        [
            "ffprobe",
            "-loglevel",
            "fatal",
            "-probesize",
            str(0x7FFFFFFF),
            "-analyzeduration",
            str(0x7FFFFFFF),
            "-f",
            "lavfi",
            "-i",
            ",".join(filtergraph),
            "-select_streams",
            "v",
            "-show_frames",
            "-show_streams",
            "-print_format",
            "compact",
        ],
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    )
    total_size = int(get_streams(path)[1]["size"])
    data = {}
    pbar = tqdm(
        total=total_size,
        desc="Analyzing video",
        unit_divisor=1024,
        unit_scale=True,
        unit="iB",
        leave=False,
    )
    frame_num = 0
    try:
        for line in proc.stdout:
            record = __make_dict(line)
            data.update(record)
            if "frame" not in record:
                continue
            frame_num += 1
            # Never move the bar backwards or past the end of the file.
            pbar.n = max(pbar.n, min(total_size, int(record["frame"]["pkt_pos"])))
            dt = pbar._time() - pbar.start_t
            if dt:
                pbar.set_postfix(frame=frame_num, fps=f"{frame_num / dt:.2f}")
            pbar.update(0)
    finally:
        # Close the bar even when parsing fails so the terminal is left clean.
        pbar.close()
    ret = proc.wait()
    if ret != 0:
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist in every interpreter setup.
        sys.exit(ret)
    stream = data["stream"]
    frame_num = int(stream["nb_read_frames"])
    cropdetect = data["frame"]["lavfi"]["cropdetect"]
    idet = judge(data, frame_num)
    crop = (
        int(cropdetect["x"]),
        (int(stream["width"]) - int(cropdetect["w"]) - int(cropdetect["x"])),
        int(cropdetect["y"]),
        (int(stream["height"]) - int(cropdetect["h"]) - int(cropdetect["y"])),
    )
    print(f"Cropping: {crop}")
    return {"interlacing": idet, "crop": crop}


def get_frames(path):
    """Yield the parsed ffprobe frame records of the first video stream.

    ffprobe is run with ``-show_frames`` in compact format and every output
    line is parsed into a dict as it arrives.

    Args:
        path: Path of the video file; backslashes are turned into forward
            slashes before it is passed to ffprobe.

    Yields:
        One dict per ffprobe output line.

    Raises:
        SystemExit: With ffprobe's return code when ffprobe fails (raised
            once the output has been consumed).
    """
    path = path.replace("\\", "/")
    command = [
        "ffprobe",
        "-probesize",
        str(0x7FFFFFFF),
        "-analyzeduration",
        str(0x7FFFFFFF),
        "-v",
        "fatal",
        "-i",
        path,
        "-select_streams",
        "v:0",
        "-show_frames",
        "-print_format",
        "compact",
    ]
    # The context manager closes the pipe and reaps the child even when the
    # consumer abandons this generator early.
    with SP.Popen(
        command,
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    ) as proc:
        for line in proc.stdout:
            yield __make_dict(line)
    if proc.returncode != 0:
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist in every interpreter setup.
        sys.exit(proc.returncode)


def get_streams(path):
    """Query ffprobe for stream and container information as JSON.

    Args:
        path: Path of the video file, passed to ffprobe unchanged.

    Returns:
        A ``(streams, format)`` tuple taken from the ``"streams"`` and
        ``"format"`` entries of ffprobe's JSON output (only the first video
        stream is selected).

    Raises:
        SystemExit: With ffprobe's return code when ffprobe fails.
    """
    command = [
        "ffprobe",
        "-probesize",
        str(0x7FFFFFFF),
        "-analyzeduration",
        str(0x7FFFFFFF),
        "-v",
        "fatal",
        "-i",
        path,
        "-select_streams",
        "v:0",
        "-show_streams",
        "-show_format",
        "-print_format",
        "json",
    ]
    with SP.Popen(
        command,
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
    ) as proc:
        output, _ = proc.communicate()
    # Check the exit status before parsing so a failed run is reported with
    # ffprobe's own return code rather than as a JSON decoding error.
    if proc.returncode != 0:
        sys.exit(proc.returncode)
    data = json.loads(output)
    return data["streams"], data["format"]


def make_header(file):
    """Return the three d2v header lines for *file*.

    The lines are the format identifier, the file count ("1") and the
    absolute path of *file*.
    """
    header = ["DGIndexProjectFile16", "1"]
    header.append(os.path.abspath(file))
    return header


def make_settings(stream):
    """Yield the ``key=value`` settings lines of a d2v file.

    Args:
        stream: Stream dict providing ``width``, ``height``,
            ``r_frame_rate`` (``"num/den"``) and ``display_aspect_ratio``.
            ``color_range`` is optional; when it is missing it is treated
            like any other value that is not ``"tv"``.

    Yields:
        Eleven strings of the form ``"Name=value"`` in file order.

    Raises:
        KeyError: If one of the required stream entries is missing.
        ZeroDivisionError: If the frame rate denominator is zero.
    """
    pict_size = "x".join(map(str, [stream["width"], stream["height"]]))
    frame_rate = list(map(int, stream["r_frame_rate"].split("/")))
    # Frame rate in thousandths of a frame per second, rounded down.
    frame_rate = (frame_rate[0] * 1000) // frame_rate[1]
    frame_rate = f"{frame_rate} ({stream['r_frame_rate']})"
    # Look the range up with a default, as make_line does for color_space,
    # so a stream that does not report it cannot raise KeyError.
    color_range = stream.get("color_range", "unknown")
    header = [
        ("Stream_Type", 0),  # Elementary Stream
        ("MPEG_Type", 2),  # MPEG-2
        ("iDCT_Algorithm", 5),  # 64-bit IEEE-1180 Reference
        ("YUVRGB_Scale", int(color_range != "tv")),
        ("Luminance_Filter", "0,0"),
        ("Clipping", "0,0,0,0"),
        ("Aspect_Ratio", stream["display_aspect_ratio"]),
        ("Picture_Size", pict_size),
        ("Field_Operation", 0),  # Honor Pulldown Flags
        ("Frame_Rate", frame_rate),
        ("Location", "0,0,0,0"),
    ]
    for k, v in header:
        yield f"{k}={v}"


def gen_d2v(path):
    """Generate the lines of a d2v index for *path*.

    Output order: header lines, an empty line, settings lines, an empty
    line, then one data line per group of frames.  A new group starts at
    every I picture.  After the last data line a single ``None`` is yielded
    as an end-of-output marker for the consumer.

    Args:
        path: Path of the video file to index.

    Yields:
        ``str`` lines without trailing newline, then ``None``.

    Raises:
        IndexError: If ffprobe reports no video stream.
    """
    yield from make_header(path)
    yield ""
    streams, fmt = get_streams(path)
    stream = [s for s in streams if s["codec_type"] == "video"][0]
    # Frame records carry the stream index as text, so compare as text.
    stream["index"] = str(stream["index"])
    yield from make_settings(stream)
    yield ""
    total_size = int(fmt["size"])
    line_buffer = []
    prog_bar = tqdm(
        get_frames(path),
        total=total_size,
        unit_divisor=1024,
        unit_scale=True,
        unit="iB",
        desc="Writing d2v",
        leave=False,
    )
    frame_num = 0
    t_start = perf_counter()
    for line in prog_bar:
        if "frame" not in line:
            continue
        frame = line["frame"]
        if frame["stream_index"] != stream["index"]:
            continue
        # Never move the bar backwards or past the end of the file.
        prog_bar.n = min(max(prog_bar.n, int(frame["pkt_pos"])), total_size)
        elapsed = perf_counter() - t_start
        if elapsed:
            # Skip the rate display while no measurable time has passed;
            # dividing by a zero interval would raise.
            prog_bar.set_postfix(frame=frame_num, fps=f"{frame_num / elapsed:.02f}")
        prog_bar.update(0)
        frame_num += 1
        if frame["pict_type"] == "I" and line_buffer:
            yield make_line(line_buffer, stream)
            line_buffer.clear()
        line_buffer.append(frame)
    if line_buffer:
        yield make_line(line_buffer, stream)
    prog_bar.n = total_size
    prog_bar.update(0)
    prog_bar.close()
    yield None

def make_meta(path):
    """Analyse *path* and store the result next to it as ``<name>.info.json``.

    Nothing is done when the JSON file already exists.  Otherwise the
    interlacing/crop analysis is combined with four aspect ratios (each
    stored as a ``[numerator, denominator]`` pair), printed and written to
    disk.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    outfile = os.path.join(
        os.path.dirname(path),
        os.path.extsep.join((stem, "info", "json")),
    )
    if os.path.isfile(outfile):
        print(path, "already analyzed, skipping")
        return
    print("Analyzing", path)
    meta = get_meta_interlacing(path)
    streams, _ = get_streams(path)
    stream = streams[0]

    var = Fraction(int(stream["width"]), int(stream["height"]))
    dar = Fraction(*(int(part) for part in stream["display_aspect_ratio"].split(":")))
    sar = Fraction(*(int(part) for part in stream["sample_aspect_ratio"].split(":")))
    # NOTE(review): "par" is the product of sar and dar - confirm that this
    # is the intended definition before relying on the stored value.
    par = sar * dar

    ratios = {"par": par, "dar": dar, "sar": sar, "var": var}
    meta.update(
        {name: [ratio.numerator, ratio.denominator] for name, ratio in ratios.items()}
    )
    print("Aspect ratios:")
    for label, ratio in (
        ("Pixel", par),
        ("Display", dar),
        ("Screen", sar),
        ("Video", var),
    ):
        print(f"    {label:<7} {ratio}")
    with open(outfile, "w") as fh:
        json.dump(meta, fh, indent=4)


def make_d2v(path):
    """Write the d2v index for *path* next to it as ``<name>.d2v``.

    Nothing is done when the index already exists.  The lines are written
    to ``<name>.d2v.tmp`` first and the file is renamed once complete.  The
    generator's trailing ``None`` marks the end, so the line preceding it
    gets the `` ff`` end marker appended.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    outfile = os.path.join(os.path.dirname(path), os.path.extsep.join((stem, "d2v")))
    outfile_tmp = os.path.extsep.join((outfile, "tmp"))
    if os.path.isfile(outfile):
        print(path, "already indexed, skipping")
        return
    print("Indexing", path)
    lines = gen_d2v(path)
    # One-item lookahead: a line is written only after its successor is known.
    pending = next(lines)
    with open(outfile_tmp, "w") as fh:
        for upcoming in lines:
            fh.write(pending)
            if upcoming is None:  # last line, append end marker
                fh.write(" ff")
            fh.write("\n")
            pending = upcoming
    os.rename(outfile_tmp, outfile)


if __name__ == "__main__":
    # Every command-line argument is a glob pattern; each matching file is
    # analysed first and indexed afterwards.
    for file in (match for pattern in sys.argv[1:] for match in glob(pattern)):
        make_meta(file)
        make_d2v(file)