Pushed latest changes, add Telecine/Interlacing detection to ff_d2v.py

2022-01-07 18:27:31 +01:00 · 2022-01-07 18:27:31 +01:00 · 5e4d9d6965
commit 5e4d9d6965
parent e06f1dfad5
11 changed files with 2403 additions and 2039 deletions
--- a/.gitignore
+++ b/.gitignore
@ -138,3 +138,13 @@ dmypy.json
 # Cython debug symbols
 cython_debug/

+
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+# Local History for Visual Studio Code
+.history/
--- a/README.md
+++ b/README.md
@ -2,9 +2,27 @@

 Choggbuster is a set of python scripts aimed at automated preprocessing of video DVDs for archival and filtering

+# Requirements
+
+- python (obviously)
+- libdvdcss (for decrypting copy protected DVDs)
+- libdvdnav (for streaming the VOBs to disk)
+- libdvdread (for reading decrypted data off of DVDs)
+- ffmpeg (for demuxing)
+- ccextractor (for extracting closed captions from the video stream)
+
 # Setup (Windows)

 1. Clone the repo
 2. `pip install cffi tqdm`
 3. Grab [libdvdread, libdvdnav](https://www.videolan.org/developers/libdvdnav.html) and [libdvdcss](https://www.videolan.org/developers/libdvdcss.html) from VLC and drop them next to `dvd_ripper.py`
 4. `python dvd_ripper.py F:\` or `python dvd_ripper.py D:\path\to\DVD.ISO`
+5. This will create a folder `out` with a subfolder for the disc containing:
+    - JSON file with metadata for the DVD title (`XXXX.json` where `X` is the title number)
+    - demuxed streams (`tXXX_aYYY_Z_0xAAA.{ext}` where `X` is the title number, `Y` is the angle number, `Z` is the stream index and `AAA` is the stream id)
+        - `.m2v` for video
+        - `.ac3` or `.dts` for audio
+        - `.sub` and `.idx` for subtitles
+        - `.srt` for captions
+    - `.d2v` file for use with AviSynth and Vapoursynth D2V reader
+    - `.info.json` file containing video stream metadata (cropping information, interlaced/progressive frame counts and aspect ratio information)
--- a/dvd_ripper.py
+++ b/dvd_ripper.py
@ -1,65 +1,99 @@
-import cffi
+import itertools as ITT
+import json
 import os
+import subprocess as SP
 import sys
 import time
-from dvdnav import DVDNav,DVDError
-from dvdread import DVDRead
-import subprocess as SP
-import json
 from glob import glob
-import itertools as ITT
+
+import cffi
+from datetime import timedelta
+from dvdnav import DVDError, DVDNav
+from dvdread import DVDRead
+from ff_d2v import make_d2v, make_meta
 from vob_demux import demux
-from ff_d2v import make_d2v

-def loadlib(dll_path, *includes, **kwargs):
-    ffi = cffi.FFI()
-    for include in includes:
-        ffi.cdef(open(include).read(), kwargs)
-    return ffi, ffi.dlopen(dll_path)
+def close_file_del_if_empty(fh):
+    if not fh:
+        return False
+    if fh.tell() == 0:
+        fh.close()
+        os.unlink(fh.name)
+        return False
+    else:
+        fh.close()
+        return True

-for dvd_path in ITT.chain.from_iterable(map(glob,sys.argv[1:])):
+
+dur_thr = 60.0
+
+def process_m2v_files(path):
+    for file in glob(os.path.join(path,"**", "*.m2v")):
+        make_meta(file)
+        make_d2v(file)
+
+
+for dvd_path in ITT.chain.from_iterable(map(glob, sys.argv[1:])):
    r = DVDRead(dvd_path)
    # r.grab_ifos()
    # r.grab_vobs()
    # exit()
-
-    out_folder = os.path.join(
-        "out", "_".join([r.disc_id, r.udf_disc_name or r.iso_disc_name]).replace(" ", "_")
-    )
-    os.makedirs(out_folder, exist_ok=True)
+    if os.path.isfile(dvd_path):
+        basename = os.path.splitext(os.path.basename(dvd_path))[0]
+    else:
+        basename = r.iso_disc_name or r.udf_disc_name
+    base_dir = os.path.join("out", "_".join([basename, r.disc_id]).replace(" ", "_"))
+    if os.path.isdir(base_dir):
+        print(f"Output foldrer {base_dir} exists, remove to re-rip DVD")
+        process_m2v_files(base_dir)
+        continue
+    os.makedirs(base_dir, exist_ok=True)
    d = DVDNav(dvd_path)
    to_demux = []
    for k, v in d.titles.items():
+        out_folder=os.path.join(base_dir,f"t{k:03}")
        v["duration"] = v["duration"].total_seconds()
-        v["chapters"] = [c.total_seconds() for c in v["chapters"]]
+        if v["chapters"]:
+            v["chapters"] = [0.0]+[c.total_seconds() for c in v["chapters"]]
+            avg_chapter_len = v["duration"] / len(v["chapters"])
+            # if avg_chapter_len<10:
+            #     continue
        d.titles[k] = v
-        with open(os.path.join(out_folder, f"{k:03}.json"), "w") as fh:
-            json.dump(v, fh)
-        for a in range(0,99):
-            block=0
-            outfile = os.path.join(out_folder, f"t{k:03}_a{a:03}_b{block:03}.vob")
+        # if not v.get('audio'):
+        #     print(f"[{k}|0] Skipping title {k} because it has no audio tracks")
+        #     continue
+        # if not v.get('vts'):
+        #     print(f"[{k}|0] Skipping title {k} because it has no title sets")
+        #     continue
+        if v["duration"] < dur_thr:
+            print(
+                f"[{k}|0] Skipping title {k} because it is shorter than {dur_thr} seconds ({v['duration']} seconds)"
+            )
+            continue
+        os.makedirs(out_folder, exist_ok=True)
+        with open(os.path.join(out_folder, f"title.json"), "w") as fh:
+            json.dump(d.titles[k], fh, indent=4)
+        with open(os.path.join(out_folder, f"chapters.txt"), "w") as fh:
+            if set(v["chapters"])==set([0.0]):
+                continue
+            for n,t in enumerate(v["chapters"],1):
+                if abs(t-v["duration"])<1.0:
+                    continue
+                print(f"CHAPTER{n:02}={timedelta(seconds=t)}",file=fh)
+                print(f"CHAPTER{n:02}NAME=Chapter {n}",file=fh)
+        for a in range(0, 99):
+            outfile = os.path.join(out_folder, f"{a:03}.vob")
            to_demux.append(outfile)
            fh = open(outfile, "wb")
            try:
                for block in d.get_blocks(k, a):
-                    if isinstance(block, int):
-                        outfile = os.path.join(out_folder, f"t{k:03}_a{a:03}_b{block:03}.vob")
-                        to_demux.append(outfile)
-                        if fh:
-                            fh.close()
-                        fh = open(outfile, "wb")
-                    else:
-                        fh.write(block)
+                    fh.write(block)
            except DVDError as e:
-                if str(e)!="Invalid angle specified!":
+                if str(e) != "Invalid angle specified!":
                    raise
-            if fh.tell()==0:
-                fh.close()
-                os.unlink(fh.name)
-                while fh.name in to_demux:
-                    to_demux.remove(fh.name)
+            close_file_del_if_empty(fh)
+    to_demux = list(filter(os.path.isfile, to_demux))
    for file in to_demux:
        demux(file)
        os.unlink(file)
-    for file in glob(os.path.join(out_folder,"*.m2v")):
-        make_d2v(file)
+    process_m2v_files(base_dir)
--- a/dvdnav.py
+++ b/dvdnav.py
@ -1,8 +1,10 @@
-import cffi
-import os
 import functools
+import os
 from datetime import timedelta
+
+import cffi
 from tqdm import tqdm
+
 from dvdread import DVDRead


@ -13,6 +15,29 @@ def loadlib(dll_path, *includes, **kwargs):
    return ffi, ffi.dlopen(dll_path)


+domains = {
+    0: "None",
+    1: "FirstPlay",
+    2: "VTSTitle",
+    4: "VMGM",
+    8: "VTSMenu",
+}
+events = {
+    0: "DVDNAV_BLOCK_OK",
+    1: "DVDNAV_NOP",
+    2: "DVDNAV_STILL_FRAME",
+    3: "DVDNAV_SPU_STREAM_CHANGE",
+    4: "DVDNAV_AUDIO_STREAM_CHANGE",
+    5: "DVDNAV_VTS_CHANGE",
+    6: "DVDNAV_CELL_CHANGE",
+    7: "DVDNAV_NAV_PACKET",
+    8: "DVDNAV_STOP",
+    9: "DVDNAV_HIGHLIGHT",
+    10: "DVDNAV_SPU_CLUT_CHANGE",
+    12: "DVDNAV_HOP_CHANNEL",
+    13: "DVDNAV_WAIT",
+}
+
 class DVDError(Exception):
    pass

@ -67,27 +92,6 @@ class DVDNav(object):
        size = self.ffi.new("int32_t*", 0)
        pos = self.ffi.new("uint32_t*", 0)
        total_size = self.ffi.new("uint32_t*", 0)
-        domains = {
-            1: "FirstPlay",
-            2: "VTSTitle",
-            4: "VMGM",
-            8: "VTSMenu",
-        }
-        events = {
-            0: "DVDNAV_BLOCK_OK",
-            1: "DVDNAV_NOP",
-            2: "DVDNAV_STILL_FRAME",
-            3: "DVDNAV_SPU_STREAM_CHANGE",
-            4: "DVDNAV_AUDIO_STREAM_CHANGE",
-            5: "DVDNAV_VTS_CHANGE",
-            6: "DVDNAV_CELL_CHANGE",
-            7: "DVDNAV_NAV_PACKET",
-            8: "DVDNAV_STOP",
-            9: "DVDNAV_HIGHLIGHT",
-            10: "DVDNAV_SPU_CLUT_CHANGE",
-            12: "DVDNAV_HOP_CHANNEL",
-            13: "DVDNAV_WAIT",
-        }
        progbar = tqdm(
            unit_divisor=1024,
            unit_scale=True,
@ -96,7 +100,8 @@ class DVDNav(object):
            disable=False,
        )
        ripped = set()
-        current_vts = None
+        cells = set()
+        current_vts = (None,None)
        current_cell = None
        current_pg = None
        while True:
@ -143,26 +148,36 @@ class DVDNav(object):
                current_cell = cell.cellN
                current_pg = cell.pgN
                progbar.write(
-                    f"[{title}|{angle}] Cell: {cell.cellN} ({cell.cell_start}-{cell.cell_start+cell.cell_length}), PG: {cell.pgN} ({cell.pg_start}-{cell.pg_start+cell.pg_length})"
+                    f"[{title}|{angle}] Cell: {cell.cellN} ({hex(cell.cell_start)}-{hex(cell.cell_start+cell.cell_length)}), PG: {cell.pgN} ({hex(cell.pg_start)}-{hex(cell.pg_start+cell.pg_length)})"
                )
+                fp=(current_vts[0],current_vts[1],cell.cellN,cell.pgN,cell.cell_length,cell.pg_length,cell.pgc_length,cell.cell_start,cell.pg_start)
+                if fp in cells:
+                    progbar.write(f"[{title}|{angle}] Cells Looped!")
+                    break
+                cells.add(fp)
            elif ev[0] == self.lib.DVDNAV_VTS_CHANGE:
                vts = self.ffi.cast("dvdnav_vts_change_event_t*", buf)
+                old_domain = domains[vts.old_domain]
+                new_domain = domains[vts.new_domain]
                new_vts = (vts.new_vtsN, vts.new_domain)
+                old_vts = (vts.old_vtsN, vts.old_domain)
                ripped.add((vts.old_vtsN, vts.old_domain))
-                # progbar.write(f"[{title}|{angle}] VTS: {vts.old_vtsN} ({vts.old_domain} {old_domain}) -> {vts.new_vtsN} ({vts.new_domain} {new_domain})")
-                if new_vts in ripped:  # looped
-                    progbar.write(f"[{title}|{angle}] Looped!")
+                cells.clear()
+                progbar.write(f"[{title}|{angle}] VTS: {vts.old_vtsN} ({vts.old_domain} {old_domain}) -> {vts.new_vtsN} ({vts.new_domain} {new_domain})")
+                if (new_vts in ripped) or new_vts==old_vts:  # looped
+                    progbar.write(f"[{title}|{angle}] VTS Looped!")
                    break
                current_vts = (vts.new_vtsN, vts.new_domain)
                if vts.new_domain == 8:  # back to menu
-                    progbar.write(f"[{title}|{angle}] Back to menu!")
+                    progbar.write(f"[{title}|{angle}] VTS Back to menu!")
                    break
-                yield vts.new_vtsN
+                # yield vts.new_vtsN
            else:
                progbar.write(
                    f"[{title}|{angle}] Unhandled: {events.get(ev[0],ev[0])} {size[0]}"
                )
        self.__check_error(self.lib.dvdnav_stop(self.dvd))
+        progbar.close()

    def __check_error(self, ret):
        if ret == self.lib.DVDNAV_STATUS_ERR:
@ -171,6 +186,44 @@ class DVDNav(object):
                raise DVDError(err)
            raise DVDError("Unknown error")

+    def __get_vts(self,title):
+        buf = self.ffi.new("char[]", 4096)
+        ev = self.ffi.new("int32_t*", self.lib.DVDNAV_NOP)
+        size = self.ffi.new("int32_t*", 0)
+        pos = self.ffi.new("uint32_t*", 0)
+        total_size = self.ffi.new("uint32_t*", 0)
+        self.__check_error(self.lib.dvdnav_set_PGC_positioning_flag(self.dvd, 1))
+        self.__check_error(self.lib.dvdnav_title_play(self.dvd, title))
+        seq=[]
+        while True:
+            self.__check_error(self.lib.dvdnav_get_next_block(self.dvd, buf, ev, size))
+            if ev[0] == self.lib.DVDNAV_BLOCK_OK:
+                self.__check_error(self.lib.dvdnav_get_position(self.dvd, pos, total_size))
+                # print(title,pos[0],total_size[0])
+                if self.lib.dvdnav_next_pg_search(self.dvd)==0:
+                    break
+            elif ev[0] == self.lib.DVDNAV_STOP:
+                break
+            elif ev[0] == self.lib.DVDNAV_STILL_FRAME:
+                self.__check_error(self.lib.dvdnav_still_skip(self.dvd))
+            elif ev[0] == self.lib.DVDNAV_WAIT:
+                self.__check_error(self.lib.dvdnav_wait_skip(self.dvd))
+            elif ev[0] == self.lib.DVDNAV_VTS_CHANGE:
+                vts = self.ffi.cast("dvdnav_vts_change_event_t*", buf)
+                old_domain = domains[vts.old_domain]
+                new_domain = domains[vts.new_domain]
+                seq.append(
+                    (vts.new_vtsN, new_domain)
+                )
+                if vts.new_domain==8:
+                    break
+                continue
+            # print(title,ev[0],size[0])
+        self.__check_error(self.lib.dvdnav_stop(self.dvd))
+        # print(title,seq)
+        return seq
+        # self.__check_error(self.lib.dvdnav_next_pg_search(self.dvd))
+
    def __get_titles(self):
        titles = self.ffi.new("int32_t*", 0)
        p_times = self.ffi.new("uint64_t[]", 512)
@ -191,6 +244,8 @@ class DVDNav(object):
            if duration[0] == 0:
                continue
            chapters = []
+            if num_chapters==0 and times[0]==self.ffi.NULL:
+                chapters=None
            for t in range(num_chapters):
                chapters.append(timedelta(seconds=times[0][t] / 90000))
            self.titles[title] = {
@ -220,6 +275,7 @@ class DVDNav(object):
            self.__check_error(self.lib.dvdnav_title_play(self.dvd, title))
            self.titles[title]["audio"] = {}
            self.titles[title]["subtitles"] = {}
+            # self.titles[title]["vts"] = self.__get_vts(title)
            for n in range(255):
                stream_id = self.lib.dvdnav_get_audio_logical_stream(self.dvd, n)
                if stream_id == -1:
@ -241,8 +297,7 @@ class DVDNav(object):
                    3: "director's commentary",
                    4: "alternate director's commentary",
                }[audio_attrs.code_extension]
-                self.titles[title]["audio"][n] = {
-                    "stream_id": stream_id,
+                self.titles[title]["audio"][stream_id] = {
                    "lang": alang,
                    "channels": channels,
                    "codec": codec,
@ -258,8 +313,8 @@ class DVDNav(object):
                slang = None
                if spu_attr.type == 1:
                    slang = str(spu_attr.lang_code.to_bytes(2, "big"), "utf8")
-                self.titles[title]["subtitles"][n] = {
-                    "stream_id": stream_id,
+                self.titles[title]["subtitles"][stream_id] = {
                    "lang": slang,
                }
        self.__check_error(self.lib.dvdnav_stop(self.dvd))
+        # exit("DEBUG!")
--- a/dvdread.py
+++ b/dvdread.py
@ -1,9 +1,10 @@
-import cffi
-import os
-import functools
 import binascii
+import functools
+import os
 from datetime import timedelta

+import cffi
+

 def loadlib(dll_path, *includes, **kwargs):
    ffi = cffi.FFI()
@ -55,6 +56,7 @@ class DVDRead(object):
            pbar.update(num_read)
            yield self.ffi.buffer(buf,num_read)[:]
        self.lib.DVDCloseFile(fh)
+        pbar.close()

    def grab_ifos(self):
        vmg_ifo = self.lib.ifoOpen(self.dvd, 0)
--- a/ff_d2v.py
+++ b/ff_d2v.py
@ -1,9 +1,18 @@
-import sys
+import itertools as ITT
 import json
 import os
 import subprocess as SP
-import itertools as ITT
+import sys
+from pprint import pprint
+from fractions import Fraction
+from glob import glob
+from collections import Counter
 from tqdm import tqdm
+from time import perf_counter
+
+
+def pulldown(fields_per_second, frames_per_second):
+    f = Fraction(fields_per_second, frames_per_second)


 colorspace = {
@ -59,7 +68,7 @@ def make_flags(frames):

 def make_line(frames, stream):
    info = f"{make_info(frames):03x}"
-    matrix = colorspace[stream["color_space"]]
+    matrix = colorspace[stream.get("color_space", "unknown")]
    file = 0
    position = frames[0]["pkt_pos"]
    skip = 0
@ -69,7 +78,170 @@ def make_line(frames, stream):
    return " ".join(map(str, [info, matrix, file, position, skip, vob, cell, *flags]))


+def __make_dict(line):
+    ret = {}
+    line = line.strip().split("|")
+    line_type = line[0]
+    for value in line[1:]:
+        entry = ret
+        if "=" not in value:
+            continue
+        key_path, value = value.split("=")
+        key_path = key_path.split(".")
+        for key in key_path[:-1]:
+            if ":" in key:
+                key = key.split(":")[1]
+            entry = entry.setdefault(key, {})
+        entry[key_path[-1]] = value
+    return {line_type: ret}
+
+
+def judge(info, num_frames):
+    threshold = 1  # BFF/TFF threshold value
+    min_num_frames = 250  # minimal number of frames
+    idet = info["frame"]["lavfi"]["idet"]
+    idet_v = {}
+    for t in "repeated", "single", "multiple":
+        idet_v[t] = {}
+        for k, v in idet[t].items():
+            try:
+                idet_v[t][k] = int(v)
+            except ValueError:
+                try:
+                    idet_v[t][k] = float(v)
+                except ValueError:
+                    pass
+    idet = {
+        "repeat": {k: v for k, v in idet_v["repeated"].items()},
+        "single": {k: v for k, v in idet_v["single"].items()},
+        "multiple": {k: v for k, v in idet_v["multiple"].items()},
+    }
+    repeat_err = abs(
+        (idet["repeat"]["neither"] / num_frames) - 0.8
+    )  # 2:3 pulldown,4 frames @ ~24 FPS to ~30 FPS = 20% repeated fields
+    print(f"Derivation from 2:3 Pulldown: {repeat_err:.2%}")
+    tff = idet["multiple"]["tff"]
+    bff = idet["multiple"]["bff"]
+    progressive = idet["multiple"]["progressive"]
+    interlaced = tff + bff
+    determined = interlaced + progressive
+    print(f"Determined: {determined}")
+    if interlaced:
+        print(f"Interlaced: {interlaced} (TFF: {tff/interlaced:.2%}, BFF: {bff/interlaced:.2%}) = {interlaced/determined:.2%}")
+    else:
+        print(f"Interlaced: {interlaced} = {interlaced/determined:.2%}")
+    print(f"Progressive: {progressive} = {progressive/determined:.2%}")
+    if determined == 0:
+        return idet
+    idet["num_frames"] = num_frames
+    idet["interlaced"] = interlaced
+    idet["progressive"] = progressive
+    if determined < 50 or determined < min_num_frames:
+        print("/!\\ Not enough information to determine interlacing type reliably, results may be inacurate /!\\")
+    if interlaced > progressive:
+        if tff > bff:
+            if repeat_err < 1.0:
+                idet["vid_type"] = "Telecined TFF"
+            else:
+                idet["vid_type"] = "Interlaced TFF"
+        elif bff > tff:
+            if repeat_err < 1.0:
+                idet["vid_type"] = "Telecined BFF"
+            else:
+                idet["vid_type"] = "Interlaced BFF"
+        else:
+            idet["vid_type"] = "Interlaced?"
+    else:
+        idet["vid_type"] = "Progressive"
+    print(f"Result: {idet['vid_type']}")
+    return idet
+
+
+def get_meta_interlacing(path):
+    path = path.replace("\\", "/")
+    filtergraph = [
+        f"movie=\\'{path}\\'",
+        "cropdetect=limit=0.5:round=2",
+        "idet",
+    ]
+    proc = SP.Popen(
+        [
+            "ffprobe",
+            "-loglevel",
+            "fatal",
+            "-probesize",
+            str(0x7FFFFFFF),
+            "-analyzeduration",
+            str(0x7FFFFFFF),
+            "-f",
+            "lavfi",
+            "-i",
+            ",".join(filtergraph),
+            "-select_streams",
+            "v",
+            "-show_frames",
+            "-show_streams",
+            "-print_format",
+            "compact",
+        ],
+        stdout=SP.PIPE,
+        stdin=SP.DEVNULL,
+        bufsize=0,
+        encoding="utf8",
+    )
+    total_size = int(get_streams(path)[1]["size"])
+    data = {}
+    pbar = tqdm(
+        total=total_size,
+        desc="Analyzing video",
+        unit_divisor=1024,
+        unit_scale=True,
+        unit="iB",
+        leave=False,
+    )
+    frame_num = 0
+    from pprint import pformat, pprint
+
+    pattern = []
+    for line in proc.stdout:
+        line = __make_dict(line)
+        data.update(line)
+        if "frame" in line:
+            frame_num += 1
+            pbar.n = max(pbar.n, min(total_size, int(line["frame"]["pkt_pos"])))
+            dt = pbar._time() - pbar.start_t
+            if dt:
+                pbar.set_postfix(frame=frame_num, fps=f"{frame_num / dt:.2f}")
+            idet = line["frame"].get("lavfi", {}).get("idet")
+            # rep = idet["repeated"]["current_frame"]
+            # single = idet["single"]["current_frame"]
+            # multi = idet["multiple"]["current_frame"]
+            # pbar.write(repr((rep, single, multi)))
+            pbar.update(0)
+    pbar.close()
+    ret = proc.wait()
+    if ret != 0:
+        exit(ret)
+    stream = data["stream"]
+    # 30000/1001
+    frame_rate = list(map(int, stream["r_frame_rate"].split("/")))
+    frame_rate = Fraction(frame_rate[0], frame_rate[1])
+
+    frame_num = int(stream["nb_read_frames"])
+    cropdetect = data["frame"]["lavfi"]["cropdetect"]
+    idet = judge(data, frame_num)
+    crop = (
+        int(cropdetect["x"]),
+        (int(stream["width"]) - int(cropdetect["w"]) - int(cropdetect["x"])),
+        int(cropdetect["y"]),
+        (int(stream["height"]) - int(cropdetect["h"]) - int(cropdetect["y"])),
+    )
+    print(f"Cropping: {crop}")
+    return {"interlacing":idet, "crop":crop}
+
+
 def get_frames(path):
+    path = path.replace("\\", "/")
    proc = SP.Popen(
        [
            "ffprobe",
@ -90,16 +262,13 @@ def get_frames(path):
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
+        encoding="utf8",
    )
-    data = None
    for line in proc.stdout:
-        line = str(line, "utf8").strip().split("|")
-        line = {line[0]: dict(v.split("=") for v in line[1:])}
-        yield line
+        yield __make_dict(line)
    ret = proc.wait()
    if ret != 0:
        exit(ret)
-    return data


 def get_streams(path):
@ -175,31 +344,87 @@ def gen_d2v(path):
        unit_scale=True,
        unit="iB",
        desc="Writing d2v",
+        leave=False,
    )
+    cropdetect = None
+    idet = None
+    frame_num = 0
+    t_start=perf_counter()
    for line in prog_bar:
        if "frame" not in line:
            continue
        frame = line["frame"]
-        prog_bar.n = min(max(prog_bar.n, int(frame["pkt_pos"])), int(fmt["size"]))
-        prog_bar.update(0)
        if frame["stream_index"] != stream["index"]:
            continue
+        prog_bar.n = min(max(prog_bar.n, int(frame["pkt_pos"])), int(fmt["size"]))
+        fps=frame_num/(perf_counter()-t_start)
+        prog_bar.set_postfix(frame=frame_num,fps=f"{fps:.02f}")
+        prog_bar.update(0)
+        frame_num += 1
        if frame["pict_type"] == "I" and line_buffer:
            yield make_line(line_buffer, stream)
            line_buffer.clear()
        line_buffer.append(frame)
+    if line_buffer:
+        yield make_line(line_buffer, stream)
+    prog_bar.n = int(fmt["size"])
+    prog_bar.update(0)
    prog_bar.close()
    yield None

+def make_meta(path):
+    outdir = os.path.dirname(path)
+    outfile = os.path.splitext(os.path.basename(path))[0]
+    outfile = os.path.join(outdir, os.path.extsep.join([outfile, "info", "json"]))
+    if os.path.isfile(outfile):
+        print(path,"already analyzed, skipping")
+        return
+    print("Analyzing", path)
+    meta = get_meta_interlacing(path)
+    streams, fmt = get_streams(path)
+    stream = streams[0]
+    var = Fraction(int(stream["width"]), int(stream["height"]))
+    dar = Fraction(*map(int, stream["display_aspect_ratio"].split(":")))
+    sar = Fraction(*map(int, stream["sample_aspect_ratio"].split(":")))
+    par = sar * dar
+    meta.update(
+        {
+            "par": [par.numerator, par.denominator],
+            "dar": [dar.numerator, dar.denominator],
+            "sar": [sar.numerator, sar.denominator],
+            "var": [var.numerator, var.denominator],
+        }
+    )
+    print(f"Aspect ratios:")
+    print(f"    Pixel   {par}")
+    print(f"    Display {dar}")
+    print(f"    Screen  {sar}")
+    print(f"    Video   {var}")
+    with open(outfile, "w") as fh:
+        json.dump(meta, fh, indent=4)
+

 def make_d2v(path):
+    outdir = os.path.dirname(path)
    outfile = os.path.splitext(os.path.basename(path))[0]
-    outfile = os.path.extsep.join([outfile, "d2v"])
+    outfile = os.path.join(outdir, os.path.extsep.join([outfile, "d2v"]))
+    outfile_tmp = os.path.extsep.join([outfile, "tmp"])
+    if os.path.isfile(outfile):
+        print(path,"already indexed, skipping")
+        return
+    print("Indexing", path)
    a, b = ITT.tee(gen_d2v(path))
    next(b)
-    with open(outfile, "w") as fh:
+    with open(outfile_tmp, "w") as fh:
        for line, next_line in zip(a, b):
            fh.write(line)
            if next_line is None:  # last line, append end marker
                fh.write(" ff")
            fh.write("\n")
+    os.rename(outfile_tmp,outfile)
+
+
+if __name__ == "__main__":
+    for file in ITT.chain.from_iterable(map(glob, sys.argv[1:])):
+        make_meta(file)
+        make_d2v(file)
--- a/vob_demux.py
+++ b/vob_demux.py
@ -1,7 +1,8 @@
-import sys
-import os
 import json
+import os
 import subprocess as SP
+import sys
+import shutil


 def get_streams(path):
@ -31,22 +32,35 @@ def get_streams(path):
        return [], {}
    return data["streams"], data["format"]

+def ccextract(files):
+    ccextractor = shutil.which("ccextractor") or shutil.which("ccextractorwinfull")
+    if ccextractor is None and os.name=="nt":
+            ccextractor=os.path.expandvars(os.path.join("${PROGRAMFILES(X86)}","CCExtractor","ccextractorwinfull.exe"))
+    if not os.path.isfile(ccextractor):
+        print("WARNING: CCExtractor not found")
+        return []
+    new_files=[]
+    for file in files:
+        outfile=os.path.splitext(file)[0]
+        outfile=os.path.extsep.join([outfile, "cc.srt"])
+        ret=SP.call([ccextractor, "-sc", "-sbs", "-autodash", "-trim","-nobom","-o", outfile, file])
+        if ret==10:
+            if os.path.isfile(outfile):
+                os.unlink(outfile)
+                continue
+        new_files.append(outfile)
+    return new_files

-types = {
-    "mpeg2video": "m2v",
-    "ac3": "ac3",
-    "dvd_subtitle": "sup",
-}

+types = {"mpeg2video": "m2v", "ac3": "ac3", "dvd_subtitle": "sub.mkv", "eia_608": "srt"}

 def demux(path):
    folder = os.path.dirname(path)
    basename = os.path.splitext(os.path.basename(path))[0]
-    streams, fmt = get_streams(path)
+    streams, _ = get_streams(path)
    cmd = [
        "ffmpeg",
        "-y",
-        # "-fflags","+genpts+igndts",
        "-probesize",
        str(0x7FFFFFFF),
        "-analyzeduration",
@ -55,33 +69,39 @@ def demux(path):
        path,
        "-strict",
        "-2",
-        "-vcodec",
-        "copy",
-        "-acodec",
-        "copy",
-        "-scodec",
-        "copy",
    ]
-    need_ffmpeg = False
+    caption_files = []
    for stream in streams:
        codec = stream["codec_name"]
        ext = types.get(codec, codec)
-        idx = stream["index"]
        hex_id = stream["id"]
        codec_name = stream["codec_long_name"]
-        outfile = os.path.join(folder, f"{basename}_{idx}_{hex_id}")
-        if codec=="dvd_nav_packet":
+        outfile = os.path.join(folder, f"{basename}_{hex_id}")
+        if codec == "dvd_nav_packet":
            continue
-        print(idx, hex_id, codec_name, codec)
-        if codec == "dvd_subtitle":
-            SP.check_call([
-                "mencoder",path,"-vobsuboutindex",str(idx),"-vobsubout", outfile,"-nosound","-ovc", "copy", "-o",os.devnull
-            ])
-            continue
-        cmd += ["-map", f"0:#{hex_id}", "-strict", "-2", outfile + f".{ext}"]
-        need_ffmpeg = True
-    if need_ffmpeg:
-        SP.check_call(cmd)
+        outfile = os.path.extsep.join([outfile, ext])
+        print(hex_id, codec_name, codec)
+        if codec == "mpeg2video":
+            caption_files.append(outfile)
+        cmd += [
+            "-map",
+            f"0:#{hex_id}",
+            "-vcodec",
+            "copy",
+            "-acodec",
+            "copy",
+            "-scodec",
+            "copy",
+            "-strict",
+            "-2",
+            outfile,
+        ]
+    SP.check_call(cmd)
+    ccextract(caption_files)
+    for file in os.listdir(folder):
+        if os.path.isfile(file):
+            if os.stat(file).st_size==0:
+                os.unlink(file)

-if __name__=="__main__":
+if __name__ == "__main__":
    demux(sys.argv[1])