Pushed latest changes, add Telecine/Interlacing detection to ff_d2v.py

2022-01-07 18:27:31 +01:00 · 2022-01-07 18:27:31 +01:00 · 5e4d9d6965
commit 5e4d9d6965
parent e06f1dfad5
11 changed files with 2403 additions and 2039 deletions
--- a/.gitignore
+++ b/.gitignore
@ -138,3 +138,13 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
 .vscode/*
 !.vscode/settings.json
 !.vscode/tasks.json
 !.vscode/launch.json
 !.vscode/extensions.json
 *.code-workspace
 # Local History for Visual Studio Code
 .history/
--- a/README.md
+++ b/README.md
@ -2,9 +2,27 @@
 Choggbuster is a set of python scripts aimed at automated preprocessing of video DVDs for archival and filtering
 # Requirements
 - python (obviously)
 - libdvdcss (for decrypting copy protected DVDs)
 - libdvdnav (for streaming the VOBs to disk)
 - libdvdread (for reading decrypted data off of DVDs)
 - ffmpeg (for demuxing)
 - ccextractor (for extracting DVD Subtitles)
 # Setup (Windows)
 1. Clone the repo
 2. `pip install cffi tqdm`
 3. Grab [libdvdread, libdvdnav](https://www.videolan.org/developers/libdvdnav.html) and [libdvdcss](https://www.videolan.org/developers/libdvdcss.html) from VLC and drop them next to `dvd_ripper.py`
 4. `python dvd_ripper.py F:\` or `python dvd_ripper.py D:\path\to\DVD.ISO`
 5. this will create a folder `out` with a subfolder for the disc containing:
    - JSON file with metadata for the DVD title (`XXXX.json` where `X` is the title number)
    - demuxed streams (`tXXX_aYYY_Z_0xAAA.{ext}` where `X` is the title number, `Y` is the angle number, `Z` is the stream index and `AAA` is the stream id)
        - `.m2v` for video
        - `.ac3` or `.dts` for audio
        - `.sub` and `.idx` for subtitles
        - `.srt` for captions
    - `.d2v` file for use with AviSynth and Vapoursynth D2V reader
    - `.info.json` file containing video stream metadata (cropping information, interlaced/progressive frame count and aspect ratio information)
--- a/dvd_ripper.py
+++ b/dvd_ripper.py
@ -1,65 +1,99 @@
-import cffi
+import itertools as ITT
 import json
 import os
 import subprocess as SP
 import sys
 import time
 from dvdnav import DVDNav,DVDError
 from dvdread import DVDRead
 import subprocess as SP
 import json
 from glob import glob
 import itertools as ITT
 from vob_demux import demux
 from ff_d2v import make_d2v
-def loadlib(dll_path, *includes, **kwargs):
+import cffi
-    ffi = cffi.FFI()
+from datetime import timedelta
-    for include in includes:
+from dvdnav import DVDError, DVDNav
-        ffi.cdef(open(include).read(), kwargs)
+from dvdread import DVDRead
-    return ffi, ffi.dlopen(dll_path)
+from ff_d2v import make_d2v, make_meta
 from vob_demux import demux
 def close_file_del_if_empty(fh):
    """Close ``fh`` and remove its file from disk when nothing was written.

    Returns ``True`` when the file was kept (its write position was past
    zero) and ``False`` when ``fh`` was falsy or the empty file was deleted.
    """
    if not fh:
        return False
    # The position has to be sampled before closing; tell() fails afterwards.
    is_empty = fh.tell() == 0
    fh.close()
    if is_empty:
        os.unlink(fh.name)
    return not is_empty
 # Minimum title duration in seconds; titles shorter than this are skipped by the rip loop.
 dur_thr = 60.0
 def process_m2v_files(path):
    """Analyze and index every ``.m2v`` file found anywhere below ``path``.

    For each match this writes the metadata file via ``make_meta`` and then
    the D2V index via ``make_d2v``.
    """
    # "**" only descends into subdirectories when recursive=True is passed;
    # without it the pattern silently behaves like a single "*" level.
    pattern = os.path.join(path, "**", "*.m2v")
    for file in glob(pattern, recursive=True):
        make_meta(file)
        make_d2v(file)
 for dvd_path in ITT.chain.from_iterable(map(glob, sys.argv[1:])):
    r = DVDRead(dvd_path)
    # r.grab_ifos()
    # r.grab_vobs()
    # exit()
-
+    if os.path.isfile(dvd_path):
-    out_folder = os.path.join(
+        basename = os.path.splitext(os.path.basename(dvd_path))[0]
-        "out", "_".join([r.disc_id, r.udf_disc_name or r.iso_disc_name]).replace(" ", "_")
+    else:
-    )
+        basename = r.iso_disc_name or r.udf_disc_name
-    os.makedirs(out_folder, exist_ok=True)
+    base_dir = os.path.join("out", "_".join([basename, r.disc_id]).replace(" ", "_"))
    if os.path.isdir(base_dir):
        print(f"Output foldrer {base_dir} exists, remove to re-rip DVD")
        process_m2v_files(base_dir)
        continue
    os.makedirs(base_dir, exist_ok=True)
    d = DVDNav(dvd_path)
    to_demux = []
    for k, v in d.titles.items():
        out_folder=os.path.join(base_dir,f"t{k:03}")
        v["duration"] = v["duration"].total_seconds()
-        v["chapters"] = [c.total_seconds() for c in v["chapters"]]
+        if v["chapters"]:
            v["chapters"] = [0.0]+[c.total_seconds() for c in v["chapters"]]
            avg_chapter_len = v["duration"] / len(v["chapters"])
            # if avg_chapter_len<10:
            #     continue
        d.titles[k] = v
-        with open(os.path.join(out_folder, f"{k:03}.json"), "w") as fh:
+        # if not v.get('audio'):
-            json.dump(v, fh)
+        #     print(f"[{k}|0] Skipping title {k} because it has no audio tracks")
        #     continue
        # if not v.get('vts'):
        #     print(f"[{k}|0] Skipping title {k} because it has no title sets")
        #     continue
        if v["duration"] < dur_thr:
            print(
                f"[{k}|0] Skipping title {k} because it is shorter than {dur_thr} seconds ({v['duration']} seconds)"
            )
            continue
        os.makedirs(out_folder, exist_ok=True)
        with open(os.path.join(out_folder, f"title.json"), "w") as fh:
            json.dump(d.titles[k], fh, indent=4)
        with open(os.path.join(out_folder, f"chapters.txt"), "w") as fh:
            if set(v["chapters"])==set([0.0]):
                continue
            for n,t in enumerate(v["chapters"],1):
                if abs(t-v["duration"])<1.0:
                    continue
                print(f"CHAPTER{n:02}={timedelta(seconds=t)}",file=fh)
                print(f"CHAPTER{n:02}NAME=Chapter {n}",file=fh)
        for a in range(0, 99):
-            block=0
+            outfile = os.path.join(out_folder, f"{a:03}.vob")
            outfile = os.path.join(out_folder, f"t{k:03}_a{a:03}_b{block:03}.vob")
            to_demux.append(outfile)
            fh = open(outfile, "wb")
            try:
                for block in d.get_blocks(k, a):
                    if isinstance(block, int):
                        outfile = os.path.join(out_folder, f"t{k:03}_a{a:03}_b{block:03}.vob")
                        to_demux.append(outfile)
                        if fh:
                            fh.close()
                        fh = open(outfile, "wb")
                    else:
                    fh.write(block)
            except DVDError as e:
                if str(e) != "Invalid angle specified!":
                    raise
-            if fh.tell()==0:
+            close_file_del_if_empty(fh)
-                fh.close()
+    to_demux = list(filter(os.path.isfile, to_demux))
                os.unlink(fh.name)
                while fh.name in to_demux:
                    to_demux.remove(fh.name)
    for file in to_demux:
        demux(file)
        os.unlink(file)
-    for file in glob(os.path.join(out_folder,"*.m2v")):
+    process_m2v_files(base_dir)
        make_d2v(file)
--- a/dvdnav.py
+++ b/dvdnav.py
@ -1,8 +1,10 @@
 import cffi
 import os
 import functools
 import os
 from datetime import timedelta
 import cffi
 from tqdm import tqdm
 from dvdread import DVDRead
@ -13,6 +15,29 @@ def loadlib(dll_path, *includes, **kwargs):
    return ffi, ffi.dlopen(dll_path)
 # Display names for the numeric domain codes found in VTS change events
 # (looked up with the event's old_domain / new_domain fields).
 domains = {
    0: "None",
    1: "FirstPlay",
    2: "VTSTitle",
    4: "VMGM",
    8: "VTSMenu",
 }
 # Display names for dvdnav event codes; used to label events that have no
 # dedicated handler when they are reported as "Unhandled".
 events = {
    0: "DVDNAV_BLOCK_OK",
    1: "DVDNAV_NOP",
    2: "DVDNAV_STILL_FRAME",
    3: "DVDNAV_SPU_STREAM_CHANGE",
    4: "DVDNAV_AUDIO_STREAM_CHANGE",
    5: "DVDNAV_VTS_CHANGE",
    6: "DVDNAV_CELL_CHANGE",
    7: "DVDNAV_NAV_PACKET",
    8: "DVDNAV_STOP",
    9: "DVDNAV_HIGHLIGHT",
    10: "DVDNAV_SPU_CLUT_CHANGE",
    12: "DVDNAV_HOP_CHANNEL",
    13: "DVDNAV_WAIT",
 }
 class DVDError(Exception):
    """Raised when a dvdnav library call returns its error status."""
    pass
@ -67,27 +92,6 @@ class DVDNav(object):
        size = self.ffi.new("int32_t*", 0)
        pos = self.ffi.new("uint32_t*", 0)
        total_size = self.ffi.new("uint32_t*", 0)
        domains = {
            1: "FirstPlay",
            2: "VTSTitle",
            4: "VMGM",
            8: "VTSMenu",
        }
        events = {
            0: "DVDNAV_BLOCK_OK",
            1: "DVDNAV_NOP",
            2: "DVDNAV_STILL_FRAME",
            3: "DVDNAV_SPU_STREAM_CHANGE",
            4: "DVDNAV_AUDIO_STREAM_CHANGE",
            5: "DVDNAV_VTS_CHANGE",
            6: "DVDNAV_CELL_CHANGE",
            7: "DVDNAV_NAV_PACKET",
            8: "DVDNAV_STOP",
            9: "DVDNAV_HIGHLIGHT",
            10: "DVDNAV_SPU_CLUT_CHANGE",
            12: "DVDNAV_HOP_CHANNEL",
            13: "DVDNAV_WAIT",
        }
        progbar = tqdm(
            unit_divisor=1024,
            unit_scale=True,
@ -96,7 +100,8 @@ class DVDNav(object):
            disable=False,
        )
        ripped = set()
-        current_vts = None
+        cells = set()
        current_vts = (None,None)
        current_cell = None
        current_pg = None
        while True:
@ -143,26 +148,36 @@ class DVDNav(object):
                current_cell = cell.cellN
                current_pg = cell.pgN
                progbar.write(
-                    f"[{title}|{angle}] Cell: {cell.cellN} ({cell.cell_start}-{cell.cell_start+cell.cell_length}), PG: {cell.pgN} ({cell.pg_start}-{cell.pg_start+cell.pg_length})"
+                    f"[{title}|{angle}] Cell: {cell.cellN} ({hex(cell.cell_start)}-{hex(cell.cell_start+cell.cell_length)}), PG: {cell.pgN} ({hex(cell.pg_start)}-{hex(cell.pg_start+cell.pg_length)})"
                )
                fp=(current_vts[0],current_vts[1],cell.cellN,cell.pgN,cell.cell_length,cell.pg_length,cell.pgc_length,cell.cell_start,cell.pg_start)
                if fp in cells:
                    progbar.write(f"[{title}|{angle}] Cells Looped!")
                    break
                cells.add(fp)
            elif ev[0] == self.lib.DVDNAV_VTS_CHANGE:
                vts = self.ffi.cast("dvdnav_vts_change_event_t*", buf)
                old_domain = domains[vts.old_domain]
                new_domain = domains[vts.new_domain]
                new_vts = (vts.new_vtsN, vts.new_domain)
                old_vts = (vts.old_vtsN, vts.old_domain)
                ripped.add((vts.old_vtsN, vts.old_domain))
-                # progbar.write(f"[{title}|{angle}] VTS: {vts.old_vtsN} ({vts.old_domain} {old_domain}) -> {vts.new_vtsN} ({vts.new_domain} {new_domain})")
+                cells.clear()
-                if new_vts in ripped:  # looped
+                progbar.write(f"[{title}|{angle}] VTS: {vts.old_vtsN} ({vts.old_domain} {old_domain}) -> {vts.new_vtsN} ({vts.new_domain} {new_domain})")
-                    progbar.write(f"[{title}|{angle}] Looped!")
+                if (new_vts in ripped) or new_vts==old_vts:  # looped
                    progbar.write(f"[{title}|{angle}] VTS Looped!")
                    break
                current_vts = (vts.new_vtsN, vts.new_domain)
                if vts.new_domain == 8:  # back to menu
-                    progbar.write(f"[{title}|{angle}] Back to menu!")
+                    progbar.write(f"[{title}|{angle}] VTS Back to menu!")
                    break
-                yield vts.new_vtsN
+                # yield vts.new_vtsN
            else:
                progbar.write(
                    f"[{title}|{angle}] Unhandled: {events.get(ev[0],ev[0])} {size[0]}"
                )
        self.__check_error(self.lib.dvdnav_stop(self.dvd))
        progbar.close()
    def __check_error(self, ret):
        if ret == self.lib.DVDNAV_STATUS_ERR:
@ -171,6 +186,44 @@ class DVDNav(object):
                raise DVDError(err)
            raise DVDError("Unknown error")
    def __get_vts(self,title):
        """Play ``title`` and record the VTS changes reported while skipping through it.

        Returns a list of ``(new_vtsN, domain_name)`` tuples, one per
        DVDNAV_VTS_CHANGE event, in the order the events arrive. The scan ends
        on DVDNAV_STOP, when ``dvdnav_next_pg_search`` returns 0, or right
        after a change into domain code 8 ("VTSMenu" in ``domains``).

        Raises DVDError (through ``__check_error``) when a checked dvdnav call
        fails, and KeyError when an event carries a domain code that is
        missing from ``domains``.
        """
        # Scratch buffers handed to the dvdnav C API.
        buf = self.ffi.new("char[]", 4096)
        ev = self.ffi.new("int32_t*", self.lib.DVDNAV_NOP)
        size = self.ffi.new("int32_t*", 0)
        pos = self.ffi.new("uint32_t*", 0)
        total_size = self.ffi.new("uint32_t*", 0)
        self.__check_error(self.lib.dvdnav_set_PGC_positioning_flag(self.dvd, 1))
        self.__check_error(self.lib.dvdnav_title_play(self.dvd, title))
        seq=[]
        while True:
            self.__check_error(self.lib.dvdnav_get_next_block(self.dvd, buf, ev, size))
            if ev[0] == self.lib.DVDNAV_BLOCK_OK:
                self.__check_error(self.lib.dvdnav_get_position(self.dvd, pos, total_size))
                # print(title,pos[0],total_size[0])
                # The block data itself is not used; jump straight to the next program.
                # NOTE(review): a return value of 0 is presumably the error status,
                # i.e. nothing left to skip to - confirm against libdvdnav.
                if self.lib.dvdnav_next_pg_search(self.dvd)==0:
                    break
            elif ev[0] == self.lib.DVDNAV_STOP:
                break
            elif ev[0] == self.lib.DVDNAV_STILL_FRAME:
                self.__check_error(self.lib.dvdnav_still_skip(self.dvd))
            elif ev[0] == self.lib.DVDNAV_WAIT:
                self.__check_error(self.lib.dvdnav_wait_skip(self.dvd))
            elif ev[0] == self.lib.DVDNAV_VTS_CHANGE:
                vts = self.ffi.cast("dvdnav_vts_change_event_t*", buf)
                # old_domain is not used afterwards, but the lookup still
                # raises KeyError for an unknown domain code.
                old_domain = domains[vts.old_domain]
                new_domain = domains[vts.new_domain]
                seq.append(
                    (vts.new_vtsN, new_domain)
                )
                # Domain code 8 is "VTSMenu" in the domains table: stop there.
                if vts.new_domain==8:
                    break
                continue
            # print(title,ev[0],size[0])
        self.__check_error(self.lib.dvdnav_stop(self.dvd))
        # print(title,seq)
        return seq
        # self.__check_error(self.lib.dvdnav_next_pg_search(self.dvd))
    def __get_titles(self):
        titles = self.ffi.new("int32_t*", 0)
        p_times = self.ffi.new("uint64_t[]", 512)
@ -191,6 +244,8 @@ class DVDNav(object):
            if duration[0] == 0:
                continue
            chapters = []
            if num_chapters==0 and times[0]==self.ffi.NULL:
                chapters=None
            for t in range(num_chapters):
                chapters.append(timedelta(seconds=times[0][t] / 90000))
            self.titles[title] = {
@ -220,6 +275,7 @@ class DVDNav(object):
            self.__check_error(self.lib.dvdnav_title_play(self.dvd, title))
            self.titles[title]["audio"] = {}
            self.titles[title]["subtitles"] = {}
            # self.titles[title]["vts"] = self.__get_vts(title)
            for n in range(255):
                stream_id = self.lib.dvdnav_get_audio_logical_stream(self.dvd, n)
                if stream_id == -1:
@ -241,8 +297,7 @@ class DVDNav(object):
                    3: "director's commentary",
                    4: "alternate director's commentary",
                }[audio_attrs.code_extension]
-                self.titles[title]["audio"][n] = {
+                self.titles[title]["audio"][stream_id] = {
                    "stream_id": stream_id,
                    "lang": alang,
                    "channels": channels,
                    "codec": codec,
@ -258,8 +313,8 @@ class DVDNav(object):
                slang = None
                if spu_attr.type == 1:
                    slang = str(spu_attr.lang_code.to_bytes(2, "big"), "utf8")
-                self.titles[title]["subtitles"][n] = {
+                self.titles[title]["subtitles"][stream_id] = {
                    "stream_id": stream_id,
                    "lang": slang,
                }
        self.__check_error(self.lib.dvdnav_stop(self.dvd))
        # exit("DEBUG!")
--- a/dvdread.py
+++ b/dvdread.py
@ -1,9 +1,10 @@
 import cffi
 import os
 import functools
 import binascii
 import functools
 import os
 from datetime import timedelta
 import cffi
 def loadlib(dll_path, *includes, **kwargs):
    ffi = cffi.FFI()
@ -55,6 +56,7 @@ class DVDRead(object):
            pbar.update(num_read)
            yield self.ffi.buffer(buf,num_read)[:]
        self.lib.DVDCloseFile(fh)
        pbar.close()
    def grab_ifos(self):
        vmg_ifo = self.lib.ifoOpen(self.dvd, 0)
--- a/ff_d2v.py
+++ b/ff_d2v.py
@ -1,9 +1,18 @@
-import sys
+import itertools as ITT
 import json
 import os
 import subprocess as SP
-import itertools as ITT
+import sys
 from pprint import pprint
 from fractions import Fraction
 from glob import glob
 from collections import Counter
 from tqdm import tqdm
 from time import perf_counter
 def pulldown(fields_per_second, frames_per_second):
    f = Fraction(fields_per_second, frames_per_second)
 colorspace = {
@ -59,7 +68,7 @@ def make_flags(frames):
 def make_line(frames, stream):
    info = f"{make_info(frames):03x}"
-    matrix = colorspace[stream["color_space"]]
+    matrix = colorspace[stream.get("color_space", "unknown")]
    file = 0
    position = frames[0]["pkt_pos"]
    skip = 0
@ -69,7 +78,170 @@ def make_line(frames, stream):
    return " ".join(map(str, [info, matrix, file, position, skip, vob, cell, *flags]))
 def __make_dict(line):
    """Parse one ``|``-separated line of ffprobe compact output into a nested dict.

    The first field is the section name (e.g. ``frame`` or ``stream``); every
    following ``key=value`` field is stored under it. Dotted keys create
    nested dicts, and a ``prefix:`` on any key component other than the last
    is dropped, so ``tag:lavfi.idet.multiple.tff=3`` ends up at
    ``["lavfi"]["idet"]["multiple"]["tff"]``. Fields without ``=`` are
    ignored. All values stay strings.

    Returns ``{section_name: parsed_fields}``.
    """
    section, *fields = line.strip().split("|")
    ret = {}
    for field in fields:
        # Split on the first "=" only: a value may itself contain "=", which
        # used to make the two-name unpacking of split("=") raise ValueError.
        key_path, sep, value = field.partition("=")
        if not sep:
            continue
        *parents, leaf = key_path.split(".")
        entry = ret
        for key in parents:
            if ":" in key:
                key = key.split(":")[1]
            entry = entry.setdefault(key, {})
        entry[leaf] = value
    return {section: ret}
 def judge(info, num_frames):
    """Classify a video as progressive, interlaced or telecined from idet counters.

    ``info`` is the parsed ffprobe data; the counters are read from
    ``info["frame"]["lavfi"]["idet"]`` (sub-dicts ``repeated``, ``single`` and
    ``multiple``). ``num_frames`` is the number of frames that were analyzed.

    Returns a dict with the numeric counters under ``repeat``, ``single`` and
    ``multiple`` (non-numeric entries are dropped). When at least one frame
    was classified it also carries ``num_frames``, ``interlaced``,
    ``progressive`` and the verdict string ``vid_type``. A summary is printed
    along the way.
    """
    min_num_frames = 250  # fewer classified frames than this: verdict is flagged as unreliable

    raw = info["frame"]["lavfi"]["idet"]
    idet = {}
    # The "repeated" counters are exposed under the shorter key "repeat".
    for src, dst in (("repeated", "repeat"), ("single", "single"), ("multiple", "multiple")):
        counters = {}
        for k, v in raw[src].items():
            # Keep ints as ints, fall back to float, drop anything non-numeric.
            for cast in (int, float):
                try:
                    counters[k] = cast(v)
                except ValueError:
                    continue
                break
        idet[dst] = counters

    # 2:3 pulldown, 4 frames @ ~24 FPS to ~30 FPS = 20% repeated fields,
    # so about 80% of the frames should report "neither" field repeated.
    repeat_err = abs((idet["repeat"]["neither"] / num_frames) - 0.8)
    print(f"Derivation from 2:3 Pulldown: {repeat_err:.2%}")

    tff = idet["multiple"]["tff"]
    bff = idet["multiple"]["bff"]
    progressive = idet["multiple"]["progressive"]
    interlaced = tff + bff
    determined = interlaced + progressive
    print(f"Determined: {determined}")
    # Bail out before any percentage is computed: with no classified frames
    # every ratio below would divide by zero.
    if determined == 0:
        return idet

    if interlaced:
        print(f"Interlaced: {interlaced} (TFF: {tff/interlaced:.2%}, BFF: {bff/interlaced:.2%}) = {interlaced/determined:.2%}")
    else:
        print(f"Interlaced: {interlaced} = {interlaced/determined:.2%}")
    print(f"Progressive: {progressive} = {progressive/determined:.2%}")

    idet["num_frames"] = num_frames
    idet["interlaced"] = interlaced
    idet["progressive"] = progressive
    if determined < min_num_frames:
        print("/!\\ Not enough information to determine interlacing type reliably, results may be inacurate /!\\")

    # NOTE(review): repeat_err cannot exceed 0.8 as long as the "neither"
    # count stays within num_frames, so `repeat_err < 1.0` is always true and
    # the plain "Interlaced TFF/BFF" verdicts look unreachable - confirm the
    # intended threshold.
    if interlaced > progressive:
        if tff > bff:
            if repeat_err < 1.0:
                idet["vid_type"] = "Telecined TFF"
            else:
                idet["vid_type"] = "Interlaced TFF"
        elif bff > tff:
            if repeat_err < 1.0:
                idet["vid_type"] = "Telecined BFF"
            else:
                idet["vid_type"] = "Interlaced BFF"
        else:
            idet["vid_type"] = "Interlaced?"
    else:
        idet["vid_type"] = "Progressive"
    print(f"Result: {idet['vid_type']}")
    return idet
 def get_meta_interlacing(path):
    """Run ffprobe with the cropdetect and idet filters over ``path`` and summarize the result.

    The file is decoded once through a lavfi ``movie`` source; ffprobe's
    compact output is parsed line by line while a progress bar tracks the
    packet position of the latest frame.

    Returns ``{"interlacing": <result of judge()>, "crop": (left, right, top, bottom)}``
    where the crop margins are derived from the cropdetect values of the last
    frame and the stream's width and height.

    Terminates the interpreter via ``exit`` with ffprobe's status when
    ffprobe exits with a non-zero status.
    """
    path = path.replace("\\", "/")
    filtergraph = [
        f"movie=\\'{path}\\'",
        "cropdetect=limit=0.5:round=2",
        "idet",
    ]
    proc = SP.Popen(
        [
            "ffprobe",
            "-loglevel",
            "fatal",
            "-probesize",
            str(0x7FFFFFFF),
            "-analyzeduration",
            str(0x7FFFFFFF),
            "-f",
            "lavfi",
            "-i",
            ",".join(filtergraph),
            "-select_streams",
            "v",
            "-show_frames",
            "-show_streams",
            "-print_format",
            "compact",
        ],
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    )
    total_size = int(get_streams(path)[1]["size"])
    # Only the most recent line of each section is kept: after the loop
    # data["frame"] is the last frame and data["stream"] the stream summary.
    data = {}
    pbar = tqdm(
        total=total_size,
        desc="Analyzing video",
        unit_divisor=1024,
        unit_scale=True,
        unit="iB",
        leave=False,
    )
    frame_num = 0
    for line in proc.stdout:
        line = __make_dict(line)
        data.update(line)
        if "frame" in line:
            frame_num += 1
            # Progress is the furthest packet position seen, capped at the file size.
            pbar.n = max(pbar.n, min(total_size, int(line["frame"]["pkt_pos"])))
            # NOTE(review): _time and start_t are tqdm attributes accessed
            # directly here - confirm they are stable across tqdm versions.
            dt = pbar._time() - pbar.start_t
            if dt:
                pbar.set_postfix(frame=frame_num, fps=f"{frame_num / dt:.2f}")
            pbar.update(0)
    pbar.close()
    ret = proc.wait()
    if ret != 0:
        exit(ret)
    stream = data["stream"]
    frame_num = int(stream["nb_read_frames"])
    cropdetect = data["frame"]["lavfi"]["cropdetect"]
    idet = judge(data, frame_num)
    # Margins to remove on each side: (left, right, top, bottom).
    crop = (
        int(cropdetect["x"]),
        (int(stream["width"]) - int(cropdetect["w"]) - int(cropdetect["x"])),
        int(cropdetect["y"]),
        (int(stream["height"]) - int(cropdetect["h"]) - int(cropdetect["y"])),
    )
    print(f"Cropping: {crop}")
    return {"interlacing":idet, "crop":crop}
 def get_frames(path):
    path = path.replace("\\", "/")
    proc = SP.Popen(
        [
            "ffprobe",
@ -90,16 +262,13 @@ def get_frames(path):
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    )
    data = None
    for line in proc.stdout:
-        line = str(line, "utf8").strip().split("|")
+        yield __make_dict(line)
        line = {line[0]: dict(v.split("=") for v in line[1:])}
        yield line
    ret = proc.wait()
    if ret != 0:
        exit(ret)
    return data
 def get_streams(path):
@ -175,31 +344,87 @@ def gen_d2v(path):
        unit_scale=True,
        unit="iB",
        desc="Writing d2v",
        leave=False,
    )
    cropdetect = None
    idet = None
    frame_num = 0
    t_start=perf_counter()
    for line in prog_bar:
        if "frame" not in line:
            continue
        frame = line["frame"]
        prog_bar.n = min(max(prog_bar.n, int(frame["pkt_pos"])), int(fmt["size"]))
        prog_bar.update(0)
        if frame["stream_index"] != stream["index"]:
            continue
        prog_bar.n = min(max(prog_bar.n, int(frame["pkt_pos"])), int(fmt["size"]))
        fps=frame_num/(perf_counter()-t_start)
        prog_bar.set_postfix(frame=frame_num,fps=f"{fps:.02f}")
        prog_bar.update(0)
        frame_num += 1
        if frame["pict_type"] == "I" and line_buffer:
            yield make_line(line_buffer, stream)
            line_buffer.clear()
        line_buffer.append(frame)
    if line_buffer:
        yield make_line(line_buffer, stream)
    prog_bar.n = int(fmt["size"])
    prog_bar.update(0)
    prog_bar.close()
    yield None
 def make_meta(path):
    """Write ``<name>.info.json`` next to the video at ``path``.

    The file combines the result of ``get_meta_interlacing`` with four aspect
    ratios stored as ``[numerator, denominator]`` lists under ``par``,
    ``dar``, ``sar`` and ``var``. Nothing is done when the output file
    already exists.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    outfile = os.path.join(
        os.path.dirname(path), os.path.extsep.join([stem, "info", "json"])
    )
    if os.path.isfile(outfile):
        print(path,"already analyzed, skipping")
        return
    print("Analyzing", path)
    meta = get_meta_interlacing(path)
    streams, _ = get_streams(path)
    stream = streams[0]
    var = Fraction(int(stream["width"]), int(stream["height"]))
    dar_num, dar_den = map(int, stream["display_aspect_ratio"].split(":"))
    dar = Fraction(dar_num, dar_den)
    sar_num, sar_den = map(int, stream["sample_aspect_ratio"].split(":"))
    sar = Fraction(sar_num, sar_den)
    # NOTE(review): "par" is stored as sar * dar - confirm this is the
    # quantity downstream consumers expect under that name.
    par = sar * dar
    for key, ratio in (("par", par), ("dar", dar), ("sar", sar), ("var", var)):
        meta[key] = [ratio.numerator, ratio.denominator]
    print("Aspect ratios:")
    print(f"    Pixel   {par}")
    print(f"    Display {dar}")
    print(f"    Screen  {sar}")
    print(f"    Video   {var}")
    with open(outfile, "w") as fh:
        json.dump(meta, fh, indent=4)
 def make_d2v(path):
    outdir = os.path.dirname(path)
    outfile = os.path.splitext(os.path.basename(path))[0]
-    outfile = os.path.extsep.join([outfile, "d2v"])
+    outfile = os.path.join(outdir, os.path.extsep.join([outfile, "d2v"]))
    outfile_tmp = os.path.extsep.join([outfile, "tmp"])
    if os.path.isfile(outfile):
        print(path,"already indexed, skipping")
        return
    print("Indexing", path)
    a, b = ITT.tee(gen_d2v(path))
    next(b)
-    with open(outfile, "w") as fh:
+    with open(outfile_tmp, "w") as fh:
        for line, next_line in zip(a, b):
            fh.write(line)
            if next_line is None:  # last line, append end marker
                fh.write(" ff")
            fh.write("\n")
    os.rename(outfile_tmp,outfile)
 if __name__ == "__main__":
    # Every command-line argument is a glob pattern; each match is analyzed
    # first and indexed second, one pattern at a time.
    for pattern in sys.argv[1:]:
        for file in glob(pattern):
            make_meta(file)
            make_d2v(file)
--- a/vob_demux.py
+++ b/vob_demux.py
@ -1,7 +1,8 @@
 import sys
 import os
 import json
 import os
 import subprocess as SP
 import sys
 import shutil
 def get_streams(path):
@ -31,22 +32,35 @@ def get_streams(path):
        return [], {}
    return data["streams"], data["format"]
 def ccextract(files):
    """Extract closed captions from each file in ``files`` into ``<name>.cc.srt``.

    The CCExtractor executable is looked up on PATH (``ccextractor`` or
    ``ccextractorwinfull``); on Windows the default install folder under
    ``%PROGRAMFILES(X86)%`` is tried as a fallback. When no executable is
    found a warning is printed and an empty list is returned.

    Returns the list of caption files that were produced. Runs that end with
    exit status 10 are left out and any output file they created is removed.
    """
    ccextractor = shutil.which("ccextractor") or shutil.which("ccextractorwinfull")
    if ccextractor is None and os.name == "nt":
        ccextractor = os.path.expandvars(
            os.path.join("${PROGRAMFILES(X86)}", "CCExtractor", "ccextractorwinfull.exe")
        )
    # which() returns None when nothing is on PATH; os.path.isfile(None)
    # would raise TypeError, so test for None explicitly first.
    if ccextractor is None or not os.path.isfile(ccextractor):
        print("WARNING: CCExtractor not found")
        return []
    new_files = []
    for file in files:
        outfile = os.path.extsep.join([os.path.splitext(file)[0], "cc.srt"])
        ret = SP.call([ccextractor, "-sc", "-sbs", "-autodash", "-trim","-nobom","-o", outfile, file])
        if ret == 10:
            # NOTE(review): 10 is presumably CCExtractor's "no captions found"
            # status - confirm. Either way no usable file exists for this
            # input, so it must not be reported even if nothing was written.
            if os.path.isfile(outfile):
                os.unlink(outfile)
            continue
        new_files.append(outfile)
    return new_files
 types = {
    "mpeg2video": "m2v",
    "ac3": "ac3",
    "dvd_subtitle": "sup",
 }
 types = {"mpeg2video": "m2v", "ac3": "ac3", "dvd_subtitle": "sub.mkv", "eia_608": "srt"}
 def demux(path):
    folder = os.path.dirname(path)
    basename = os.path.splitext(os.path.basename(path))[0]
-    streams, fmt = get_streams(path)
+    streams, _ = get_streams(path)
    cmd = [
        "ffmpeg",
        "-y",
        # "-fflags","+genpts+igndts",
        "-probesize",
        str(0x7FFFFFFF),
        "-analyzeduration",
@ -55,33 +69,39 @@ def demux(path):
        path,
        "-strict",
        "-2",
    ]
    caption_files = []
    for stream in streams:
        codec = stream["codec_name"]
        ext = types.get(codec, codec)
        hex_id = stream["id"]
        codec_name = stream["codec_long_name"]
        outfile = os.path.join(folder, f"{basename}_{hex_id}")
        if codec == "dvd_nav_packet":
            continue
        outfile = os.path.extsep.join([outfile, ext])
        print(hex_id, codec_name, codec)
        if codec == "mpeg2video":
            caption_files.append(outfile)
        cmd += [
            "-map",
            f"0:#{hex_id}",
            "-vcodec",
            "copy",
            "-acodec",
            "copy",
            "-scodec",
            "copy",
            "-strict",
            "-2",
            outfile,
        ]
    need_ffmpeg = False
    for stream in streams:
        codec = stream["codec_name"]
        ext = types.get(codec, codec)
        idx = stream["index"]
        hex_id = stream["id"]
        codec_name = stream["codec_long_name"]
        outfile = os.path.join(folder, f"{basename}_{idx}_{hex_id}")
        if codec=="dvd_nav_packet":
            continue
        print(idx, hex_id, codec_name, codec)
        if codec == "dvd_subtitle":
            SP.check_call([
                "mencoder",path,"-vobsuboutindex",str(idx),"-vobsubout", outfile,"-nosound","-ovc", "copy", "-o",os.devnull
            ])
            continue
        cmd += ["-map", f"0:#{hex_id}", "-strict", "-2", outfile + f".{ext}"]
        need_ffmpeg = True
    if need_ffmpeg:
    SP.check_call(cmd)
    ccextract(caption_files)
    for file in os.listdir(folder):
        if os.path.isfile(file):
            if os.stat(file).st_size==0:
                os.unlink(file)
 if __name__ == "__main__":
    # Script entry point: demux the single file given as the first argument.
    demux(sys.argv[1])