Pushed latest changes, add Telecine/Interlacing detection to ff_d2v.py

This commit is contained in:
Daniel S. 2022-01-07 18:27:31 +01:00
parent e06f1dfad5
commit 5e4d9d6965
11 changed files with 2403 additions and 2039 deletions

10
.gitignore vendored
View file

@ -138,3 +138,13 @@ dmypy.json
# Cython debug symbols # Cython debug symbols
cython_debug/ cython_debug/
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/

View file

@ -2,9 +2,27 @@
Choggbuster is a set of python scripts aimed at automated preprocessing of video DVDs for archival and filtering Choggbuster is a set of python scripts aimed at automated preprocessing of video DVDs for archival and filtering
# Requirements
- python (obviously)
- libdvdcss (for decrypting copy protected DVDs)
- libdvdnav (for streaming the VOBs to disk)
- libdvdread (for reading decrypted data off of DVDs)
- ffmpeg (for demuxing)
- ccextractor (for extracting DVD Subtitles)
# Setup (Windows) # Setup (Windows)
1. Clone the repo 1. Clone the repo
2. `pip install cffi tqdm` 2. `pip install cffi tqdm`
3. Grab [libdvdread, libdvdnav](https://www.videolan.org/developers/libdvdnav.html) and [libdvdcss](https://www.videolan.org/developers/libdvdcss.html) from VLC and drop them next to `dvd_ripper.py` 3. Grab [libdvdread, libdvdnav](https://www.videolan.org/developers/libdvdnav.html) and [libdvdcss](https://www.videolan.org/developers/libdvdcss.html) from VLC and drop them next to `dvd_ripper.py`
4. `python dvd_ripper.py F:\` or `python dvd_ripper.py D:\path\to\DVD.ISO` 4. `python dvd_ripper.py F:\` or `python dvd_ripper.py D:\path\to\DVD.ISO`
5. this will create a folder `out` with a subfolder for the disc containing:
- JSON file with metadata for the DVD title (`XXXX.json` where `X` is the title number)
- demuxed streams (`tXXX_aYYY_Z_0xAAA.{ext}` where `X` is the title number, `Y` is the angle number, `Z` is the stream index and `AAA` is the stream id)
- `.m2v` for video
- `.ac3` or `.dts` for audio
- `.sub` and `.idx` for subtitles
- `.srt` for captions
- `.d2v` file for use with AviSynth and Vapoursynth D2V reader
- `.info.json` file containing video stream metadata (cropping information, interlaced/progressive frame count and aspect ratio information)

View file

@ -1,65 +1,99 @@
import cffi import itertools as ITT
import json
import os import os
import subprocess as SP
import sys import sys
import time import time
from dvdnav import DVDNav,DVDError
from dvdread import DVDRead
import subprocess as SP
import json
from glob import glob from glob import glob
import itertools as ITT
from vob_demux import demux
from ff_d2v import make_d2v
def loadlib(dll_path, *includes, **kwargs): import cffi
ffi = cffi.FFI() from datetime import timedelta
for include in includes: from dvdnav import DVDError, DVDNav
ffi.cdef(open(include).read(), kwargs) from dvdread import DVDRead
return ffi, ffi.dlopen(dll_path) from ff_d2v import make_d2v, make_meta
from vob_demux import demux
def close_file_del_if_empty(fh):
    """Close *fh* and remove its file from disk when nothing was written.

    Returns ``True`` if the handle was closed and the file kept, ``False``
    if *fh* is falsy or the file was empty and has been deleted.
    """
    if not fh:
        return False
    # A position of 0 is taken to mean that no data was written.
    has_data = fh.tell() != 0
    fh.close()
    if has_data:
        return True
    os.unlink(fh.name)
    return False
dur_thr = 60.0
def process_m2v_files(path):
    """Analyze and index every ``.m2v`` file found one folder below *path*.

    For each match ``make_meta`` is called first, then ``make_d2v``.
    """
    # glob() is called without recursive=True, so "**" acts like a plain "*"
    # and only files in direct sub-folders of *path* are matched.
    pattern = os.path.join(path, "**", "*.m2v")
    for m2v_file in glob(pattern):
        make_meta(m2v_file)
        make_d2v(m2v_file)
for dvd_path in ITT.chain.from_iterable(map(glob, sys.argv[1:])): for dvd_path in ITT.chain.from_iterable(map(glob, sys.argv[1:])):
r = DVDRead(dvd_path) r = DVDRead(dvd_path)
# r.grab_ifos() # r.grab_ifos()
# r.grab_vobs() # r.grab_vobs()
# exit() # exit()
if os.path.isfile(dvd_path):
out_folder = os.path.join( basename = os.path.splitext(os.path.basename(dvd_path))[0]
"out", "_".join([r.disc_id, r.udf_disc_name or r.iso_disc_name]).replace(" ", "_") else:
) basename = r.iso_disc_name or r.udf_disc_name
os.makedirs(out_folder, exist_ok=True) base_dir = os.path.join("out", "_".join([basename, r.disc_id]).replace(" ", "_"))
if os.path.isdir(base_dir):
print(f"Output foldrer {base_dir} exists, remove to re-rip DVD")
process_m2v_files(base_dir)
continue
os.makedirs(base_dir, exist_ok=True)
d = DVDNav(dvd_path) d = DVDNav(dvd_path)
to_demux = [] to_demux = []
for k, v in d.titles.items(): for k, v in d.titles.items():
out_folder=os.path.join(base_dir,f"t{k:03}")
v["duration"] = v["duration"].total_seconds() v["duration"] = v["duration"].total_seconds()
v["chapters"] = [c.total_seconds() for c in v["chapters"]] if v["chapters"]:
v["chapters"] = [0.0]+[c.total_seconds() for c in v["chapters"]]
avg_chapter_len = v["duration"] / len(v["chapters"])
# if avg_chapter_len<10:
# continue
d.titles[k] = v d.titles[k] = v
with open(os.path.join(out_folder, f"{k:03}.json"), "w") as fh: # if not v.get('audio'):
json.dump(v, fh) # print(f"[{k}|0] Skipping title {k} because it has no audio tracks")
# continue
# if not v.get('vts'):
# print(f"[{k}|0] Skipping title {k} because it has no title sets")
# continue
if v["duration"] < dur_thr:
print(
f"[{k}|0] Skipping title {k} because it is shorter than {dur_thr} seconds ({v['duration']} seconds)"
)
continue
os.makedirs(out_folder, exist_ok=True)
with open(os.path.join(out_folder, f"title.json"), "w") as fh:
json.dump(d.titles[k], fh, indent=4)
with open(os.path.join(out_folder, f"chapters.txt"), "w") as fh:
if set(v["chapters"])==set([0.0]):
continue
for n,t in enumerate(v["chapters"],1):
if abs(t-v["duration"])<1.0:
continue
print(f"CHAPTER{n:02}={timedelta(seconds=t)}",file=fh)
print(f"CHAPTER{n:02}NAME=Chapter {n}",file=fh)
for a in range(0, 99): for a in range(0, 99):
block=0 outfile = os.path.join(out_folder, f"{a:03}.vob")
outfile = os.path.join(out_folder, f"t{k:03}_a{a:03}_b{block:03}.vob")
to_demux.append(outfile) to_demux.append(outfile)
fh = open(outfile, "wb") fh = open(outfile, "wb")
try: try:
for block in d.get_blocks(k, a): for block in d.get_blocks(k, a):
if isinstance(block, int):
outfile = os.path.join(out_folder, f"t{k:03}_a{a:03}_b{block:03}.vob")
to_demux.append(outfile)
if fh:
fh.close()
fh = open(outfile, "wb")
else:
fh.write(block) fh.write(block)
except DVDError as e: except DVDError as e:
if str(e) != "Invalid angle specified!": if str(e) != "Invalid angle specified!":
raise raise
if fh.tell()==0: close_file_del_if_empty(fh)
fh.close() to_demux = list(filter(os.path.isfile, to_demux))
os.unlink(fh.name)
while fh.name in to_demux:
to_demux.remove(fh.name)
for file in to_demux: for file in to_demux:
demux(file) demux(file)
os.unlink(file) os.unlink(file)
for file in glob(os.path.join(out_folder,"*.m2v")): process_m2v_files(base_dir)
make_d2v(file)

123
dvdnav.py
View file

@ -1,8 +1,10 @@
import cffi
import os
import functools import functools
import os
from datetime import timedelta from datetime import timedelta
import cffi
from tqdm import tqdm from tqdm import tqdm
from dvdread import DVDRead from dvdread import DVDRead
@ -13,6 +15,29 @@ def loadlib(dll_path, *includes, **kwargs):
return ffi, ffi.dlopen(dll_path) return ffi, ffi.dlopen(dll_path)
# Readable names for the domain values carried by VTS change events.
domains = dict(
    [
        (0, "None"),
        (1, "FirstPlay"),
        (2, "VTSTitle"),
        (4, "VMGM"),
        (8, "VTSMenu"),
    ]
)
# Readable names for dvdnav event codes, used when logging unhandled events.
events = {
    code: f"DVDNAV_{suffix}"
    for code, suffix in (
        (0, "BLOCK_OK"),
        (1, "NOP"),
        (2, "STILL_FRAME"),
        (3, "SPU_STREAM_CHANGE"),
        (4, "AUDIO_STREAM_CHANGE"),
        (5, "VTS_CHANGE"),
        (6, "CELL_CHANGE"),
        (7, "NAV_PACKET"),
        (8, "STOP"),
        (9, "HIGHLIGHT"),
        (10, "SPU_CLUT_CHANGE"),
        (12, "HOP_CHANNEL"),
        (13, "WAIT"),
    )
}
class DVDError(Exception): class DVDError(Exception):
pass pass
@ -67,27 +92,6 @@ class DVDNav(object):
size = self.ffi.new("int32_t*", 0) size = self.ffi.new("int32_t*", 0)
pos = self.ffi.new("uint32_t*", 0) pos = self.ffi.new("uint32_t*", 0)
total_size = self.ffi.new("uint32_t*", 0) total_size = self.ffi.new("uint32_t*", 0)
domains = {
1: "FirstPlay",
2: "VTSTitle",
4: "VMGM",
8: "VTSMenu",
}
events = {
0: "DVDNAV_BLOCK_OK",
1: "DVDNAV_NOP",
2: "DVDNAV_STILL_FRAME",
3: "DVDNAV_SPU_STREAM_CHANGE",
4: "DVDNAV_AUDIO_STREAM_CHANGE",
5: "DVDNAV_VTS_CHANGE",
6: "DVDNAV_CELL_CHANGE",
7: "DVDNAV_NAV_PACKET",
8: "DVDNAV_STOP",
9: "DVDNAV_HIGHLIGHT",
10: "DVDNAV_SPU_CLUT_CHANGE",
12: "DVDNAV_HOP_CHANNEL",
13: "DVDNAV_WAIT",
}
progbar = tqdm( progbar = tqdm(
unit_divisor=1024, unit_divisor=1024,
unit_scale=True, unit_scale=True,
@ -96,7 +100,8 @@ class DVDNav(object):
disable=False, disable=False,
) )
ripped = set() ripped = set()
current_vts = None cells = set()
current_vts = (None,None)
current_cell = None current_cell = None
current_pg = None current_pg = None
while True: while True:
@ -143,26 +148,36 @@ class DVDNav(object):
current_cell = cell.cellN current_cell = cell.cellN
current_pg = cell.pgN current_pg = cell.pgN
progbar.write( progbar.write(
f"[{title}|{angle}] Cell: {cell.cellN} ({cell.cell_start}-{cell.cell_start+cell.cell_length}), PG: {cell.pgN} ({cell.pg_start}-{cell.pg_start+cell.pg_length})" f"[{title}|{angle}] Cell: {cell.cellN} ({hex(cell.cell_start)}-{hex(cell.cell_start+cell.cell_length)}), PG: {cell.pgN} ({hex(cell.pg_start)}-{hex(cell.pg_start+cell.pg_length)})"
) )
fp=(current_vts[0],current_vts[1],cell.cellN,cell.pgN,cell.cell_length,cell.pg_length,cell.pgc_length,cell.cell_start,cell.pg_start)
if fp in cells:
progbar.write(f"[{title}|{angle}] Cells Looped!")
break
cells.add(fp)
elif ev[0] == self.lib.DVDNAV_VTS_CHANGE: elif ev[0] == self.lib.DVDNAV_VTS_CHANGE:
vts = self.ffi.cast("dvdnav_vts_change_event_t*", buf) vts = self.ffi.cast("dvdnav_vts_change_event_t*", buf)
old_domain = domains[vts.old_domain]
new_domain = domains[vts.new_domain]
new_vts = (vts.new_vtsN, vts.new_domain) new_vts = (vts.new_vtsN, vts.new_domain)
old_vts = (vts.old_vtsN, vts.old_domain)
ripped.add((vts.old_vtsN, vts.old_domain)) ripped.add((vts.old_vtsN, vts.old_domain))
# progbar.write(f"[{title}|{angle}] VTS: {vts.old_vtsN} ({vts.old_domain} {old_domain}) -> {vts.new_vtsN} ({vts.new_domain} {new_domain})") cells.clear()
if new_vts in ripped: # looped progbar.write(f"[{title}|{angle}] VTS: {vts.old_vtsN} ({vts.old_domain} {old_domain}) -> {vts.new_vtsN} ({vts.new_domain} {new_domain})")
progbar.write(f"[{title}|{angle}] Looped!") if (new_vts in ripped) or new_vts==old_vts: # looped
progbar.write(f"[{title}|{angle}] VTS Looped!")
break break
current_vts = (vts.new_vtsN, vts.new_domain) current_vts = (vts.new_vtsN, vts.new_domain)
if vts.new_domain == 8: # back to menu if vts.new_domain == 8: # back to menu
progbar.write(f"[{title}|{angle}] Back to menu!") progbar.write(f"[{title}|{angle}] VTS Back to menu!")
break break
yield vts.new_vtsN # yield vts.new_vtsN
else: else:
progbar.write( progbar.write(
f"[{title}|{angle}] Unhandled: {events.get(ev[0],ev[0])} {size[0]}" f"[{title}|{angle}] Unhandled: {events.get(ev[0],ev[0])} {size[0]}"
) )
self.__check_error(self.lib.dvdnav_stop(self.dvd)) self.__check_error(self.lib.dvdnav_stop(self.dvd))
progbar.close()
def __check_error(self, ret): def __check_error(self, ret):
if ret == self.lib.DVDNAV_STATUS_ERR: if ret == self.lib.DVDNAV_STATUS_ERR:
@ -171,6 +186,44 @@ class DVDNav(object):
raise DVDError(err) raise DVDError(err)
raise DVDError("Unknown error") raise DVDError("Unknown error")
def __get_vts(self,title):
    """Play *title* and record the VTS changes reported along the way.

    Returns a list of ``(new_vtsN, domain_name)`` tuples, one per
    DVDNAV_VTS_CHANGE event, in the order they were received. Playback is
    stopped before returning.
    """
    ffi, lib, dvd = self.ffi, self.lib, self.dvd
    buf = ffi.new("char[]", 4096)
    ev = ffi.new("int32_t*", lib.DVDNAV_NOP)
    size = ffi.new("int32_t*", 0)
    pos = ffi.new("uint32_t*", 0)
    total_size = ffi.new("uint32_t*", 0)
    self.__check_error(lib.dvdnav_set_PGC_positioning_flag(dvd, 1))
    self.__check_error(lib.dvdnav_title_play(dvd, title))
    seq = []
    while True:
        self.__check_error(lib.dvdnav_get_next_block(dvd, buf, ev, size))
        event = ev[0]
        if event == lib.DVDNAV_BLOCK_OK:
            self.__check_error(lib.dvdnav_get_position(dvd, pos, total_size))
            # Jump to the next program; a 0 result ends the scan.
            if lib.dvdnav_next_pg_search(dvd) == 0:
                break
        elif event == lib.DVDNAV_STOP:
            break
        elif event == lib.DVDNAV_STILL_FRAME:
            self.__check_error(lib.dvdnav_still_skip(dvd))
        elif event == lib.DVDNAV_WAIT:
            self.__check_error(lib.dvdnav_wait_skip(dvd))
        elif event == lib.DVDNAV_VTS_CHANGE:
            change = ffi.cast("dvdnav_vts_change_event_t*", buf)
            # Both lookups are kept: an unknown domain value raises KeyError.
            previous_name = domains[change.old_domain]
            next_name = domains[change.new_domain]
            seq.append((change.new_vtsN, next_name))
            if change.new_domain == 8:  # domain 8 is "VTSMenu" in `domains`
                break
    self.__check_error(lib.dvdnav_stop(dvd))
    return seq
# self.__check_error(self.lib.dvdnav_next_pg_search(self.dvd))
def __get_titles(self): def __get_titles(self):
titles = self.ffi.new("int32_t*", 0) titles = self.ffi.new("int32_t*", 0)
p_times = self.ffi.new("uint64_t[]", 512) p_times = self.ffi.new("uint64_t[]", 512)
@ -191,6 +244,8 @@ class DVDNav(object):
if duration[0] == 0: if duration[0] == 0:
continue continue
chapters = [] chapters = []
if num_chapters==0 and times[0]==self.ffi.NULL:
chapters=None
for t in range(num_chapters): for t in range(num_chapters):
chapters.append(timedelta(seconds=times[0][t] / 90000)) chapters.append(timedelta(seconds=times[0][t] / 90000))
self.titles[title] = { self.titles[title] = {
@ -220,6 +275,7 @@ class DVDNav(object):
self.__check_error(self.lib.dvdnav_title_play(self.dvd, title)) self.__check_error(self.lib.dvdnav_title_play(self.dvd, title))
self.titles[title]["audio"] = {} self.titles[title]["audio"] = {}
self.titles[title]["subtitles"] = {} self.titles[title]["subtitles"] = {}
# self.titles[title]["vts"] = self.__get_vts(title)
for n in range(255): for n in range(255):
stream_id = self.lib.dvdnav_get_audio_logical_stream(self.dvd, n) stream_id = self.lib.dvdnav_get_audio_logical_stream(self.dvd, n)
if stream_id == -1: if stream_id == -1:
@ -241,8 +297,7 @@ class DVDNav(object):
3: "director's commentary", 3: "director's commentary",
4: "alternate director's commentary", 4: "alternate director's commentary",
}[audio_attrs.code_extension] }[audio_attrs.code_extension]
self.titles[title]["audio"][n] = { self.titles[title]["audio"][stream_id] = {
"stream_id": stream_id,
"lang": alang, "lang": alang,
"channels": channels, "channels": channels,
"codec": codec, "codec": codec,
@ -258,8 +313,8 @@ class DVDNav(object):
slang = None slang = None
if spu_attr.type == 1: if spu_attr.type == 1:
slang = str(spu_attr.lang_code.to_bytes(2, "big"), "utf8") slang = str(spu_attr.lang_code.to_bytes(2, "big"), "utf8")
self.titles[title]["subtitles"][n] = { self.titles[title]["subtitles"][stream_id] = {
"stream_id": stream_id,
"lang": slang, "lang": slang,
} }
self.__check_error(self.lib.dvdnav_stop(self.dvd)) self.__check_error(self.lib.dvdnav_stop(self.dvd))
# exit("DEBUG!")

View file

@ -1,9 +1,10 @@
import cffi
import os
import functools
import binascii import binascii
import functools
import os
from datetime import timedelta from datetime import timedelta
import cffi
def loadlib(dll_path, *includes, **kwargs): def loadlib(dll_path, *includes, **kwargs):
ffi = cffi.FFI() ffi = cffi.FFI()
@ -55,6 +56,7 @@ class DVDRead(object):
pbar.update(num_read) pbar.update(num_read)
yield self.ffi.buffer(buf,num_read)[:] yield self.ffi.buffer(buf,num_read)[:]
self.lib.DVDCloseFile(fh) self.lib.DVDCloseFile(fh)
pbar.close()
def grab_ifos(self): def grab_ifos(self):
vmg_ifo = self.lib.ifoOpen(self.dvd, 0) vmg_ifo = self.lib.ifoOpen(self.dvd, 0)

249
ff_d2v.py
View file

@ -1,9 +1,18 @@
import sys import itertools as ITT
import json import json
import os import os
import subprocess as SP import subprocess as SP
import itertools as ITT import sys
from pprint import pprint
from fractions import Fraction
from glob import glob
from collections import Counter
from tqdm import tqdm from tqdm import tqdm
from time import perf_counter
def pulldown(fields_per_second, frames_per_second):
    """Build the exact fields-per-frame ratio (unfinished stub).

    The ratio is computed as a ``Fraction`` but is neither used nor
    returned, so the function always returns ``None``.
    """
    ratio = Fraction(fields_per_second, frames_per_second)
    return None
colorspace = { colorspace = {
@ -59,7 +68,7 @@ def make_flags(frames):
def make_line(frames, stream): def make_line(frames, stream):
info = f"{make_info(frames):03x}" info = f"{make_info(frames):03x}"
matrix = colorspace[stream["color_space"]] matrix = colorspace[stream.get("color_space", "unknown")]
file = 0 file = 0
position = frames[0]["pkt_pos"] position = frames[0]["pkt_pos"]
skip = 0 skip = 0
@ -69,7 +78,170 @@ def make_line(frames, stream):
return " ".join(map(str, [info, matrix, file, position, skip, vob, cell, *flags])) return " ".join(map(str, [info, matrix, file, position, skip, vob, cell, *flags]))
def __make_dict(line):
    """Parse one ``|``-separated ``key=value`` line into a nested dict.

    The first field is the section name; every following ``key=value``
    field is stored under it. Dotted keys create nested dicts, and a
    ``prefix:`` on any non-final key component is dropped, so
    ``tag:lavfi.idet.multiple.tff=3`` becomes
    ``{"lavfi": {"idet": {"multiple": {"tff": "3"}}}}``. Fields without
    ``=`` are ignored. All values stay strings.

    Returns:
        ``{section_name: parsed_fields}``
    """
    section, *fields = line.strip().split("|")
    parsed = {}
    for field in fields:
        # Split on the first "=" only, so a value that itself contains "="
        # no longer raises ValueError during unpacking.
        key_path, separator, value = field.partition("=")
        if not separator:
            continue
        *parents, leaf = key_path.split(".")
        node = parsed
        for key in parents:
            if ":" in key:
                key = key.split(":")[1]
            node = node.setdefault(key, {})
        node[leaf] = value
    return {section: parsed}
def judge(info, num_frames):
    """Classify a video as progressive, interlaced or telecined.

    Args:
        info: parsed probe output; ``info["frame"]["lavfi"]["idet"]`` must
            contain the ``repeated``, ``single`` and ``multiple`` counter
            groups. Values that parse as neither int nor float are dropped.
        num_frames: total number of frames that were analyzed.

    Returns:
        A dict with the numeric counters under ``"repeat"``, ``"single"``
        and ``"multiple"``. When at least one frame was classified it also
        holds ``"num_frames"``, ``"interlaced"``, ``"progressive"`` and
        ``"vid_type"``; otherwise only the counters are returned.
    """
    threshold = 1  # BFF/TFF threshold value (currently unused)
    min_num_frames = 250  # minimal number of frames for a reliable verdict

    raw = info["frame"]["lavfi"]["idet"]
    idet = {}
    for source_key, target_key in (
        ("repeated", "repeat"),
        ("single", "single"),
        ("multiple", "multiple"),
    ):
        counters = {}
        for name, text in raw[source_key].items():
            for convert in (int, float):
                try:
                    counters[name] = convert(text)
                except ValueError:
                    continue
                break
        idet[target_key] = counters

    tff = idet["multiple"]["tff"]
    bff = idet["multiple"]["bff"]
    progressive = idet["multiple"]["progressive"]
    interlaced = tff + bff
    determined = interlaced + progressive

    # Bail out before any ratio is computed: with nothing classified every
    # percentage below would divide by zero.
    if determined == 0:
        print(f"Determined: {determined}")
        return idet

    # 2:3 pulldown, 4 frames @ ~24 FPS to ~30 FPS = 20% repeated fields
    repeat_err = abs((idet["repeat"]["neither"] / num_frames) - 0.8)
    print(f"Derivation from 2:3 Pulldown: {repeat_err:.2%}")
    print(f"Determined: {determined}")
    if interlaced:
        print(f"Interlaced: {interlaced} (TFF: {tff/interlaced:.2%}, BFF: {bff/interlaced:.2%}) = {interlaced/determined:.2%}")
    else:
        print(f"Interlaced: {interlaced} = {interlaced/determined:.2%}")
    print(f"Progressive: {progressive} = {progressive/determined:.2%}")

    idet["num_frames"] = num_frames
    idet["interlaced"] = interlaced
    idet["progressive"] = progressive
    if determined < min_num_frames:
        print("/!\\ Not enough information to determine interlacing type reliably, results may be inacurate /!\\")

    if interlaced <= progressive:
        idet["vid_type"] = "Progressive"
    elif tff == bff:
        idet["vid_type"] = "Interlaced?"
    else:
        field_order = "TFF" if tff > bff else "BFF"
        # NOTE(review): repeat_err is a fraction, so "< 1.0" holds whenever
        # the "neither" count is below 1.8 * num_frames; the intended
        # cut-off may be much smaller -- confirm.
        kind = "Telecined" if repeat_err < 1.0 else "Interlaced"
        idet["vid_type"] = f"{kind} {field_order}"
    print(f"Result: {idet['vid_type']}")
    return idet
def get_meta_interlacing(path):
    """Probe *path* with ffprobe's ``cropdetect`` and ``idet`` filters.

    The compact ffprobe output is parsed line by line while a progress bar
    follows the byte position of the frames. If ffprobe exits with a
    non-zero status, the interpreter exits with that status.

    Returns:
        ``{"interlacing": <result of judge()>, "crop": (x, right, y, bottom)}``
        where ``right``/``bottom`` are the stream width/height minus the
        detected crop size and offset.
    """
    path = path.replace("\\", "/")
    graph = ",".join(
        (
            f"movie=\\'{path}\\'",
            "cropdetect=limit=0.5:round=2",
            "idet",
        )
    )
    probe_limit = str(0x7FFFFFFF)
    command = [
        "ffprobe",
        "-loglevel",
        "fatal",
        "-probesize",
        probe_limit,
        "-analyzeduration",
        probe_limit,
        "-f",
        "lavfi",
        "-i",
        graph,
        "-select_streams",
        "v",
        "-show_frames",
        "-show_streams",
        "-print_format",
        "compact",
    ]
    proc = SP.Popen(
        command,
        stdout=SP.PIPE,
        stdin=SP.DEVNULL,
        bufsize=0,
        encoding="utf8",
    )
    total_size = int(get_streams(path)[1]["size"])
    pbar = tqdm(
        total=total_size,
        desc="Analyzing video",
        unit_divisor=1024,
        unit_scale=True,
        unit="iB",
        leave=False,
    )
    # Later lines overwrite earlier ones of the same section, so after the
    # loop data["frame"] holds the last frame that was read.
    data = {}
    frames_seen = 0
    for raw_line in proc.stdout:
        record = __make_dict(raw_line)
        data.update(record)
        if "frame" not in record:
            continue
        frames_seen += 1
        byte_pos = int(record["frame"]["pkt_pos"])
        # Never move the bar backwards or beyond the file size.
        pbar.n = max(pbar.n, min(total_size, byte_pos))
        elapsed = pbar._time() - pbar.start_t
        if elapsed:
            pbar.set_postfix(frame=frames_seen, fps=f"{frames_seen / elapsed:.2f}")
        pbar.update(0)
    pbar.close()
    status = proc.wait()
    if status != 0:
        exit(status)

    stream = data["stream"]
    # Frame rate (e.g. 30000/1001) is parsed but not used further.
    rate_parts = list(map(int, stream["r_frame_rate"].split("/")))
    frame_rate = Fraction(rate_parts[0], rate_parts[1])
    total_frames = int(stream["nb_read_frames"])
    cropdetect = data["frame"]["lavfi"]["cropdetect"]
    idet = judge(data, total_frames)
    left = int(cropdetect["x"])
    right = int(stream["width"]) - int(cropdetect["w"]) - int(cropdetect["x"])
    top = int(cropdetect["y"])
    bottom = int(stream["height"]) - int(cropdetect["h"]) - int(cropdetect["y"])
    crop = (left, right, top, bottom)
    print(f"Cropping: {crop}")
    return {"interlacing":idet, "crop":crop}
def get_frames(path): def get_frames(path):
path = path.replace("\\", "/")
proc = SP.Popen( proc = SP.Popen(
[ [
"ffprobe", "ffprobe",
@ -90,16 +262,13 @@ def get_frames(path):
stdout=SP.PIPE, stdout=SP.PIPE,
stdin=SP.DEVNULL, stdin=SP.DEVNULL,
bufsize=0, bufsize=0,
encoding="utf8",
) )
data = None
for line in proc.stdout: for line in proc.stdout:
line = str(line, "utf8").strip().split("|") yield __make_dict(line)
line = {line[0]: dict(v.split("=") for v in line[1:])}
yield line
ret = proc.wait() ret = proc.wait()
if ret != 0: if ret != 0:
exit(ret) exit(ret)
return data
def get_streams(path): def get_streams(path):
@ -175,31 +344,87 @@ def gen_d2v(path):
unit_scale=True, unit_scale=True,
unit="iB", unit="iB",
desc="Writing d2v", desc="Writing d2v",
leave=False,
) )
cropdetect = None
idet = None
frame_num = 0
t_start=perf_counter()
for line in prog_bar: for line in prog_bar:
if "frame" not in line: if "frame" not in line:
continue continue
frame = line["frame"] frame = line["frame"]
prog_bar.n = min(max(prog_bar.n, int(frame["pkt_pos"])), int(fmt["size"]))
prog_bar.update(0)
if frame["stream_index"] != stream["index"]: if frame["stream_index"] != stream["index"]:
continue continue
prog_bar.n = min(max(prog_bar.n, int(frame["pkt_pos"])), int(fmt["size"]))
fps=frame_num/(perf_counter()-t_start)
prog_bar.set_postfix(frame=frame_num,fps=f"{fps:.02f}")
prog_bar.update(0)
frame_num += 1
if frame["pict_type"] == "I" and line_buffer: if frame["pict_type"] == "I" and line_buffer:
yield make_line(line_buffer, stream) yield make_line(line_buffer, stream)
line_buffer.clear() line_buffer.clear()
line_buffer.append(frame) line_buffer.append(frame)
if line_buffer:
yield make_line(line_buffer, stream)
prog_bar.n = int(fmt["size"])
prog_bar.update(0)
prog_bar.close() prog_bar.close()
yield None yield None
def make_meta(path):
    """Write ``<name>.info.json`` with analysis results next to *path*.

    Does nothing (apart from a message) when the info file already exists.
    Otherwise the result of ``get_meta_interlacing`` is extended with the
    aspect ratios of the first stream and dumped as JSON.
    """
    stem, _ = os.path.splitext(os.path.basename(path))
    info_name = os.path.extsep.join([stem, "info", "json"])
    outfile = os.path.join(os.path.dirname(path), info_name)
    if os.path.isfile(outfile):
        print(path,"already analyzed, skipping")
        return
    print("Analyzing", path)
    meta = get_meta_interlacing(path)
    streams, fmt = get_streams(path)
    stream = streams[0]

    def parse_ratio(text):
        # "16:9" -> Fraction(16, 9)
        return Fraction(*map(int, text.split(":")))

    var = Fraction(int(stream["width"]), int(stream["height"]))
    dar = parse_ratio(stream["display_aspect_ratio"])
    sar = parse_ratio(stream["sample_aspect_ratio"])
    # NOTE(review): "par" is derived as sar * dar -- confirm this is the
    # intended ratio.
    par = sar * dar
    for key, ratio in (("par", par), ("dar", dar), ("sar", sar), ("var", var)):
        meta[key] = [ratio.numerator, ratio.denominator]
    print("Aspect ratios:")
    print(f" Pixel {par}")
    print(f" Display {dar}")
    print(f" Screen {sar}")
    print(f" Video {var}")
    with open(outfile, "w") as fh:
        json.dump(meta, fh, indent=4)
def make_d2v(path): def make_d2v(path):
outdir = os.path.dirname(path)
outfile = os.path.splitext(os.path.basename(path))[0] outfile = os.path.splitext(os.path.basename(path))[0]
outfile = os.path.extsep.join([outfile, "d2v"]) outfile = os.path.join(outdir, os.path.extsep.join([outfile, "d2v"]))
outfile_tmp = os.path.extsep.join([outfile, "tmp"])
if os.path.isfile(outfile):
print(path,"already indexed, skipping")
return
print("Indexing", path)
a, b = ITT.tee(gen_d2v(path)) a, b = ITT.tee(gen_d2v(path))
next(b) next(b)
with open(outfile, "w") as fh: with open(outfile_tmp, "w") as fh:
for line, next_line in zip(a, b): for line, next_line in zip(a, b):
fh.write(line) fh.write(line)
if next_line is None: # last line, append end marker if next_line is None: # last line, append end marker
fh.write(" ff") fh.write(" ff")
fh.write("\n") fh.write("\n")
os.rename(outfile_tmp,outfile)
if __name__ == "__main__":
    # Every command line argument is treated as a glob pattern; each match
    # is analyzed first and indexed afterwards.
    for pattern in sys.argv[1:]:
        for file in glob(pattern):
            make_meta(file)
            make_d2v(file)

View file

@ -1,7 +1,8 @@
import sys
import os
import json import json
import os
import subprocess as SP import subprocess as SP
import sys
import shutil
def get_streams(path): def get_streams(path):
@ -31,22 +32,35 @@ def get_streams(path):
return [], {} return [], {}
return data["streams"], data["format"] return data["streams"], data["format"]
def ccextract(files):
    """Run CCExtractor on each path in *files* and collect the outputs.

    The executable is looked up on ``PATH`` (``ccextractor`` or
    ``ccextractorwinfull``); on Windows the default install folder under
    ``%PROGRAMFILES(X86)%`` is tried as a fallback.

    Args:
        files: iterable of input file paths.

    Returns:
        List of the ``<name>.cc.srt`` paths that were produced. An empty
        list is returned, after printing a warning, when CCExtractor
        cannot be found.
    """
    ccextractor = shutil.which("ccextractor") or shutil.which("ccextractorwinfull")
    if ccextractor is None and os.name == "nt":
        ccextractor = os.path.expandvars(
            os.path.join("${PROGRAMFILES(X86)}", "CCExtractor", "ccextractorwinfull.exe")
        )
    # Outside Windows the lookup can leave None here; os.path.isfile(None)
    # raises TypeError, so test for None explicitly before touching the path.
    if ccextractor is None or not os.path.isfile(ccextractor):
        print("WARNING: CCExtractor not found")
        return []
    new_files = []
    for file in files:
        outfile = os.path.extsep.join([os.path.splitext(file)[0], "cc.srt"])
        ret = SP.call(
            [ccextractor, "-sc", "-sbs", "-autodash", "-trim", "-nobom", "-o", outfile, file]
        )
        if ret == 10:
            # Exit status 10 is treated as "nothing extracted" (presumably
            # CCExtractor's no-captions status -- confirm); drop any
            # leftover output file and skip this input.
            if os.path.isfile(outfile):
                os.unlink(outfile)
            continue
        new_files.append(outfile)
    return new_files
types = {
"mpeg2video": "m2v",
"ac3": "ac3",
"dvd_subtitle": "sup",
}
types = {"mpeg2video": "m2v", "ac3": "ac3", "dvd_subtitle": "sub.mkv", "eia_608": "srt"}
def demux(path): def demux(path):
folder = os.path.dirname(path) folder = os.path.dirname(path)
basename = os.path.splitext(os.path.basename(path))[0] basename = os.path.splitext(os.path.basename(path))[0]
streams, fmt = get_streams(path) streams, _ = get_streams(path)
cmd = [ cmd = [
"ffmpeg", "ffmpeg",
"-y", "-y",
# "-fflags","+genpts+igndts",
"-probesize", "-probesize",
str(0x7FFFFFFF), str(0x7FFFFFFF),
"-analyzeduration", "-analyzeduration",
@ -55,33 +69,39 @@ def demux(path):
path, path,
"-strict", "-strict",
"-2", "-2",
]
caption_files = []
for stream in streams:
codec = stream["codec_name"]
ext = types.get(codec, codec)
hex_id = stream["id"]
codec_name = stream["codec_long_name"]
outfile = os.path.join(folder, f"{basename}_{hex_id}")
if codec == "dvd_nav_packet":
continue
outfile = os.path.extsep.join([outfile, ext])
print(hex_id, codec_name, codec)
if codec == "mpeg2video":
caption_files.append(outfile)
cmd += [
"-map",
f"0:#{hex_id}",
"-vcodec", "-vcodec",
"copy", "copy",
"-acodec", "-acodec",
"copy", "copy",
"-scodec", "-scodec",
"copy", "copy",
"-strict",
"-2",
outfile,
] ]
need_ffmpeg = False
for stream in streams:
codec = stream["codec_name"]
ext = types.get(codec, codec)
idx = stream["index"]
hex_id = stream["id"]
codec_name = stream["codec_long_name"]
outfile = os.path.join(folder, f"{basename}_{idx}_{hex_id}")
if codec=="dvd_nav_packet":
continue
print(idx, hex_id, codec_name, codec)
if codec == "dvd_subtitle":
SP.check_call([
"mencoder",path,"-vobsuboutindex",str(idx),"-vobsubout", outfile,"-nosound","-ovc", "copy", "-o",os.devnull
])
continue
cmd += ["-map", f"0:#{hex_id}", "-strict", "-2", outfile + f".{ext}"]
need_ffmpeg = True
if need_ffmpeg:
SP.check_call(cmd) SP.check_call(cmd)
ccextract(caption_files)
for file in os.listdir(folder):
if os.path.isfile(file):
if os.stat(file).st_size==0:
os.unlink(file)
if __name__ == "__main__": if __name__ == "__main__":
demux(sys.argv[1]) demux(sys.argv[1])