parent
							
								
									672f1bd849
								
							
						
					
					
						commit
						bf6427d2fb
					
				
					 3 changed files with 116 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -58,6 +58,8 @@ from youtube_dl.utils import (
 | 
			
		|||
    xpath_text,
 | 
			
		||||
    render_table,
 | 
			
		||||
    match_str,
 | 
			
		||||
    parse_dfxp_time_expr,
 | 
			
		||||
    dfxp2srt,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -581,6 +583,42 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
 | 
			
		|||
            'like_count > 100 & dislike_count <? 50 & description',
 | 
			
		||||
            {'like_count': 190, 'dislike_count': 10}))
 | 
			
		||||
 | 
			
		||||
    def test_parse_dfxp_time_expr(self):
        # Each pair is (time expression, expected number of seconds).
        expectations = (
            (None, 0.0),
            ('', 0.0),
            ('0.1', 0.1),
            ('0.1s', 0.1),
            ('00:00:01', 1.0),
            ('00:00:01.100', 1.1),
        )
        for time_expr, expected_seconds in expectations:
            self.assertEqual(parse_dfxp_time_expr(time_expr), expected_seconds)
 | 
			
		||||
 | 
			
		||||
    def test_dfxp2srt(self):
 | 
			
		||||
        dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?>
 | 
			
		||||
            <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
 | 
			
		||||
            <body>
 | 
			
		||||
                <div xml:lang="en">
 | 
			
		||||
                    <p begin="0" end="1">The following line contains Chinese characters and special symbols</p>
 | 
			
		||||
                    <p begin="1" end="2">第二行<br/>♪♪</p>
 | 
			
		||||
                    <p begin="2" end="3"><span>Third<br/>Line</span></p>
 | 
			
		||||
                </div>
 | 
			
		||||
            </body>
 | 
			
		||||
            </tt>'''
 | 
			
		||||
        srt_data = '''1
 | 
			
		||||
00:00:00,000 --> 00:00:01,000
 | 
			
		||||
The following line contains Chinese characters and special symbols
 | 
			
		||||
 | 
			
		||||
2
 | 
			
		||||
00:00:01,000 --> 00:00:02,000
 | 
			
		||||
第二行
 | 
			
		||||
♪♪
 | 
			
		||||
 | 
			
		||||
3
 | 
			
		||||
00:00:02,000 --> 00:00:03,000
 | 
			
		||||
Third
 | 
			
		||||
Line
 | 
			
		||||
 | 
			
		||||
'''
 | 
			
		||||
        self.assertEqual(dfxp2srt(dfxp_data), srt_data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    unittest.main()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -20,6 +20,7 @@ from ..utils import (
 | 
			
		|||
    prepend_extension,
 | 
			
		||||
    shell_quote,
 | 
			
		||||
    subtitles_filename,
 | 
			
		||||
    dfxp2srt,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -651,6 +652,30 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
 | 
			
		|||
                    'format' % new_ext)
 | 
			
		||||
                continue
 | 
			
		||||
            new_file = subtitles_filename(filename, lang, new_ext)
 | 
			
		||||
 | 
			
		||||
            if ext == 'dfxp' or ext == 'ttml':
 | 
			
		||||
                self._downloader.report_warning(
 | 
			
		||||
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
 | 
			
		||||
                    'which results in style information loss')
 | 
			
		||||
 | 
			
		||||
                dfxp_file = subtitles_filename(filename, lang, ext)
 | 
			
		||||
                srt_file = subtitles_filename(filename, lang, 'srt')
 | 
			
		||||
 | 
			
		||||
                with io.open(dfxp_file, 'rt', encoding='utf-8') as f:
 | 
			
		||||
                    srt_data = dfxp2srt(f.read())
 | 
			
		||||
 | 
			
		||||
                with io.open(srt_file, 'wt', encoding='utf-8') as f:
 | 
			
		||||
                    f.write(srt_data)
 | 
			
		||||
 | 
			
		||||
                ext = 'srt'
 | 
			
		||||
                subs[lang] = {
 | 
			
		||||
                    'ext': 'srt',
 | 
			
		||||
                    'data': srt_data
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if new_ext == 'srt':
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
            self.run_ffmpeg(
 | 
			
		||||
                subtitles_filename(filename, lang, ext),
 | 
			
		||||
                new_file, ['-f', new_format])
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1800,6 +1800,59 @@ def match_filter_func(filter_str):
 | 
			
		|||
    return _match_func
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP (TTML) time expression into a number of seconds.

    Two forms are recognized: an offset such as '12', '0.1' or '0.1s', and
    a clock time such as '00:00:01' or '00:00:01.100'.

    Returns 0.0 when time_expr is None or empty, the time in seconds when
    the expression is recognized, and None for anything else.
    """
    if time_expr:
        offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
        if offset_match is not None:
            return float(offset_match.group('time_offset'))

        clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
        if clock_match is not None:
            hours, minutes, seconds = clock_match.groups()
            return 3600 * int(hours) + 60 * int(minutes) + float(seconds)

        # Unrecognized expression
        return None

    return 0.0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def format_srt_time(seconds):
    """Format a time in seconds as an SRT timestamp, 'HH:MM:SS,mmm'.

    seconds is a non-negative int or float. The value is rounded to the
    nearest millisecond; the hours field grows beyond two digits if needed.
    """
    # Round to whole milliseconds *before* splitting into fields. Taking the
    # fractional part of a float and truncating it loses a millisecond to
    # representation error (e.g. 2.3 would come out as ',299').
    total_millisecs = int(round(seconds * 1000))
    (secs, millisecs) = divmod(total_millisecs, 1000)
    (mins, secs) = divmod(secs, 60)
    (hours, mins) = divmod(mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millisecs)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def dfxp2srt(dfxp_data):
    """Convert a DFXP (TTML) subtitle document into SRT text.

    dfxp_data is the document as text; it is encoded as UTF-8 before being
    parsed. Each element found by the './/ttml:p' lookup becomes one SRT
    cue, numbered from 1, with its times taken from the 'begin' and 'end'
    attributes. Inside a cue, 'br' elements become line breaks, 'span'
    elements are flattened to their text, and any other child element is
    emitted as serialized XML.

    Returns the SRT document as one string.
    """
    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
    br_tag = _x('ttml:br')
    span_tag = _x('ttml:span')

    def parse_node(node):
        # Flatten node's text and children into plain cue text.
        str_or_empty = functools.partial(str_or_none, default='')

        out = str_or_empty(node.text)

        for child in node:
            if child.tag == br_tag:
                out += '\n' + str_or_empty(child.tail)
            elif child.tag == span_tag:
                # ElementTree keeps the text that follows '</span>' on the
                # span's tail, not inside the span, so it must be appended
                # here or it is lost.
                out += parse_node(child) + str_or_empty(child.tail)
            else:
                # tostring() serializes the element together with its tail
                # and, with the default encoding, yields ASCII bytes; decode
                # so that text (not a bytes object) is appended.
                out += xml.etree.ElementTree.tostring(child).decode('ascii')

        return out

    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
    out = []
    paras = dfxp.findall(_x('.//ttml:p'))

    for index, para in enumerate(paras, 1):
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('begin'))),
            format_srt_time(parse_dfxp_time_expr(para.attrib.get('end'))),
            parse_node(para)))

    return ''.join(out)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
 | 
			
		||||
    def __init__(self, proxies=None):
 | 
			
		||||
        # Set default handlers
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue