Switch back to underscore for invalid characters, and make restricted mode ASCII-only
This commit is contained in:
parent
ed7516c69d
commit
56781d3d2e
5 changed files with 21 additions and 13 deletions
|
@ -47,8 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
|
||||||
%(extractor)s for the provider (youtube, metacafe,
|
%(extractor)s for the provider (youtube, metacafe,
|
||||||
etc), %(id)s for the video id and %% for a literal
|
etc), %(id)s for the video id and %% for a literal
|
||||||
percent. Use - to output to stdout.
|
percent. Use - to output to stdout.
|
||||||
--restrict-filenames Avoid some characters such as "&" and spaces in
|
--restrict-filenames Restrict filenames to only ASCII characters, and
|
||||||
filenames
|
avoid "&" and spaces in filenames
|
||||||
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
|
||||||
-w, --no-overwrites do not overwrite files
|
-w, --no-overwrites do not overwrite files
|
||||||
-c, --continue resume partially downloaded files
|
-c, --continue resume partially downloaded files
|
||||||
|
|
|
@ -22,10 +22,10 @@ class TestUtil(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(sanitize_filename(u'123'), u'123')
|
self.assertEqual(sanitize_filename(u'123'), u'123')
|
||||||
|
|
||||||
self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
|
self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
|
||||||
self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
|
self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
|
||||||
|
|
||||||
self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de'))
|
self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de'))
|
||||||
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
|
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
|
||||||
self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
|
self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
|
||||||
self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
|
self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
|
||||||
|
@ -45,14 +45,17 @@ class TestUtil(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
|
self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
|
||||||
|
|
||||||
self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
|
self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True))
|
||||||
self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
|
self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
|
||||||
|
|
||||||
self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
|
self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
|
||||||
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
|
self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
|
||||||
self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
|
self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
|
||||||
self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
|
self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_filename(u'aäb', restricted=True), u'a_b')
|
||||||
|
self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename
|
||||||
|
|
||||||
forbidden = u'"\0\\/&: \'\t\n'
|
forbidden = u'"\0\\/&: \'\t\n'
|
||||||
for fc in forbidden:
|
for fc in forbidden:
|
||||||
print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
|
print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
|
||||||
|
|
|
@ -59,8 +59,8 @@ redistribute it or use it however you like.
|
||||||
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
|
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
|
||||||
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
|
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
|
||||||
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
|
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
|
||||||
--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
|
--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and
|
||||||
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
|
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames
|
||||||
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
|
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
|
||||||
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
|
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
|
||||||
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
|
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
|
||||||
|
|
|
@ -274,7 +274,7 @@ def parseOpts():
|
||||||
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
|
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
|
||||||
filesystem.add_option('--restrict-filenames',
|
filesystem.add_option('--restrict-filenames',
|
||||||
action='store_true', dest='restrictfilenames',
|
action='store_true', dest='restrictfilenames',
|
||||||
help='Avoid some characters such as "&" and spaces in filenames', default=False)
|
help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)
|
||||||
filesystem.add_option('-a', '--batch-file',
|
filesystem.add_option('-a', '--batch-file',
|
||||||
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
|
||||||
filesystem.add_option('-w', '--no-overwrites',
|
filesystem.add_option('-w', '--no-overwrites',
|
||||||
|
|
|
@ -207,15 +207,20 @@ def sanitize_filename(s, restricted=False):
|
||||||
elif char == ':':
|
elif char == ':':
|
||||||
return '_-' if restricted else ' -'
|
return '_-' if restricted else ' -'
|
||||||
elif char in '\\/|*<>':
|
elif char in '\\/|*<>':
|
||||||
return '-'
|
return '_'
|
||||||
if restricted and (char in '&\'' or char.isspace()):
|
if restricted and (char in '&\'' or char.isspace()):
|
||||||
return '_'
|
return '_'
|
||||||
|
if restricted and ord(char) > 127:
|
||||||
|
return '_'
|
||||||
return char
|
return char
|
||||||
|
|
||||||
result = u''.join(map(replace_insane, s))
|
result = u''.join(map(replace_insane, s))
|
||||||
while '--' in result:
|
while '__' in result:
|
||||||
result = result.replace('--', '-')
|
result = result.replace('__', '_')
|
||||||
return result.strip('-')
|
result = result.strip('_')
|
||||||
|
if not result:
|
||||||
|
result = '_'
|
||||||
|
return result
|
||||||
|
|
||||||
def orderedSet(iterable):
|
def orderedSet(iterable):
|
||||||
""" Remove all duplicates from the input iterable """
|
""" Remove all duplicates from the input iterable """
|
||||||
|
|
Loading…
Reference in a new issue