fixed pygments package to work with python3 using 2to3

Alan Hamlett 2013-09-22 14:50:04 -07:00
parent be54a19207
commit af0dce46aa
26 changed files with 387 additions and 390 deletions
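
For reference, the hunks below are almost entirely mechanical 2to3 rewrites. This small sketch (illustrative only, not part of the diff) collects the idioms being converted:

import io
import sys

print('hello', file=sys.stderr)          # print statement -> print() function
text = str(b'caf\xc3\xa9', 'utf-8')      # unicode(bytes, enc) -> str(bytes, enc)
table = {'spaces': '·'}                  # u'...' literals -> plain str literals
for key, value in table.items():         # iteritems()/itervalues() -> items()/values()
    pass
names = ['lexer'] + list(table.keys())   # keys() is now a view, so wrap it in list()
it = iter([1, 2, 3])
first = next(it)                         # it.next() -> next(it)
buf = io.StringIO()                      # StringIO.StringIO() -> io.StringIO()
try:
    raise IOError('boom')
except IOError as err:                   # 'except IOError, err' -> 'except IOError as err'
    pass
for _ in range(3):                       # xrange() -> range()
    pass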

View file

@@ -135,7 +135,7 @@ def _print_help(what, name):
def _print_list(what):
if what == 'lexer':
print
print()
print("Lexers:")
print("~~~~~~~")
@@ -149,7 +149,7 @@ def _print_list(what):
print(('* %s\n %s %s') % i)
elif what == 'formatter':
print
print()
print("Formatters:")
print("~~~~~~~~~~~")
@@ -164,7 +164,7 @@ def _print_list(what):
print(('* %s\n %s %s') % i)
elif what == 'filter':
print
print()
print("Filters:")
print("~~~~~~~~")
@@ -174,7 +174,7 @@ def _print_list(what):
print(" %s" % docstring_headline(cls))
elif what == 'style':
print
print()
print("Styles:")
print("~~~~~~~")
@@ -237,7 +237,7 @@ def main(args=sys.argv):
print(usage, file=sys.stderr)
return 2
# print(version)
# print version
main(['', '-V'])
if not args:
args = ['lexer', 'formatter', 'filter', 'style']
@@ -396,7 +396,8 @@ def main(args=sys.argv):
except ClassNotFound:
lexer = TextLexer(**parsed_opts)
elif not lexer:
print('Error: no lexer name given and reading from stdin (try using -g or -l <lexer>)', file=sys.stderr)
print('Error: no lexer name given and reading ' + \
'from stdin (try using -g or -l <lexer>)', file=sys.stderr)
return 2
else:
code = sys.stdin.read()
@@ -432,7 +433,7 @@ def main(args=sys.argv):
if len(info) >= 3:
# extract relevant file and position info
msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:]
print('', file=sys.stderr)
print(file=sys.stderr)
print('*** Error while highlighting:', file=sys.stderr)
print(msg, file=sys.stderr)
return 1

View file

@@ -117,7 +117,7 @@ class KeywordCaseFilter(Filter):
def __init__(self, **options):
Filter.__init__(self, **options)
case = get_choice_opt(options, 'case', ['lower', 'upper', 'capitalize'], 'lower')
self.convert = getattr(unicode, case)
self.convert = getattr(str, case)
def filter(self, lexer, stream):
for ttype, value in stream:
@@ -235,9 +235,9 @@ class VisibleWhitespaceFilter(Filter):
def __init__(self, **options):
Filter.__init__(self, **options)
for name, default in {'spaces': u'·', 'tabs': u'»', 'newlines': u''}.items():
for name, default in list({'spaces': '·', 'tabs': '»', 'newlines': ''}.items()):
opt = options.get(name, False)
if isinstance(opt, basestring) and len(opt) == 1:
if isinstance(opt, str) and len(opt) == 1:
setattr(self, name, opt)
else:
setattr(self, name, (opt and default or ''))
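
The two substitutions above are safe because Python 3's str carries the same case methods and subsumes basestring; a minimal check:

convert = getattr(str, 'upper')          # getattr(unicode, case) -> getattr(str, case)
assert convert('pygments') == 'PYGMENTS'
assert isinstance('·', str)              # the old basestring test becomes a str test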

View file

@@ -18,7 +18,7 @@ __all__ = ['Formatter']
def _lookup_style(style):
if isinstance(style, basestring):
if isinstance(style, str):
return get_style_by_name(style)
return style

View file

@@ -12,7 +12,7 @@
import os
import sys
import os.path
import StringIO
import io
from pygments.formatter import Formatter
from pygments.token import Token, Text, STANDARD_TYPES
@@ -27,11 +27,11 @@ __all__ = ['HtmlFormatter']
_escape_html_table = {
ord('&'): u'&amp;',
ord('<'): u'&lt;',
ord('>'): u'&gt;',
ord('"'): u'&quot;',
ord("'"): u'&#39;',
ord('&'): '&amp;',
ord('<'): '&lt;',
ord('>'): '&gt;',
ord('"'): '&quot;',
ord("'"): '&#39;',
}
def escape_html(text, table=_escape_html_table):
@@ -453,7 +453,7 @@ class HtmlFormatter(Formatter):
"""
if arg is None:
arg = ('cssclass' in self.options and '.'+self.cssclass or '')
if isinstance(arg, basestring):
if isinstance(arg, str):
args = [arg]
else:
args = list(arg)
@@ -467,7 +467,7 @@ class HtmlFormatter(Formatter):
return ', '.join(tmp)
styles = [(level, ttype, cls, style)
for cls, (style, ttype, level) in self.class2style.iteritems()
for cls, (style, ttype, level) in self.class2style.items()
if cls and style]
styles.sort()
lines = ['%s { %s } /* %s */' % (prefix(cls), style, repr(ttype)[6:])
@@ -505,7 +505,8 @@ class HtmlFormatter(Formatter):
cssfilename = os.path.join(os.path.dirname(filename),
self.cssfile)
except AttributeError:
print('Note: Cannot determine output file name, using current directory as base for the CSS file name', file=sys.stderr)
print('Note: Cannot determine output file name, ' \
'using current directory as base for the CSS file name', file=sys.stderr)
cssfilename = self.cssfile
# write CSS file only if noclobber_cssfile isn't given as an option.
try:
@@ -514,7 +515,7 @@ class HtmlFormatter(Formatter):
cf.write(CSSFILE_TEMPLATE %
{'styledefs': self.get_style_defs('body')})
cf.close()
except IOError, err:
except IOError as err:
err.strerror = 'Error writing CSS file: ' + err.strerror
raise
@@ -533,7 +534,7 @@ class HtmlFormatter(Formatter):
yield 0, DOC_FOOTER
def _wrap_tablelinenos(self, inner):
dummyoutfile = StringIO.StringIO()
dummyoutfile = io.StringIO()
lncount = 0
for t, line in inner:
if t:
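
The io.StringIO and except-as changes in this file behave as in this minimal sketch:

import io

dummyoutfile = io.StringIO()             # replaces StringIO.StringIO()
dummyoutfile.write('<td class="code">')
assert dummyoutfile.getvalue().startswith('<td')

try:
    raise IOError('disk full')
except IOError as err:                   # Python 3 spelling of 'except IOError, err'
    message = 'Error writing CSS file: ' + str(err)
assert message.endswith('disk full')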

View file

@@ -23,7 +23,7 @@ except ImportError:
pil_available = False
try:
import _winreg
import winreg
except ImportError:
_winreg = None
@@ -72,7 +72,7 @@ class FontManager(object):
self._create_nix()
def _get_nix_font_path(self, name, style):
from commands import getstatusoutput
from subprocess import getstatusoutput
exit, out = getstatusoutput('fc-list "%s:style=%s" file' %
(name, style))
if not exit:
@@ -107,7 +107,7 @@ class FontManager(object):
for style in styles:
try:
valname = '%s%s%s' % (basename, style and ' '+style, suffix)
val, _ = _winreg.QueryValueEx(key, valname)
val, _ = winreg.QueryValueEx(key, valname)
return val
except EnvironmentError:
continue
@@ -119,13 +119,13 @@ class FontManager(object):
def _create_win(self):
try:
key = _winreg.OpenKey(
_winreg.HKEY_LOCAL_MACHINE,
key = winreg.OpenKey(
winreg.HKEY_LOCAL_MACHINE,
r'Software\Microsoft\Windows NT\CurrentVersion\Fonts')
except EnvironmentError:
try:
key = _winreg.OpenKey(
_winreg.HKEY_LOCAL_MACHINE,
key = winreg.OpenKey(
winreg.HKEY_LOCAL_MACHINE,
r'Software\Microsoft\Windows\CurrentVersion\Fonts')
except EnvironmentError:
raise FontNotFound('Can\'t open Windows font registry key')
@@ -142,7 +142,7 @@ class FontManager(object):
else:
self.fonts[style] = self.fonts['NORMAL']
finally:
_winreg.CloseKey(key)
winreg.CloseKey(key)
def get_char_size(self):
"""
@@ -452,7 +452,7 @@ class ImageFormatter(Formatter):
"""
if not self.line_numbers:
return
for p in xrange(self.maxlineno):
for p in range(self.maxlineno):
n = p + self.line_number_start
if (n % self.line_number_step) == 0:
self._draw_linenumber(p, n)
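
Two module renames drive these hunks; a minimal sketch of the Python 3 locations (the echo call is a stand-in for the fc-list query above and assumes a POSIX shell):

from subprocess import getstatusoutput   # was: from commands import getstatusoutput

status, out = getstatusoutput('echo DejaVu Sans')  # returns (exit_status, output)
assert status == 0 and out == 'DejaVu Sans'
# The Windows registry module is likewise renamed: 'import winreg' replaces '_winreg'.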

View file

@@ -291,7 +291,7 @@ class LatexFormatter(Formatter):
"""
cp = self.commandprefix
styles = []
for name, definition in self.cmd2def.iteritems():
for name, definition in self.cmd2def.items():
styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' %
(cp, name, definition))
return STYLE_TEMPLATE % {'cp': self.commandprefix,
@@ -306,24 +306,24 @@ class LatexFormatter(Formatter):
realoutfile = outfile
outfile = StringIO()
outfile.write(ur'\begin{Verbatim}[commandchars=\\\{\}')
outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}')
if self.linenos:
start, step = self.linenostart, self.linenostep
outfile.write(u',numbers=left' +
(start and u',firstnumber=%d' % start or u'') +
(step and u',stepnumber=%d' % step or u''))
outfile.write(',numbers=left' +
(start and ',firstnumber=%d' % start or '') +
(step and ',stepnumber=%d' % step or ''))
if self.mathescape or self.texcomments:
outfile.write(ur',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
if self.verboptions:
outfile.write(u',' + self.verboptions)
outfile.write(u']\n')
outfile.write(',' + self.verboptions)
outfile.write(']\n')
for ttype, value in tokensource:
if ttype in Token.Comment:
if self.texcomments:
# Try to guess comment starting lexeme and escape it ...
start = value[0:1]
for i in xrange(1, len(value)):
for i in range(1, len(value)):
if start[0] != value[i]:
break
start += value[i]
@@ -366,7 +366,7 @@ class LatexFormatter(Formatter):
else:
outfile.write(value)
outfile.write(u'\\end{Verbatim}\n')
outfile.write('\\end{Verbatim}\n')
if self.full:
realoutfile.write(DOC_TEMPLATE %

View file

@@ -15,6 +15,7 @@ from pygments.filters import get_filter_by_name
from pygments.token import Error, Text, Other, _TokenType
from pygments.util import get_bool_opt, get_int_opt, get_list_opt, \
make_analysator
import collections
__all__ = ['Lexer', 'RegexLexer', 'ExtendedRegexLexer', 'DelegatingLexer',
@@ -42,7 +43,7 @@ class LexerMeta(type):
return type.__new__(cls, name, bases, d)
class Lexer(object):
class Lexer(object, metaclass=LexerMeta):
"""
Lexer for a specific language.
@@ -84,8 +85,6 @@ class Lexer(object):
#: Priority, should multiple lexers match and no content is provided
priority = 0
__metaclass__ = LexerMeta
def __init__(self, **options):
self.options = options
self.stripnl = get_bool_opt(options, 'stripnl', True)
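
The metaclass hook moves from a class attribute into the class header in Python 3; a minimal sketch with a toy metaclass standing in for LexerMeta:

class ToyMeta(type):                     # stand-in for LexerMeta
    def __new__(mcs, name, bases, d):
        d.setdefault('aliases', [])      # toy per-class setup
        return type.__new__(mcs, name, bases, d)

# Python 2:  class Lexer(object): __metaclass__ = ToyMeta
class Lexer(object, metaclass=ToyMeta):  # Python 3 spelling, as in the hunk above
    pass

assert Lexer.aliases == []
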
@@ -136,12 +135,12 @@ class Lexer(object):
Also preprocess the text, i.e. expand tabs and strip it if
wanted and applies registered filters.
"""
if not isinstance(text, unicode):
if not isinstance(text, str):
if self.encoding == 'guess':
try:
text = text.decode('utf-8')
if text.startswith(u'\ufeff'):
text = text[len(u'\ufeff'):]
if text.startswith('\ufeff'):
text = text[len('\ufeff'):]
except UnicodeDecodeError:
text = text.decode('latin1')
elif self.encoding == 'chardet':
@@ -155,20 +154,20 @@ class Lexer(object):
decoded = None
for bom, encoding in _encoding_map:
if text.startswith(bom):
decoded = unicode(text[len(bom):], encoding,
decoded = str(text[len(bom):], encoding,
errors='replace')
break
# no BOM found, so use chardet
if decoded is None:
enc = chardet.detect(text[:1024]) # Guess using first 1KB
decoded = unicode(text, enc.get('encoding') or 'utf-8',
decoded = str(text, enc.get('encoding') or 'utf-8',
errors='replace')
text = decoded
else:
text = text.decode(self.encoding)
else:
if text.startswith(u'\ufeff'):
text = text[len(u'\ufeff'):]
if text.startswith('\ufeff'):
text = text[len('\ufeff'):]
# text now *is* a unicode string
text = text.replace('\r\n', '\n')
@@ -391,7 +390,7 @@ class RegexLexerMeta(LexerMeta):
def _process_token(cls, token):
"""Preprocess the token component of a token definition."""
assert type(token) is _TokenType or callable(token), \
assert type(token) is _TokenType or isinstance(token, collections.Callable), \
'token type must be simple type or callable, not %r' % (token,)
return token
@@ -472,7 +471,7 @@ class RegexLexerMeta(LexerMeta):
"""Preprocess a dictionary of token definitions."""
processed = cls._all_tokens[name] = {}
tokendefs = tokendefs or cls.tokens[name]
for state in tokendefs.keys():
for state in list(tokendefs.keys()):
cls._process_state(tokendefs, processed, state)
return processed
@@ -493,7 +492,7 @@ class RegexLexerMeta(LexerMeta):
for c in itertools.chain((cls,), cls.__mro__):
toks = c.__dict__.get('tokens', {})
for state, items in toks.iteritems():
for state, items in toks.items():
curitems = tokens.get(state)
if curitems is None:
tokens[state] = items
@@ -533,13 +532,12 @@ class RegexLexerMeta(LexerMeta):
return type.__call__(cls, *args, **kwds)
class RegexLexer(Lexer):
class RegexLexer(Lexer, metaclass=RegexLexerMeta):
"""
Base for simple stateful regular expression-based lexers.
Simplifies the lexing process so that you need only
provide a list of states and regular expressions.
"""
__metaclass__ = RegexLexerMeta
#: Flags for compiling the regular expressions.
#: Defaults to MULTILINE.
@@ -609,7 +607,7 @@ class RegexLexer(Lexer):
# at EOL, reset state to "root"
statestack = ['root']
statetokens = tokendefs['root']
yield pos, Text, u'\n'
yield pos, Text, '\n'
pos += 1
continue
yield pos, Error, text[pos]
@@ -693,7 +691,7 @@ class ExtendedRegexLexer(RegexLexer):
# at EOL, reset state to "root"
ctx.stack = ['root']
statetokens = tokendefs['root']
yield ctx.pos, Text, u'\n'
yield ctx.pos, Text, '\n'
ctx.pos += 1
continue
yield ctx.pos, Error, text[ctx.pos]
@@ -718,7 +716,7 @@ def do_insertions(insertions, tokens):
"""
insertions = iter(insertions)
try:
index, itokens = insertions.next()
index, itokens = next(insertions)
except StopIteration:
# no insertions
for item in tokens:
@@ -744,7 +742,7 @@ def do_insertions(insertions, tokens):
realpos += len(it_value)
oldi = index - i
try:
index, itokens = insertions.next()
index, itokens = next(insertions)
except StopIteration:
insleft = False
break # not strictly necessary
@@ -759,7 +757,7 @@ def do_insertions(insertions, tokens):
yield realpos, t, v
realpos += len(v)
try:
index, itokens = insertions.next()
index, itokens = next(insertions)
except StopIteration:
insleft = False
break # not strictly necessary
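
do_insertions() now advances its iterator with the builtin next(), which replaced the Python 2 .next() method; the changed call pattern in isolation:

insertions = iter([(0, [(0, 'Prompt', '>>> ')])])
try:
    index, itokens = next(insertions)    # Python 2 spelled this insertions.next()
except StopIteration:
    index, itokens = None, []
assert index == 0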

View file

@@ -21,7 +21,7 @@ from pygments.util import ClassNotFound, bytes
__all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
'guess_lexer'] + LEXERS.keys()
'guess_lexer'] + list(LEXERS.keys())
_lexer_cache = {}
@@ -41,7 +41,7 @@ def get_all_lexers():
Return a generator of tuples in the form ``(name, aliases,
filenames, mimetypes)`` of all know lexers.
"""
for item in LEXERS.itervalues():
for item in LEXERS.values():
yield item[1:]
for lexer in find_plugin_lexers():
yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes
@@ -54,7 +54,7 @@ def find_lexer_class(name):
if name in _lexer_cache:
return _lexer_cache[name]
# lookup builtin lexers
for module_name, lname, aliases, _, _ in LEXERS.itervalues():
for module_name, lname, aliases, _, _ in LEXERS.values():
if name == lname:
_load_lexers(module_name)
return _lexer_cache[name]
@@ -69,7 +69,7 @@ def get_lexer_by_name(_alias, **options):
Get a lexer by an alias.
"""
# lookup builtin lexers
for module_name, name, aliases, _, _ in LEXERS.itervalues():
for module_name, name, aliases, _, _ in LEXERS.values():
if _alias in aliases:
if name not in _lexer_cache:
_load_lexers(module_name)
@@ -89,7 +89,7 @@ def get_lexer_for_filename(_fn, code=None, **options):
"""
matches = []
fn = basename(_fn)
for modname, name, _, filenames, _ in LEXERS.itervalues():
for modname, name, _, filenames, _ in LEXERS.values():
for filename in filenames:
if fnmatch.fnmatch(fn, filename):
if name not in _lexer_cache:
@@ -118,7 +118,7 @@ def get_lexer_for_filename(_fn, code=None, **options):
if matches:
matches.sort(key=get_rating)
#print("Possible lexers, after sort:", matches)
#print "Possible lexers, after sort:", matches
return matches[-1][0](**options)
raise ClassNotFound('no lexer for filename %r found' % _fn)
@@ -127,7 +127,7 @@ def get_lexer_for_mimetype(_mime, **options):
"""
Get a lexer for a mimetype.
"""
for modname, name, _, _, mimetypes in LEXERS.itervalues():
for modname, name, _, _, mimetypes in LEXERS.values():
if _mime in mimetypes:
if name not in _lexer_cache:
_load_lexers(modname)
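
These registry hunks all account for Python 3 dict views; a minimal sketch with a toy one-entry LEXERS table (hypothetical data):

LEXERS = {'PythonLexer': ('pygments.lexers.agile', 'Python', ('python',), ('*.py',), ())}

__all__ = ['get_lexer_by_name'] + list(LEXERS.keys())   # a keys() view must become a list
for _, name, aliases, _, _ in LEXERS.values():          # itervalues() -> values()
    assert 'python' in aliases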

View file

@@ -142,7 +142,7 @@ MODULES = {'basic': ['_G',
if __name__ == '__main__':
import re
import urllib
import urllib.request, urllib.parse, urllib.error
import pprint
# you can't generally find out what module a function belongs to if you
@@ -188,7 +188,7 @@ if __name__ == '__main__':
def get_newest_version():
f = urllib.urlopen('http://www.lua.org/manual/')
f = urllib.request.urlopen('http://www.lua.org/manual/')
r = re.compile(r'^<A HREF="(\d\.\d)/">Lua \1</A>')
for line in f:
m = r.match(line)
@@ -196,7 +196,7 @@ if __name__ == '__main__':
return m.groups()[0]
def get_lua_functions(version):
f = urllib.urlopen('http://www.lua.org/manual/%s/' % version)
f = urllib.request.urlopen('http://www.lua.org/manual/%s/' % version)
r = re.compile(r'^<A HREF="manual.html#pdf-(.+)">\1</A>')
functions = []
for line in f:
@@ -206,7 +206,7 @@ if __name__ == '__main__':
return functions
def get_function_module(name):
for mod, cb in module_callbacks().iteritems():
for mod, cb in module_callbacks().items():
if cb(name):
return mod
if '.' in name:

View file

@@ -3711,7 +3711,7 @@ if __name__ == '__main__':
import re
import shutil
import tarfile
import urllib
import urllib.request, urllib.parse, urllib.error
PHP_MANUAL_URL = 'http://us3.php.net/distributions/manual/php_manual_en.tar.gz'
PHP_MANUAL_DIR = './php-chunked-xhtml/'
@@ -3752,7 +3752,7 @@ if __name__ == '__main__':
return modules
def get_php_references():
download = urllib.urlretrieve(PHP_MANUAL_URL)
download = urllib.request.urlretrieve(PHP_MANUAL_URL)
tar = tarfile.open(download[0])
tar.extractall()
tar.close()
@@ -3779,7 +3779,7 @@ if __name__ == '__main__':
def run():
print('>> Downloading Function Index')
modules = get_php_functions()
total = sum(len(v) for v in modules.itervalues())
total = sum(len(v) for v in modules.values())
print('%d functions found' % total)
regenerate(__file__, modules)
shutil.rmtree(PHP_MANUAL_DIR)
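
The builtin-generator scripts hit the urllib split: in Python 3 the old urllib functions are spread across urllib.request, urllib.parse, and urllib.error, as sketched here (the quote call avoids any network access):

import urllib.request, urllib.parse, urllib.error

# urllib.urlopen(url)     -> urllib.request.urlopen(url)
# urllib.urlretrieve(url) -> urllib.request.urlretrieve(url)
assert urllib.parse.quote('php manual') == 'php%20manual'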

View file

@@ -10,7 +10,7 @@
"""
import re
import urllib
import urllib.request, urllib.parse, urllib.error
# One man's constant is another man's variable.
SOURCE_URL = 'https://github.com/postgres/postgres/raw/master'
@@ -97,7 +97,7 @@ def parse_pseudos(f):
return dt
def fetch(url):
return urllib.urlopen(url)
return urllib.request.urlopen(url)
def update_consts(filename, constname, content):
f = open(filename)

View file

@@ -77,7 +77,7 @@ class RobotFrameworkLexer(Lexer):
for value, token in row_tokenizer.tokenize(row):
for value, token in var_tokenizer.tokenize(value, token):
if value:
yield index, token, unicode(value)
yield index, token, str(value)
index += len(value)

View file

@@ -1012,7 +1012,7 @@ if __name__ == '__main__':
import pprint
import re
import sys
import urllib
import urllib.request, urllib.parse, urllib.error
# urllib ends up wanting to import a module called 'math' -- if
# pygments/lexers is in the path, this ends badly.
@@ -1021,7 +1021,7 @@ if __name__ == '__main__':
del sys.path[i]
def get_version():
f = urllib.urlopen('http://docs.sourcemod.net/api/index.php')
f = urllib.request.urlopen('http://docs.sourcemod.net/api/index.php')
r = re.compile(r'SourceMod v\.<b>([\d\.]+)</td>')
for line in f:
m = r.search(line)
@@ -1029,7 +1029,7 @@ if __name__ == '__main__':
return m.groups()[0]
def get_sm_functions():
f = urllib.urlopen('http://docs.sourcemod.net/api/SMfuncs.js')
f = urllib.request.urlopen('http://docs.sourcemod.net/api/SMfuncs.js')
r = re.compile(r'SMfunctions\[\d+\] = Array \("(?:public )?([^,]+)",".+"\);')
functions = []
for line in f:

View file

@@ -332,17 +332,17 @@ class PythonConsoleLexer(Lexer):
tb = 0
for match in line_re.finditer(text):
line = match.group()
if line.startswith(u'>>> ') or line.startswith(u'... '):
if line.startswith('>>> ') or line.startswith('... '):
tb = 0
insertions.append((len(curcode),
[(0, Generic.Prompt, line[:4])]))
curcode += line[4:]
elif line.rstrip() == u'...' and not tb:
elif line.rstrip() == '...' and not tb:
# only a new >>> prompt can end an exception block
# otherwise an ellipsis in place of the traceback frames
# will be mishandled
insertions.append((len(curcode),
[(0, Generic.Prompt, u'...')]))
[(0, Generic.Prompt, '...')]))
curcode += line[3:]
else:
if curcode:
@@ -351,8 +351,8 @@ class PythonConsoleLexer(Lexer):
yield item
curcode = ''
insertions = []
if (line.startswith(u'Traceback (most recent call last):') or
re.match(ur' File "[^"]+", line \d+\n$', line)):
if (line.startswith('Traceback (most recent call last):') or
re.match(r' File "[^"]+", line \d+\n$', line)):
tb = 1
curtb = line
tbindex = match.start()
@@ -360,7 +360,7 @@ class PythonConsoleLexer(Lexer):
yield match.start(), Name.Class, line
elif tb:
curtb += line
if not (line.startswith(' ') or line.strip() == u'...'):
if not (line.startswith(' ') or line.strip() == '...'):
tb = 0
for i, t, v in tblexer.get_tokens_unprocessed(curtb):
yield tbindex+i, t, v
@@ -1126,7 +1126,7 @@ class LuaLexer(RegexLexer):
self._functions = set()
if self.func_name_highlighting:
from pygments.lexers._luabuiltins import MODULES
for mod, func in MODULES.iteritems():
for mod, func in MODULES.items():
if mod not in self.disabled_modules:
self._functions.update(func)
RegexLexer.__init__(self, **options)
@@ -1141,7 +1141,7 @@ class LuaLexer(RegexLexer):
elif '.' in value:
a, b = value.split('.')
yield index, Name, a
yield index + len(a), Punctuation, u'.'
yield index + len(a), Punctuation, '.'
yield index + len(a) + 1, Name, b
continue
yield index, token, value
@@ -2032,54 +2032,54 @@ class Perl6Lexer(ExtendedRegexLexer):
# Perl 6 has a *lot* of possible bracketing characters
# this list was lifted from STD.pm6 (https://github.com/perl6/std)
PERL6_BRACKETS = {
u'\u0028' : u'\u0029', u'\u003c' : u'\u003e', u'\u005b' : u'\u005d', u'\u007b' : u'\u007d',
u'\u00ab' : u'\u00bb', u'\u0f3a' : u'\u0f3b', u'\u0f3c' : u'\u0f3d', u'\u169b' : u'\u169c',
u'\u2018' : u'\u2019', u'\u201a' : u'\u2019', u'\u201b' : u'\u2019', u'\u201c' : u'\u201d',
u'\u201e' : u'\u201d', u'\u201f' : u'\u201d', u'\u2039' : u'\u203a', u'\u2045' : u'\u2046',
u'\u207d' : u'\u207e', u'\u208d' : u'\u208e', u'\u2208' : u'\u220b', u'\u2209' : u'\u220c',
u'\u220a' : u'\u220d', u'\u2215' : u'\u29f5', u'\u223c' : u'\u223d', u'\u2243' : u'\u22cd',
u'\u2252' : u'\u2253', u'\u2254' : u'\u2255', u'\u2264' : u'\u2265', u'\u2266' : u'\u2267',
u'\u2268' : u'\u2269', u'\u226a' : u'\u226b', u'\u226e' : u'\u226f', u'\u2270' : u'\u2271',
u'\u2272' : u'\u2273', u'\u2274' : u'\u2275', u'\u2276' : u'\u2277', u'\u2278' : u'\u2279',
u'\u227a' : u'\u227b', u'\u227c' : u'\u227d', u'\u227e' : u'\u227f', u'\u2280' : u'\u2281',
u'\u2282' : u'\u2283', u'\u2284' : u'\u2285', u'\u2286' : u'\u2287', u'\u2288' : u'\u2289',
u'\u228a' : u'\u228b', u'\u228f' : u'\u2290', u'\u2291' : u'\u2292', u'\u2298' : u'\u29b8',
u'\u22a2' : u'\u22a3', u'\u22a6' : u'\u2ade', u'\u22a8' : u'\u2ae4', u'\u22a9' : u'\u2ae3',
u'\u22ab' : u'\u2ae5', u'\u22b0' : u'\u22b1', u'\u22b2' : u'\u22b3', u'\u22b4' : u'\u22b5',
u'\u22b6' : u'\u22b7', u'\u22c9' : u'\u22ca', u'\u22cb' : u'\u22cc', u'\u22d0' : u'\u22d1',
u'\u22d6' : u'\u22d7', u'\u22d8' : u'\u22d9', u'\u22da' : u'\u22db', u'\u22dc' : u'\u22dd',
u'\u22de' : u'\u22df', u'\u22e0' : u'\u22e1', u'\u22e2' : u'\u22e3', u'\u22e4' : u'\u22e5',
u'\u22e6' : u'\u22e7', u'\u22e8' : u'\u22e9', u'\u22ea' : u'\u22eb', u'\u22ec' : u'\u22ed',
u'\u22f0' : u'\u22f1', u'\u22f2' : u'\u22fa', u'\u22f3' : u'\u22fb', u'\u22f4' : u'\u22fc',
u'\u22f6' : u'\u22fd', u'\u22f7' : u'\u22fe', u'\u2308' : u'\u2309', u'\u230a' : u'\u230b',
u'\u2329' : u'\u232a', u'\u23b4' : u'\u23b5', u'\u2768' : u'\u2769', u'\u276a' : u'\u276b',
u'\u276c' : u'\u276d', u'\u276e' : u'\u276f', u'\u2770' : u'\u2771', u'\u2772' : u'\u2773',
u'\u2774' : u'\u2775', u'\u27c3' : u'\u27c4', u'\u27c5' : u'\u27c6', u'\u27d5' : u'\u27d6',
u'\u27dd' : u'\u27de', u'\u27e2' : u'\u27e3', u'\u27e4' : u'\u27e5', u'\u27e6' : u'\u27e7',
u'\u27e8' : u'\u27e9', u'\u27ea' : u'\u27eb', u'\u2983' : u'\u2984', u'\u2985' : u'\u2986',
u'\u2987' : u'\u2988', u'\u2989' : u'\u298a', u'\u298b' : u'\u298c', u'\u298d' : u'\u298e',
u'\u298f' : u'\u2990', u'\u2991' : u'\u2992', u'\u2993' : u'\u2994', u'\u2995' : u'\u2996',
u'\u2997' : u'\u2998', u'\u29c0' : u'\u29c1', u'\u29c4' : u'\u29c5', u'\u29cf' : u'\u29d0',
u'\u29d1' : u'\u29d2', u'\u29d4' : u'\u29d5', u'\u29d8' : u'\u29d9', u'\u29da' : u'\u29db',
u'\u29f8' : u'\u29f9', u'\u29fc' : u'\u29fd', u'\u2a2b' : u'\u2a2c', u'\u2a2d' : u'\u2a2e',
u'\u2a34' : u'\u2a35', u'\u2a3c' : u'\u2a3d', u'\u2a64' : u'\u2a65', u'\u2a79' : u'\u2a7a',
u'\u2a7d' : u'\u2a7e', u'\u2a7f' : u'\u2a80', u'\u2a81' : u'\u2a82', u'\u2a83' : u'\u2a84',
u'\u2a8b' : u'\u2a8c', u'\u2a91' : u'\u2a92', u'\u2a93' : u'\u2a94', u'\u2a95' : u'\u2a96',
u'\u2a97' : u'\u2a98', u'\u2a99' : u'\u2a9a', u'\u2a9b' : u'\u2a9c', u'\u2aa1' : u'\u2aa2',
u'\u2aa6' : u'\u2aa7', u'\u2aa8' : u'\u2aa9', u'\u2aaa' : u'\u2aab', u'\u2aac' : u'\u2aad',
u'\u2aaf' : u'\u2ab0', u'\u2ab3' : u'\u2ab4', u'\u2abb' : u'\u2abc', u'\u2abd' : u'\u2abe',
u'\u2abf' : u'\u2ac0', u'\u2ac1' : u'\u2ac2', u'\u2ac3' : u'\u2ac4', u'\u2ac5' : u'\u2ac6',
u'\u2acd' : u'\u2ace', u'\u2acf' : u'\u2ad0', u'\u2ad1' : u'\u2ad2', u'\u2ad3' : u'\u2ad4',
u'\u2ad5' : u'\u2ad6', u'\u2aec' : u'\u2aed', u'\u2af7' : u'\u2af8', u'\u2af9' : u'\u2afa',
u'\u2e02' : u'\u2e03', u'\u2e04' : u'\u2e05', u'\u2e09' : u'\u2e0a', u'\u2e0c' : u'\u2e0d',
u'\u2e1c' : u'\u2e1d', u'\u2e20' : u'\u2e21', u'\u3008' : u'\u3009', u'\u300a' : u'\u300b',
u'\u300c' : u'\u300d', u'\u300e' : u'\u300f', u'\u3010' : u'\u3011', u'\u3014' : u'\u3015',
u'\u3016' : u'\u3017', u'\u3018' : u'\u3019', u'\u301a' : u'\u301b', u'\u301d' : u'\u301e',
u'\ufd3e' : u'\ufd3f', u'\ufe17' : u'\ufe18', u'\ufe35' : u'\ufe36', u'\ufe37' : u'\ufe38',
u'\ufe39' : u'\ufe3a', u'\ufe3b' : u'\ufe3c', u'\ufe3d' : u'\ufe3e', u'\ufe3f' : u'\ufe40',
u'\ufe41' : u'\ufe42', u'\ufe43' : u'\ufe44', u'\ufe47' : u'\ufe48', u'\ufe59' : u'\ufe5a',
u'\ufe5b' : u'\ufe5c', u'\ufe5d' : u'\ufe5e', u'\uff08' : u'\uff09', u'\uff1c' : u'\uff1e',
u'\uff3b' : u'\uff3d', u'\uff5b' : u'\uff5d', u'\uff5f' : u'\uff60', u'\uff62' : u'\uff63',
'\u0028' : '\u0029', '\u003c' : '\u003e', '\u005b' : '\u005d', '\u007b' : '\u007d',
'\u00ab' : '\u00bb', '\u0f3a' : '\u0f3b', '\u0f3c' : '\u0f3d', '\u169b' : '\u169c',
'\u2018' : '\u2019', '\u201a' : '\u2019', '\u201b' : '\u2019', '\u201c' : '\u201d',
'\u201e' : '\u201d', '\u201f' : '\u201d', '\u2039' : '\u203a', '\u2045' : '\u2046',
'\u207d' : '\u207e', '\u208d' : '\u208e', '\u2208' : '\u220b', '\u2209' : '\u220c',
'\u220a' : '\u220d', '\u2215' : '\u29f5', '\u223c' : '\u223d', '\u2243' : '\u22cd',
'\u2252' : '\u2253', '\u2254' : '\u2255', '\u2264' : '\u2265', '\u2266' : '\u2267',
'\u2268' : '\u2269', '\u226a' : '\u226b', '\u226e' : '\u226f', '\u2270' : '\u2271',
'\u2272' : '\u2273', '\u2274' : '\u2275', '\u2276' : '\u2277', '\u2278' : '\u2279',
'\u227a' : '\u227b', '\u227c' : '\u227d', '\u227e' : '\u227f', '\u2280' : '\u2281',
'\u2282' : '\u2283', '\u2284' : '\u2285', '\u2286' : '\u2287', '\u2288' : '\u2289',
'\u228a' : '\u228b', '\u228f' : '\u2290', '\u2291' : '\u2292', '\u2298' : '\u29b8',
'\u22a2' : '\u22a3', '\u22a6' : '\u2ade', '\u22a8' : '\u2ae4', '\u22a9' : '\u2ae3',
'\u22ab' : '\u2ae5', '\u22b0' : '\u22b1', '\u22b2' : '\u22b3', '\u22b4' : '\u22b5',
'\u22b6' : '\u22b7', '\u22c9' : '\u22ca', '\u22cb' : '\u22cc', '\u22d0' : '\u22d1',
'\u22d6' : '\u22d7', '\u22d8' : '\u22d9', '\u22da' : '\u22db', '\u22dc' : '\u22dd',
'\u22de' : '\u22df', '\u22e0' : '\u22e1', '\u22e2' : '\u22e3', '\u22e4' : '\u22e5',
'\u22e6' : '\u22e7', '\u22e8' : '\u22e9', '\u22ea' : '\u22eb', '\u22ec' : '\u22ed',
'\u22f0' : '\u22f1', '\u22f2' : '\u22fa', '\u22f3' : '\u22fb', '\u22f4' : '\u22fc',
'\u22f6' : '\u22fd', '\u22f7' : '\u22fe', '\u2308' : '\u2309', '\u230a' : '\u230b',
'\u2329' : '\u232a', '\u23b4' : '\u23b5', '\u2768' : '\u2769', '\u276a' : '\u276b',
'\u276c' : '\u276d', '\u276e' : '\u276f', '\u2770' : '\u2771', '\u2772' : '\u2773',
'\u2774' : '\u2775', '\u27c3' : '\u27c4', '\u27c5' : '\u27c6', '\u27d5' : '\u27d6',
'\u27dd' : '\u27de', '\u27e2' : '\u27e3', '\u27e4' : '\u27e5', '\u27e6' : '\u27e7',
'\u27e8' : '\u27e9', '\u27ea' : '\u27eb', '\u2983' : '\u2984', '\u2985' : '\u2986',
'\u2987' : '\u2988', '\u2989' : '\u298a', '\u298b' : '\u298c', '\u298d' : '\u298e',
'\u298f' : '\u2990', '\u2991' : '\u2992', '\u2993' : '\u2994', '\u2995' : '\u2996',
'\u2997' : '\u2998', '\u29c0' : '\u29c1', '\u29c4' : '\u29c5', '\u29cf' : '\u29d0',
'\u29d1' : '\u29d2', '\u29d4' : '\u29d5', '\u29d8' : '\u29d9', '\u29da' : '\u29db',
'\u29f8' : '\u29f9', '\u29fc' : '\u29fd', '\u2a2b' : '\u2a2c', '\u2a2d' : '\u2a2e',
'\u2a34' : '\u2a35', '\u2a3c' : '\u2a3d', '\u2a64' : '\u2a65', '\u2a79' : '\u2a7a',
'\u2a7d' : '\u2a7e', '\u2a7f' : '\u2a80', '\u2a81' : '\u2a82', '\u2a83' : '\u2a84',
'\u2a8b' : '\u2a8c', '\u2a91' : '\u2a92', '\u2a93' : '\u2a94', '\u2a95' : '\u2a96',
'\u2a97' : '\u2a98', '\u2a99' : '\u2a9a', '\u2a9b' : '\u2a9c', '\u2aa1' : '\u2aa2',
'\u2aa6' : '\u2aa7', '\u2aa8' : '\u2aa9', '\u2aaa' : '\u2aab', '\u2aac' : '\u2aad',
'\u2aaf' : '\u2ab0', '\u2ab3' : '\u2ab4', '\u2abb' : '\u2abc', '\u2abd' : '\u2abe',
'\u2abf' : '\u2ac0', '\u2ac1' : '\u2ac2', '\u2ac3' : '\u2ac4', '\u2ac5' : '\u2ac6',
'\u2acd' : '\u2ace', '\u2acf' : '\u2ad0', '\u2ad1' : '\u2ad2', '\u2ad3' : '\u2ad4',
'\u2ad5' : '\u2ad6', '\u2aec' : '\u2aed', '\u2af7' : '\u2af8', '\u2af9' : '\u2afa',
'\u2e02' : '\u2e03', '\u2e04' : '\u2e05', '\u2e09' : '\u2e0a', '\u2e0c' : '\u2e0d',
'\u2e1c' : '\u2e1d', '\u2e20' : '\u2e21', '\u3008' : '\u3009', '\u300a' : '\u300b',
'\u300c' : '\u300d', '\u300e' : '\u300f', '\u3010' : '\u3011', '\u3014' : '\u3015',
'\u3016' : '\u3017', '\u3018' : '\u3019', '\u301a' : '\u301b', '\u301d' : '\u301e',
'\ufd3e' : '\ufd3f', '\ufe17' : '\ufe18', '\ufe35' : '\ufe36', '\ufe37' : '\ufe38',
'\ufe39' : '\ufe3a', '\ufe3b' : '\ufe3c', '\ufe3d' : '\ufe3e', '\ufe3f' : '\ufe40',
'\ufe41' : '\ufe42', '\ufe43' : '\ufe44', '\ufe47' : '\ufe48', '\ufe59' : '\ufe5a',
'\ufe5b' : '\ufe5c', '\ufe5d' : '\ufe5e', '\uff08' : '\uff09', '\uff1c' : '\uff1e',
'\uff3b' : '\uff3d', '\uff5b' : '\uff5d', '\uff5f' : '\uff60', '\uff62' : '\uff63',
}
def _build_word_match(words, boundary_regex_fragment = None, prefix = '', suffix = ''):
@@ -2179,7 +2179,7 @@ class Perl6Lexer(ExtendedRegexLexer):
# process the corresponding one!
tokens = {
'common' : [
(r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)),
(r'#[`|=](?P<delimiter>(?P<first_char>[' + ''.join(list(PERL6_BRACKETS.keys())) + r'])(?P=first_char)*)', brackets_callback(Comment.Multiline)),
(r'#[^\n]*$', Comment.Singleline),
(r'^(\s*)=begin\s+(\w+)\b.*?^\1=end\s+\2', Comment.Multiline),
(r'^(\s*)=for.*?\n\s*?\n', Comment.Multiline),
@@ -2192,10 +2192,10 @@ class Perl6Lexer(ExtendedRegexLexer):
(_build_word_match(PERL6_BUILTIN_CLASSES, PERL6_IDENTIFIER_RANGE, suffix = '(?::[UD])?'), Name.Builtin),
(_build_word_match(PERL6_BUILTINS, PERL6_IDENTIFIER_RANGE), Name.Builtin),
# copied from PerlLexer
(r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable),
(r'[$@%&][.^:?=!~]?' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable),
(r'\$[!/](?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
(r'::\?\w+', Name.Variable.Global),
(r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + u'+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
(r'[$@%&]\*' + PERL6_IDENTIFIER_RANGE + '+(?:<<.*?>>|<.*?>|«.*?»)*', Name.Variable.Global),
(r'\$(?:<.*?>)+', Name.Variable),
(r'(?:q|qq|Q)[a-zA-Z]?\s*(?P<adverbs>:[\w\s:]+)?\s*(?P<delimiter>(?P<first_char>[^0-9a-zA-Z:\s])(?P=first_char)*)', brackets_callback(String)),
# copied from PerlLexer
@@ -2228,7 +2228,7 @@ class Perl6Lexer(ExtendedRegexLexer):
(r'.+?', Text),
],
'token-sym-brackets' : [
(r'(?P<delimiter>(?P<first_char>[' + ''.join(PERL6_BRACKETS.keys()) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')),
(r'(?P<delimiter>(?P<first_char>[' + ''.join(list(PERL6_BRACKETS.keys())) + '])(?P=first_char)*)', brackets_callback(Name), ('#pop', 'pre-token')),
(r'', Name, ('#pop', 'pre-token')),
],
'token': [

View file

@@ -835,7 +835,7 @@ class DelphiLexer(Lexer):
if get_bool_opt(options, 'freepascal', True):
self.keywords.update(self.FREE_PASCAL_KEYWORDS)
self.builtins = set()
for unit in get_list_opt(options, 'units', self.BUILTIN_UNITS.keys()):
for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS.keys())):
self.builtins.update(self.BUILTIN_UNITS[unit])
def get_tokens_unprocessed(self, text):
@@ -1630,22 +1630,22 @@ class PrologLexer(RegexLexer):
(r'(mod|div|not)\b', Operator),
(r'_', Keyword), # The don't-care variable
(r'([a-z]+)(:)', bygroups(Name.Namespace, Punctuation)),
(u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
u'(\\s*)(:-|-->)',
('([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
'(\\s*)(:-|-->)',
bygroups(Name.Function, Text, Operator)), # function defn
(u'([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
u'(\\s*)(\\()',
('([a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*)'
'(\\s*)(\\()',
bygroups(Name.Function, Text, Punctuation)),
(u'[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
u'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*',
('[a-z\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]'
'[a-zA-Z0-9_$\u00c0-\u1fff\u3040-\ud7ff\ue000-\uffef]*',
String.Atom), # atom, characters
# This one includes !
(u'[#&*+\\-./:<=>?@\\\\^~\u00a1-\u00bf\u2010-\u303f]+',
('[#&*+\\-./:<=>?@\\\\^~\u00a1-\u00bf\u2010-\u303f]+',
String.Atom), # atom, graphics
(r'[A-Z_][A-Za-z0-9_]*', Name.Variable),
(u'\\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]', Text),
('\\s+|[\u2000-\u200f\ufff0-\ufffe\uffef]', Text),
],
'nested-comment': [
(r'\*/', Comment.Multiline, '#pop'),

View file

@@ -71,7 +71,7 @@ class CSharpLexer(RegexLexer):
tokens = {}
token_variants = True
for levelname, cs_ident in levels.items():
for levelname, cs_ident in list(levels.items()):
tokens[levelname] = {
'root': [
# method names
@@ -126,7 +126,7 @@ class CSharpLexer(RegexLexer):
}
def __init__(self, **options):
level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(), 'basic')
level = get_choice_opt(options, 'unicodelevel', list(self.tokens.keys()), 'basic')
if level not in self._all_tokens:
# compile the regexes now
self._tokens = self.__class__.process_tokendef(level)
@@ -183,7 +183,7 @@ class NemerleLexer(RegexLexer):
tokens = {}
token_variants = True
for levelname, cs_ident in levels.items():
for levelname, cs_ident in list(levels.items()):
tokens[levelname] = {
'root': [
# method names
@@ -284,7 +284,7 @@ class NemerleLexer(RegexLexer):
}
def __init__(self, **options):
level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(),
level = get_choice_opt(options, 'unicodelevel', list(self.tokens.keys()),
'basic')
if level not in self._all_tokens:
# compile the regexes now

View file

@@ -1049,12 +1049,12 @@ class AgdaLexer(RegexLexer):
(r'{!', Comment.Directive, 'hole'),
# Lexemes:
# Identifiers
(ur'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
(r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
(r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
(r'\b(Set|Prop)\b', Keyword.Type),
# Special Symbols
(r'(\(|\)|\{|\})', Operator),
(ur'(\.{1,3}|\||[\u039B]|[\u2200]|[\u2192]|:|=|->)', Operator.Word),
(r'(\.{1,3}|\||[\u039B]|[\u2200]|[\u2192]|:|=|->)', Operator.Word),
# Numbers
(r'\d+[eE][+-]?\d+', Number.Float),
(r'\d+\.\d+([eE][+-]?\d+)?', Number.Float),
@@ -2157,7 +2157,7 @@ class CoqLexer(RegexLexer):
'<-', '=', '>', '>]', '>}', r'\?', r'\?\?', r'\[', r'\[<', r'\[>',
r'\[\|', ']', '_', '`', '{', '{<', r'\|', r'\|]', '}', '~', '=>',
r'/\\', r'\\/',
u'Π', u'λ',
'Π', 'λ',
]
operators = r'[!$%&*+\./:<=>?@^|~-]'
word_operators = ['and', 'asr', 'land', 'lor', 'lsl', 'lxor', 'mod', 'or']
@@ -2490,7 +2490,7 @@ class ElixirConsoleLexer(Lexer):
insertions = []
for match in line_re.finditer(text):
line = match.group()
if line.startswith(u'** '):
if line.startswith('** '):
insertions.append((len(curcode),
[(0, Generic.Error, line[:-1])]))
curcode += line[-1:]
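
The ur'...' prefixes dropped in these lexers are not merely stylistic: Python 3 removed the 'ur' prefix entirely, and plain raw literals are already unicode. A quick check:

raised = False
try:
    compile("ur'\\b'", '<demo>', 'eval')  # the Python 2 'ur' prefix no longer parses
except SyntaxError:
    raised = True
assert raised                             # 'ur' literals are a syntax error in Python 3
assert r'λ' == 'λ'                        # raw literals are unicode by default now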

View file

@@ -131,137 +131,137 @@ class ScalaLexer(RegexLexer):
flags = re.MULTILINE | re.DOTALL
# don't use raw unicode strings!
op = (u'[-~\\^\\*!%&\\\\<>\\|+=:/?@\u00a6-\u00a7\u00a9\u00ac\u00ae\u00b0-\u00b1'
u'\u00b6\u00d7\u00f7\u03f6\u0482\u0606-\u0608\u060e-\u060f\u06e9'
u'\u06fd-\u06fe\u07f6\u09fa\u0b70\u0bf3-\u0bf8\u0bfa\u0c7f\u0cf1-\u0cf2'
u'\u0d79\u0f01-\u0f03\u0f13-\u0f17\u0f1a-\u0f1f\u0f34\u0f36\u0f38'
u'\u0fbe-\u0fc5\u0fc7-\u0fcf\u109e-\u109f\u1360\u1390-\u1399\u1940'
u'\u19e0-\u19ff\u1b61-\u1b6a\u1b74-\u1b7c\u2044\u2052\u207a-\u207c'
u'\u208a-\u208c\u2100-\u2101\u2103-\u2106\u2108-\u2109\u2114\u2116-\u2118'
u'\u211e-\u2123\u2125\u2127\u2129\u212e\u213a-\u213b\u2140-\u2144'
u'\u214a-\u214d\u214f\u2190-\u2328\u232b-\u244a\u249c-\u24e9\u2500-\u2767'
u'\u2794-\u27c4\u27c7-\u27e5\u27f0-\u2982\u2999-\u29d7\u29dc-\u29fb'
u'\u29fe-\u2b54\u2ce5-\u2cea\u2e80-\u2ffb\u3004\u3012-\u3013\u3020'
u'\u3036-\u3037\u303e-\u303f\u3190-\u3191\u3196-\u319f\u31c0-\u31e3'
u'\u3200-\u321e\u322a-\u3250\u3260-\u327f\u328a-\u32b0\u32c0-\u33ff'
u'\u4dc0-\u4dff\ua490-\ua4c6\ua828-\ua82b\ufb29\ufdfd\ufe62\ufe64-\ufe66'
u'\uff0b\uff1c-\uff1e\uff5c\uff5e\uffe2\uffe4\uffe8-\uffee\ufffc-\ufffd]+')
op = ('[-~\\^\\*!%&\\\\<>\\|+=:/?@\u00a6-\u00a7\u00a9\u00ac\u00ae\u00b0-\u00b1'
'\u00b6\u00d7\u00f7\u03f6\u0482\u0606-\u0608\u060e-\u060f\u06e9'
'\u06fd-\u06fe\u07f6\u09fa\u0b70\u0bf3-\u0bf8\u0bfa\u0c7f\u0cf1-\u0cf2'
'\u0d79\u0f01-\u0f03\u0f13-\u0f17\u0f1a-\u0f1f\u0f34\u0f36\u0f38'
'\u0fbe-\u0fc5\u0fc7-\u0fcf\u109e-\u109f\u1360\u1390-\u1399\u1940'
'\u19e0-\u19ff\u1b61-\u1b6a\u1b74-\u1b7c\u2044\u2052\u207a-\u207c'
'\u208a-\u208c\u2100-\u2101\u2103-\u2106\u2108-\u2109\u2114\u2116-\u2118'
'\u211e-\u2123\u2125\u2127\u2129\u212e\u213a-\u213b\u2140-\u2144'
'\u214a-\u214d\u214f\u2190-\u2328\u232b-\u244a\u249c-\u24e9\u2500-\u2767'
'\u2794-\u27c4\u27c7-\u27e5\u27f0-\u2982\u2999-\u29d7\u29dc-\u29fb'
'\u29fe-\u2b54\u2ce5-\u2cea\u2e80-\u2ffb\u3004\u3012-\u3013\u3020'
'\u3036-\u3037\u303e-\u303f\u3190-\u3191\u3196-\u319f\u31c0-\u31e3'
'\u3200-\u321e\u322a-\u3250\u3260-\u327f\u328a-\u32b0\u32c0-\u33ff'
'\u4dc0-\u4dff\ua490-\ua4c6\ua828-\ua82b\ufb29\ufdfd\ufe62\ufe64-\ufe66'
'\uff0b\uff1c-\uff1e\uff5c\uff5e\uffe2\uffe4\uffe8-\uffee\ufffc-\ufffd]+')
letter = (u'[a-zA-Z\\$_\u00aa\u00b5\u00ba\u00c0-\u00d6\u00d8-\u00f6'
u'\u00f8-\u02af\u0370-\u0373\u0376-\u0377\u037b-\u037d\u0386'
u'\u0388-\u03f5\u03f7-\u0481\u048a-\u0556\u0561-\u0587\u05d0-\u05f2'
u'\u0621-\u063f\u0641-\u064a\u066e-\u066f\u0671-\u06d3\u06d5'
u'\u06ee-\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5'
u'\u07b1\u07ca-\u07ea\u0904-\u0939\u093d\u0950\u0958-\u0961'
u'\u0972-\u097f\u0985-\u09b9\u09bd\u09ce\u09dc-\u09e1\u09f0-\u09f1'
u'\u0a05-\u0a39\u0a59-\u0a5e\u0a72-\u0a74\u0a85-\u0ab9\u0abd'
u'\u0ad0-\u0ae1\u0b05-\u0b39\u0b3d\u0b5c-\u0b61\u0b71\u0b83-\u0bb9'
u'\u0bd0\u0c05-\u0c3d\u0c58-\u0c61\u0c85-\u0cb9\u0cbd\u0cde-\u0ce1'
u'\u0d05-\u0d3d\u0d60-\u0d61\u0d7a-\u0d7f\u0d85-\u0dc6\u0e01-\u0e30'
u'\u0e32-\u0e33\u0e40-\u0e45\u0e81-\u0eb0\u0eb2-\u0eb3\u0ebd-\u0ec4'
u'\u0edc-\u0f00\u0f40-\u0f6c\u0f88-\u0f8b\u1000-\u102a\u103f'
u'\u1050-\u1055\u105a-\u105d\u1061\u1065-\u1066\u106e-\u1070'
u'\u1075-\u1081\u108e\u10a0-\u10fa\u1100-\u135a\u1380-\u138f'
u'\u13a0-\u166c\u166f-\u1676\u1681-\u169a\u16a0-\u16ea\u16ee-\u1711'
u'\u1720-\u1731\u1740-\u1751\u1760-\u1770\u1780-\u17b3\u17dc'
u'\u1820-\u1842\u1844-\u18a8\u18aa-\u191c\u1950-\u19a9\u19c1-\u19c7'
u'\u1a00-\u1a16\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae-\u1baf'
u'\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c77\u1d00-\u1d2b\u1d62-\u1d77'
u'\u1d79-\u1d9a\u1e00-\u1fbc\u1fbe\u1fc2-\u1fcc\u1fd0-\u1fdb'
u'\u1fe0-\u1fec\u1ff2-\u1ffc\u2071\u207f\u2102\u2107\u210a-\u2113'
u'\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139'
u'\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c7c'
u'\u2c80-\u2ce4\u2d00-\u2d65\u2d80-\u2dde\u3006-\u3007\u3021-\u3029'
u'\u3038-\u303a\u303c\u3041-\u3096\u309f\u30a1-\u30fa\u30ff-\u318e'
u'\u31a0-\u31b7\u31f0-\u31ff\u3400-\u4db5\u4e00-\ua014\ua016-\ua48c'
u'\ua500-\ua60b\ua610-\ua61f\ua62a-\ua66e\ua680-\ua697\ua722-\ua76f'
u'\ua771-\ua787\ua78b-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822'
u'\ua840-\ua873\ua882-\ua8b3\ua90a-\ua925\ua930-\ua946\uaa00-\uaa28'
u'\uaa40-\uaa42\uaa44-\uaa4b\uac00-\ud7a3\uf900-\ufb1d\ufb1f-\ufb28'
u'\ufb2a-\ufd3d\ufd50-\ufdfb\ufe70-\ufefc\uff21-\uff3a\uff41-\uff5a'
u'\uff66-\uff6f\uff71-\uff9d\uffa0-\uffdc]')
letter = ('[a-zA-Z\\$_\u00aa\u00b5\u00ba\u00c0-\u00d6\u00d8-\u00f6'
'\u00f8-\u02af\u0370-\u0373\u0376-\u0377\u037b-\u037d\u0386'
'\u0388-\u03f5\u03f7-\u0481\u048a-\u0556\u0561-\u0587\u05d0-\u05f2'
'\u0621-\u063f\u0641-\u064a\u066e-\u066f\u0671-\u06d3\u06d5'
'\u06ee-\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5'
'\u07b1\u07ca-\u07ea\u0904-\u0939\u093d\u0950\u0958-\u0961'
'\u0972-\u097f\u0985-\u09b9\u09bd\u09ce\u09dc-\u09e1\u09f0-\u09f1'
'\u0a05-\u0a39\u0a59-\u0a5e\u0a72-\u0a74\u0a85-\u0ab9\u0abd'
'\u0ad0-\u0ae1\u0b05-\u0b39\u0b3d\u0b5c-\u0b61\u0b71\u0b83-\u0bb9'
'\u0bd0\u0c05-\u0c3d\u0c58-\u0c61\u0c85-\u0cb9\u0cbd\u0cde-\u0ce1'
'\u0d05-\u0d3d\u0d60-\u0d61\u0d7a-\u0d7f\u0d85-\u0dc6\u0e01-\u0e30'
'\u0e32-\u0e33\u0e40-\u0e45\u0e81-\u0eb0\u0eb2-\u0eb3\u0ebd-\u0ec4'
'\u0edc-\u0f00\u0f40-\u0f6c\u0f88-\u0f8b\u1000-\u102a\u103f'
'\u1050-\u1055\u105a-\u105d\u1061\u1065-\u1066\u106e-\u1070'
'\u1075-\u1081\u108e\u10a0-\u10fa\u1100-\u135a\u1380-\u138f'
'\u13a0-\u166c\u166f-\u1676\u1681-\u169a\u16a0-\u16ea\u16ee-\u1711'
'\u1720-\u1731\u1740-\u1751\u1760-\u1770\u1780-\u17b3\u17dc'
'\u1820-\u1842\u1844-\u18a8\u18aa-\u191c\u1950-\u19a9\u19c1-\u19c7'
'\u1a00-\u1a16\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae-\u1baf'
'\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c77\u1d00-\u1d2b\u1d62-\u1d77'
'\u1d79-\u1d9a\u1e00-\u1fbc\u1fbe\u1fc2-\u1fcc\u1fd0-\u1fdb'
'\u1fe0-\u1fec\u1ff2-\u1ffc\u2071\u207f\u2102\u2107\u210a-\u2113'
'\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139'
'\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c7c'
'\u2c80-\u2ce4\u2d00-\u2d65\u2d80-\u2dde\u3006-\u3007\u3021-\u3029'
'\u3038-\u303a\u303c\u3041-\u3096\u309f\u30a1-\u30fa\u30ff-\u318e'
'\u31a0-\u31b7\u31f0-\u31ff\u3400-\u4db5\u4e00-\ua014\ua016-\ua48c'
'\ua500-\ua60b\ua610-\ua61f\ua62a-\ua66e\ua680-\ua697\ua722-\ua76f'
'\ua771-\ua787\ua78b-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822'
'\ua840-\ua873\ua882-\ua8b3\ua90a-\ua925\ua930-\ua946\uaa00-\uaa28'
'\uaa40-\uaa42\uaa44-\uaa4b\uac00-\ud7a3\uf900-\ufb1d\ufb1f-\ufb28'
'\ufb2a-\ufd3d\ufd50-\ufdfb\ufe70-\ufefc\uff21-\uff3a\uff41-\uff5a'
'\uff66-\uff6f\uff71-\uff9d\uffa0-\uffdc]')
upper = (u'[A-Z\\$_\u00c0-\u00d6\u00d8-\u00de\u0100\u0102\u0104\u0106\u0108'
u'\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c'
u'\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130'
u'\u0132\u0134\u0136\u0139\u013b\u013d\u013f\u0141\u0143\u0145'
u'\u0147\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a'
u'\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e'
u'\u0170\u0172\u0174\u0176\u0178-\u0179\u017b\u017d\u0181-\u0182'
u'\u0184\u0186-\u0187\u0189-\u018b\u018e-\u0191\u0193-\u0194'
u'\u0196-\u0198\u019c-\u019d\u019f-\u01a0\u01a2\u01a4\u01a6-\u01a7'
u'\u01a9\u01ac\u01ae-\u01af\u01b1-\u01b3\u01b5\u01b7-\u01b8\u01bc'
u'\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9'
u'\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee'
u'\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe\u0200\u0202\u0204'
u'\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218'
u'\u021a\u021c\u021e\u0220\u0222\u0224\u0226\u0228\u022a\u022c'
u'\u022e\u0230\u0232\u023a-\u023b\u023d-\u023e\u0241\u0243-\u0246'
u'\u0248\u024a\u024c\u024e\u0370\u0372\u0376\u0386\u0388-\u038f'
u'\u0391-\u03ab\u03cf\u03d2-\u03d4\u03d8\u03da\u03dc\u03de\u03e0'
u'\u03e2\u03e4\u03e6\u03e8\u03ea\u03ec\u03ee\u03f4\u03f7'
u'\u03f9-\u03fa\u03fd-\u042f\u0460\u0462\u0464\u0466\u0468\u046a'
u'\u046c\u046e\u0470\u0472\u0474\u0476\u0478\u047a\u047c\u047e'
u'\u0480\u048a\u048c\u048e\u0490\u0492\u0494\u0496\u0498\u049a'
u'\u049c\u049e\u04a0\u04a2\u04a4\u04a6\u04a8\u04aa\u04ac\u04ae'
u'\u04b0\u04b2\u04b4\u04b6\u04b8\u04ba\u04bc\u04be\u04c0-\u04c1'
u'\u04c3\u04c5\u04c7\u04c9\u04cb\u04cd\u04d0\u04d2\u04d4\u04d6'
u'\u04d8\u04da\u04dc\u04de\u04e0\u04e2\u04e4\u04e6\u04e8\u04ea'
u'\u04ec\u04ee\u04f0\u04f2\u04f4\u04f6\u04f8\u04fa\u04fc\u04fe'
u'\u0500\u0502\u0504\u0506\u0508\u050a\u050c\u050e\u0510\u0512'
u'\u0514\u0516\u0518\u051a\u051c\u051e\u0520\u0522\u0531-\u0556'
u'\u10a0-\u10c5\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e'
u'\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22'
u'\u1e24\u1e26\u1e28\u1e2a\u1e2c\u1e2e\u1e30\u1e32\u1e34\u1e36'
u'\u1e38\u1e3a\u1e3c\u1e3e\u1e40\u1e42\u1e44\u1e46\u1e48\u1e4a'
u'\u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e'
u'\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72'
u'\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e\u1e80\u1e82\u1e84\u1e86'
u'\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e9e\u1ea0\u1ea2'
u'\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6'
u'\u1eb8\u1eba\u1ebc\u1ebe\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca'
u'\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede'
u'\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2'
u'\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe\u1f08-\u1f0f\u1f18-\u1f1d'
u'\u1f28-\u1f2f\u1f38-\u1f3f\u1f48-\u1f4d\u1f59-\u1f5f'
u'\u1f68-\u1f6f\u1fb8-\u1fbb\u1fc8-\u1fcb\u1fd8-\u1fdb'
u'\u1fe8-\u1fec\u1ff8-\u1ffb\u2102\u2107\u210b-\u210d\u2110-\u2112'
u'\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u2130-\u2133'
u'\u213e-\u213f\u2145\u2183\u2c00-\u2c2e\u2c60\u2c62-\u2c64\u2c67'
u'\u2c69\u2c6b\u2c6d-\u2c6f\u2c72\u2c75\u2c80\u2c82\u2c84\u2c86'
u'\u2c88\u2c8a\u2c8c\u2c8e\u2c90\u2c92\u2c94\u2c96\u2c98\u2c9a'
u'\u2c9c\u2c9e\u2ca0\u2ca2\u2ca4\u2ca6\u2ca8\u2caa\u2cac\u2cae'
u'\u2cb0\u2cb2\u2cb4\u2cb6\u2cb8\u2cba\u2cbc\u2cbe\u2cc0\u2cc2'
u'\u2cc4\u2cc6\u2cc8\u2cca\u2ccc\u2cce\u2cd0\u2cd2\u2cd4\u2cd6'
u'\u2cd8\u2cda\u2cdc\u2cde\u2ce0\u2ce2\ua640\ua642\ua644\ua646'
u'\ua648\ua64a\ua64c\ua64e\ua650\ua652\ua654\ua656\ua658\ua65a'
u'\ua65c\ua65e\ua662\ua664\ua666\ua668\ua66a\ua66c\ua680\ua682'
u'\ua684\ua686\ua688\ua68a\ua68c\ua68e\ua690\ua692\ua694\ua696'
u'\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua732\ua734\ua736'
u'\ua738\ua73a\ua73c\ua73e\ua740\ua742\ua744\ua746\ua748\ua74a'
u'\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e'
u'\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b'
u'\ua77d-\ua77e\ua780\ua782\ua784\ua786\ua78b\uff21-\uff3a]')
upper = ('[A-Z\\$_\u00c0-\u00d6\u00d8-\u00de\u0100\u0102\u0104\u0106\u0108'
'\u010a\u010c\u010e\u0110\u0112\u0114\u0116\u0118\u011a\u011c'
'\u011e\u0120\u0122\u0124\u0126\u0128\u012a\u012c\u012e\u0130'
'\u0132\u0134\u0136\u0139\u013b\u013d\u013f\u0141\u0143\u0145'
'\u0147\u014a\u014c\u014e\u0150\u0152\u0154\u0156\u0158\u015a'
'\u015c\u015e\u0160\u0162\u0164\u0166\u0168\u016a\u016c\u016e'
'\u0170\u0172\u0174\u0176\u0178-\u0179\u017b\u017d\u0181-\u0182'
'\u0184\u0186-\u0187\u0189-\u018b\u018e-\u0191\u0193-\u0194'
'\u0196-\u0198\u019c-\u019d\u019f-\u01a0\u01a2\u01a4\u01a6-\u01a7'
'\u01a9\u01ac\u01ae-\u01af\u01b1-\u01b3\u01b5\u01b7-\u01b8\u01bc'
'\u01c4\u01c7\u01ca\u01cd\u01cf\u01d1\u01d3\u01d5\u01d7\u01d9'
'\u01db\u01de\u01e0\u01e2\u01e4\u01e6\u01e8\u01ea\u01ec\u01ee'
'\u01f1\u01f4\u01f6-\u01f8\u01fa\u01fc\u01fe\u0200\u0202\u0204'
'\u0206\u0208\u020a\u020c\u020e\u0210\u0212\u0214\u0216\u0218'
'\u021a\u021c\u021e\u0220\u0222\u0224\u0226\u0228\u022a\u022c'
'\u022e\u0230\u0232\u023a-\u023b\u023d-\u023e\u0241\u0243-\u0246'
'\u0248\u024a\u024c\u024e\u0370\u0372\u0376\u0386\u0388-\u038f'
'\u0391-\u03ab\u03cf\u03d2-\u03d4\u03d8\u03da\u03dc\u03de\u03e0'
'\u03e2\u03e4\u03e6\u03e8\u03ea\u03ec\u03ee\u03f4\u03f7'
'\u03f9-\u03fa\u03fd-\u042f\u0460\u0462\u0464\u0466\u0468\u046a'
'\u046c\u046e\u0470\u0472\u0474\u0476\u0478\u047a\u047c\u047e'
'\u0480\u048a\u048c\u048e\u0490\u0492\u0494\u0496\u0498\u049a'
'\u049c\u049e\u04a0\u04a2\u04a4\u04a6\u04a8\u04aa\u04ac\u04ae'
'\u04b0\u04b2\u04b4\u04b6\u04b8\u04ba\u04bc\u04be\u04c0-\u04c1'
'\u04c3\u04c5\u04c7\u04c9\u04cb\u04cd\u04d0\u04d2\u04d4\u04d6'
'\u04d8\u04da\u04dc\u04de\u04e0\u04e2\u04e4\u04e6\u04e8\u04ea'
'\u04ec\u04ee\u04f0\u04f2\u04f4\u04f6\u04f8\u04fa\u04fc\u04fe'
'\u0500\u0502\u0504\u0506\u0508\u050a\u050c\u050e\u0510\u0512'
'\u0514\u0516\u0518\u051a\u051c\u051e\u0520\u0522\u0531-\u0556'
'\u10a0-\u10c5\u1e00\u1e02\u1e04\u1e06\u1e08\u1e0a\u1e0c\u1e0e'
'\u1e10\u1e12\u1e14\u1e16\u1e18\u1e1a\u1e1c\u1e1e\u1e20\u1e22'
'\u1e24\u1e26\u1e28\u1e2a\u1e2c\u1e2e\u1e30\u1e32\u1e34\u1e36'
'\u1e38\u1e3a\u1e3c\u1e3e\u1e40\u1e42\u1e44\u1e46\u1e48\u1e4a'
'\u1e4c\u1e4e\u1e50\u1e52\u1e54\u1e56\u1e58\u1e5a\u1e5c\u1e5e'
'\u1e60\u1e62\u1e64\u1e66\u1e68\u1e6a\u1e6c\u1e6e\u1e70\u1e72'
'\u1e74\u1e76\u1e78\u1e7a\u1e7c\u1e7e\u1e80\u1e82\u1e84\u1e86'
'\u1e88\u1e8a\u1e8c\u1e8e\u1e90\u1e92\u1e94\u1e9e\u1ea0\u1ea2'
'\u1ea4\u1ea6\u1ea8\u1eaa\u1eac\u1eae\u1eb0\u1eb2\u1eb4\u1eb6'
'\u1eb8\u1eba\u1ebc\u1ebe\u1ec0\u1ec2\u1ec4\u1ec6\u1ec8\u1eca'
'\u1ecc\u1ece\u1ed0\u1ed2\u1ed4\u1ed6\u1ed8\u1eda\u1edc\u1ede'
'\u1ee0\u1ee2\u1ee4\u1ee6\u1ee8\u1eea\u1eec\u1eee\u1ef0\u1ef2'
'\u1ef4\u1ef6\u1ef8\u1efa\u1efc\u1efe\u1f08-\u1f0f\u1f18-\u1f1d'
'\u1f28-\u1f2f\u1f38-\u1f3f\u1f48-\u1f4d\u1f59-\u1f5f'
'\u1f68-\u1f6f\u1fb8-\u1fbb\u1fc8-\u1fcb\u1fd8-\u1fdb'
'\u1fe8-\u1fec\u1ff8-\u1ffb\u2102\u2107\u210b-\u210d\u2110-\u2112'
'\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u2130-\u2133'
'\u213e-\u213f\u2145\u2183\u2c00-\u2c2e\u2c60\u2c62-\u2c64\u2c67'
'\u2c69\u2c6b\u2c6d-\u2c6f\u2c72\u2c75\u2c80\u2c82\u2c84\u2c86'
'\u2c88\u2c8a\u2c8c\u2c8e\u2c90\u2c92\u2c94\u2c96\u2c98\u2c9a'
'\u2c9c\u2c9e\u2ca0\u2ca2\u2ca4\u2ca6\u2ca8\u2caa\u2cac\u2cae'
'\u2cb0\u2cb2\u2cb4\u2cb6\u2cb8\u2cba\u2cbc\u2cbe\u2cc0\u2cc2'
'\u2cc4\u2cc6\u2cc8\u2cca\u2ccc\u2cce\u2cd0\u2cd2\u2cd4\u2cd6'
'\u2cd8\u2cda\u2cdc\u2cde\u2ce0\u2ce2\ua640\ua642\ua644\ua646'
'\ua648\ua64a\ua64c\ua64e\ua650\ua652\ua654\ua656\ua658\ua65a'
'\ua65c\ua65e\ua662\ua664\ua666\ua668\ua66a\ua66c\ua680\ua682'
'\ua684\ua686\ua688\ua68a\ua68c\ua68e\ua690\ua692\ua694\ua696'
'\ua722\ua724\ua726\ua728\ua72a\ua72c\ua72e\ua732\ua734\ua736'
'\ua738\ua73a\ua73c\ua73e\ua740\ua742\ua744\ua746\ua748\ua74a'
'\ua74c\ua74e\ua750\ua752\ua754\ua756\ua758\ua75a\ua75c\ua75e'
'\ua760\ua762\ua764\ua766\ua768\ua76a\ua76c\ua76e\ua779\ua77b'
'\ua77d-\ua77e\ua780\ua782\ua784\ua786\ua78b\uff21-\uff3a]')
idrest = ur'%s(?:%s|[0-9])*(?:(?<=_)%s)?' % (letter, letter, op)
idrest = r'%s(?:%s|[0-9])*(?:(?<=_)%s)?' % (letter, letter, op)
tokens = {
'root': [
# method names
(r'(class|trait|object)(\s+)', bygroups(Keyword, Text), 'class'),
(ur"'%s" % idrest, Text.Symbol),
(r"'%s" % idrest, Text.Symbol),
(r'[^\S\n]+', Text),
(r'//.*?\n', Comment.Single),
(r'/\*', Comment.Multiline, 'comment'),
(ur'@%s' % idrest, Name.Decorator),
(ur'(abstract|ca(?:se|tch)|d(?:ef|o)|e(?:lse|xtends)|'
ur'f(?:inal(?:ly)?|or(?:Some)?)|i(?:f|mplicit)|'
ur'lazy|match|new|override|pr(?:ivate|otected)'
ur'|re(?:quires|turn)|s(?:ealed|uper)|'
ur't(?:h(?:is|row)|ry)|va[lr]|w(?:hile|ith)|yield)\b|'
u'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\\b|(?=\\s)|$)', Keyword),
(ur':(?!%s)' % op, Keyword, 'type'),
(ur'%s%s\b' % (upper, idrest), Name.Class),
(r'@%s' % idrest, Name.Decorator),
(r'(abstract|ca(?:se|tch)|d(?:ef|o)|e(?:lse|xtends)|'
r'f(?:inal(?:ly)?|or(?:Some)?)|i(?:f|mplicit)|'
r'lazy|match|new|override|pr(?:ivate|otected)'
r'|re(?:quires|turn)|s(?:ealed|uper)|'
r't(?:h(?:is|row)|ry)|va[lr]|w(?:hile|ith)|yield)\b|'
'(<[%:-]|=>|>:|[#=@_\u21D2\u2190])(\\b|(?=\\s)|$)', Keyword),
(r':(?!%s)' % op, Keyword, 'type'),
(r'%s%s\b' % (upper, idrest), Name.Class),
(r'(true|false|null)\b', Keyword.Constant),
(r'(import|package)(\s+)', bygroups(Keyword, Text), 'import'),
(r'(type)(\s+)', bygroups(Keyword, Text), 'type'),
@@ -282,34 +282,34 @@ class ScalaLexer(RegexLexer):
(r'\n', Text)
],
'class': [
(ur'(%s|%s|`[^`]+`)(\s*)(\[)' % (idrest, op),
(r'(%s|%s|`[^`]+`)(\s*)(\[)' % (idrest, op),
bygroups(Name.Class, Text, Operator), 'typeparam'),
(r'\s+', Text),
(r'{', Operator, '#pop'),
(r'\(', Operator, '#pop'),
(r'//.*?\n', Comment.Single, '#pop'),
(ur'%s|%s|`[^`]+`' % (idrest, op), Name.Class, '#pop'),
(r'%s|%s|`[^`]+`' % (idrest, op), Name.Class, '#pop'),
],
'type': [
(r'\s+', Text),
(u'<[%:]|>:|[#_\u21D2]|forSome|type', Keyword),
('<[%:]|>:|[#_\u21D2]|forSome|type', Keyword),
(r'([,\);}]|=>|=)(\s*)', bygroups(Operator, Text), '#pop'),
(r'[\(\{]', Operator, '#push'),
(ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)(\[)' %
(r'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)(\[)' %
(idrest, op, idrest, op),
bygroups(Keyword.Type, Text, Operator), ('#pop', 'typeparam')),
(ur'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)$' %
(r'((?:%s|%s|`[^`]+`)(?:\.(?:%s|%s|`[^`]+`))*)(\s*)$' %
(idrest, op, idrest, op),
bygroups(Keyword.Type, Text), '#pop'),
(r'//.*?\n', Comment.Single, '#pop'),
(ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
(r'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
],
'typeparam': [
(r'[\s,]+', Text),
(u'<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword),
('<[%:]|=>|>:|[#_\u21D2]|forSome|type', Keyword),
(r'([\]\)\}])', Operator, '#pop'),
(r'[\(\[\{]', Operator, '#push'),
(ur'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
(r'\.|%s|%s|`[^`]+`' % (idrest, op), Keyword.Type)
],
'comment': [
(r'[^/\*]+', Comment.Multiline),
@@ -318,7 +318,7 @@ class ScalaLexer(RegexLexer):
(r'[*/]', Comment.Multiline)
],
'import': [
(ur'(%s|\.)+' % idrest, Name.Namespace, '#pop')
(r'(%s|\.)+' % idrest, Name.Namespace, '#pop')
],
}
@@ -639,9 +639,9 @@ class IokeLexer(RegexLexer):
r'System|Text|Tuple)(?![a-zA-Z0-9!:_?])', Name.Builtin),
# functions
(ur'(generateMatchMethod|aliasMethod|\u03bb|\u028E|fnx|fn|method|'
ur'dmacro|dlecro|syntax|macro|dlecrox|lecrox|lecro|syntax)'
ur'(?![a-zA-Z0-9!:_?])', Name.Function),
(r'(generateMatchMethod|aliasMethod|\u03bb|\u028E|fnx|fn|method|'
r'dmacro|dlecro|syntax|macro|dlecrox|lecrox|lecro|syntax)'
r'(?![a-zA-Z0-9!:_?])', Name.Function),
# Numbers
(r'-?0[xX][0-9a-fA-F]+', Number.Hex),
@@ -651,13 +651,13 @@ class IokeLexer(RegexLexer):
(r'#\(', Punctuation),
# Operators
(ur'(&&>>|\|\|>>|\*\*>>|:::|::|\.\.\.|===|\*\*>|\*\*=|&&>|&&=|'
ur'\|\|>|\|\|=|\->>|\+>>|!>>|<>>>|<>>|&>>|%>>|#>>|@>>|/>>|\*>>|'
ur'\?>>|\|>>|\^>>|~>>|\$>>|=>>|<<=|>>=|<=>|<\->|=~|!~|=>|\+\+|'
ur'\-\-|<=|>=|==|!=|&&|\.\.|\+=|\-=|\*=|\/=|%=|&=|\^=|\|=|<\-|'
ur'\+>|!>|<>|&>|%>|#>|\@>|\/>|\*>|\?>|\|>|\^>|~>|\$>|<\->|\->|'
ur'<<|>>|\*\*|\?\||\?&|\|\||>|<|\*|\/|%|\+|\-|&|\^|\||=|\$|!|~|'
ur'\?|#|\u2260|\u2218|\u2208|\u2209)', Operator),
(r'(&&>>|\|\|>>|\*\*>>|:::|::|\.\.\.|===|\*\*>|\*\*=|&&>|&&=|'
r'\|\|>|\|\|=|\->>|\+>>|!>>|<>>>|<>>|&>>|%>>|#>>|@>>|/>>|\*>>|'
r'\?>>|\|>>|\^>>|~>>|\$>>|=>>|<<=|>>=|<=>|<\->|=~|!~|=>|\+\+|'
r'\-\-|<=|>=|==|!=|&&|\.\.|\+=|\-=|\*=|\/=|%=|&=|\^=|\|=|<\-|'
r'\+>|!>|<>|&>|%>|#>|\@>|\/>|\*>|\?>|\|>|\^>|~>|\$>|<\->|\->|'
r'<<|>>|\*\*|\?\||\?&|\|\||>|<|\*|\/|%|\+|\-|&|\^|\||=|\$|!|~|'
r'\?|#|\u2260|\u2218|\u2208|\u2209)', Operator),
(r'(and|nand|or|xor|nor|return|import)(?![a-zA-Z0-9_!?])',
Operator),
@@ -983,7 +983,7 @@ class KotlinLexer(RegexLexer):
tokens = {}
token_variants = True
for levelname, cs_ident in levels.items():
for levelname, cs_ident in list(levels.items()):
tokens[levelname] = {
'root': [
# method names
@@ -1036,7 +1036,7 @@ class KotlinLexer(RegexLexer):
}
def __init__(self, **options):
level = get_choice_opt(options, 'unicodelevel', self.tokens.keys(),
level = get_choice_opt(options, 'unicodelevel', list(self.tokens.keys()),
'basic')
if level not in self._all_tokens:
# compile the regexes now
@@ -1087,7 +1087,7 @@ class XtendLexer(RegexLexer):
'class'),
(r'(import)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
(r"(''')", String, 'template'),
(ur"(\u00BB)", String, 'template'),
(r"(\u00BB)", String, 'template'),
(r'"(\\\\|\\"|[^"])*"', String),
(r"'(\\\\|\\'|[^'])*'", String),
(r'[a-zA-Z_][a-zA-Z0-9_]*:', Name.Label),
@@ -1106,7 +1106,7 @@ class XtendLexer(RegexLexer):
],
'template': [
(r"'''", String, '#pop'),
(ur"\u00AB", String, '#pop'),
(r"\u00AB", String, '#pop'),
(r'.', String)
],
}
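
With Python 3 source defaulting to unicode, these large character-class constants compose with plain literals exactly as before; a minimal sketch with tiny stand-ins (hypothetical, far smaller than the real classes):

import re

letter = '[a-zA-Z$_\u00c0-\u00d6]'        # toy stand-in for the 'letter' class above
op = '[-~^*!%&<>|+=:/?@]+'                # toy stand-in for the 'op' class above
idrest = r'%s(?:%s|[0-9])*(?:(?<=_)%s)?' % (letter, letter, op)

assert re.match(idrest, 'fooBar_1')
assert re.match(letter, 'É')              # \u escapes work in plain str literals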

View file

@@ -1150,18 +1150,18 @@ class AppleScriptLexer(RegexLexer):
tokens = {
'root': [
(r'\s+', Text),
(ur'¬\n', String.Escape),
(r'¬\n', String.Escape),
(r"'s\s+", Text), # This is a possessive, consider moving
(r'(--|#).*?$', Comment),
(r'\(\*', Comment.Multiline, 'comment'),
(r'[\(\){}!,.:]', Punctuation),
(ur'(«)([^»]+)(»)',
(r'(«)([^»]+)(»)',
bygroups(Text, Name.Builtin, Text)),
(r'\b((?:considering|ignoring)\s*)'
r'(application responses|case|diacriticals|hyphens|'
r'numeric strings|punctuation|white space)',
bygroups(Keyword, Name.Builtin)),
(ur'(-|\*|\+|&|≠|>=?|<=?|=|≥|≤|/|÷|\^)', Operator),
(r'(-|\*|\+|&|≠|>=?|<=?|=|≥|≤|/|÷|\^)', Operator),
(r"\b(%s)\b" % '|'.join(Operators), Operator.Word),
(r'^(\s*(?:on|end)\s+)'
r'(%s)' % '|'.join(StudioEvents[::-1]),
@@ -1753,10 +1753,10 @@ class GherkinLexer(RegexLexer):
filenames = ['*.feature']
mimetypes = ['text/x-gherkin']
feature_keywords = ur'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$'
feature_element_keywords = ur'^(\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$'
examples_keywords = ur'^(\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$'
step_keywords = ur'^(\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假如|但是|但し|並且|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )'
feature_keywords = r'^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$'
feature_element_keywords = r'^(\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l\'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y\'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$'
examples_keywords = r'^(\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$'
step_keywords = r'^(\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假如|但是|但し|並且|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y\'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y\'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu\'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y\'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y\'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y\'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )'
tokens = {
'comments': [
@@ -3718,9 +3718,9 @@ class RexxLexer(RegexLexer):
r'while)\b', Keyword.Reserved),
],
'operator': [
(ur'(-|//|/|\(|\)|\*\*|\*|\\<<|\\<|\\==|\\=|\\>>|\\>|\\|\|\||\||'
ur'&&|&|%|\+|<<=|<<|<=|<>|<|==|=|><|>=|>>=|>>|>|¬<<|¬<|¬==|¬=|'
ur'¬>>|¬>|¬|\.|,)', Operator),
(r'(-|//|/|\(|\)|\*\*|\*|\\<<|\\<|\\==|\\=|\\>>|\\>|\\|\|\||\||'
r'&&|&|%|\+|<<=|<<|<=|<>|<|==|=|><|>=|>>=|>>|>|¬<<|¬<|¬==|¬=|'
r'¬>>|¬>|¬|\.|,)', Operator),
],
'string_double': [
(r'[^"\n]+', String),

@@ -10,7 +10,7 @@
"""
import re
import cStringIO
import io
from pygments.lexer import Lexer
from pygments.token import Token, Error, Text
@@ -60,12 +60,12 @@ class RawTokenLexer(Lexer):
Lexer.__init__(self, **options)
def get_tokens(self, text):
if isinstance(text, unicode):
if isinstance(text, str):
# raw token stream never has any non-ASCII characters
text = text.encode('ascii')
if self.compress == 'gz':
import gzip
gzipfile = gzip.GzipFile('', 'rb', 9, cStringIO.StringIO(text))
gzipfile = gzip.GzipFile('', 'rb', 9, io.BytesIO(text))
text = gzipfile.read()
elif self.compress == 'bz2':
import bz2
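
One subtlety in this hunk: text has just been encoded to bytes, and gzip
only operates on binary data; in Python 3, io.StringIO accepts str only, so
io.BytesIO is the faithful stand-in for Python 2's cStringIO.StringIO here.
A standalone sketch:

    import gzip, io
    payload = 'raw token stream'.encode('ascii')   # bytes, as in get_tokens
    buf = io.BytesIO(gzip.compress(payload))       # io.StringIO(payload) -> TypeError
    assert gzip.GzipFile(fileobj=buf).read() == payload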

@@ -176,7 +176,7 @@ class PlPgsqlLexer(PostgresBase, RegexLexer):
mimetypes = ['text/x-plpgsql']
flags = re.IGNORECASE
tokens = dict((k, l[:]) for (k, l) in PostgresLexer.tokens.iteritems())
tokens = dict((k, l[:]) for (k, l) in PostgresLexer.tokens.items())
# extend the keywords list
for i, pattern in enumerate(tokens['root']):
@@ -210,7 +210,7 @@ class PsqlRegexLexer(PostgresBase, RegexLexer):
aliases = [] # not public
flags = re.IGNORECASE
tokens = dict((k, l[:]) for (k, l) in PostgresLexer.tokens.iteritems())
tokens = dict((k, l[:]) for (k, l) in PostgresLexer.tokens.items())
tokens['root'].append(
(r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
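
iteritems()/iterkeys()/itervalues() are gone in Python 3; items() already
returns a lazy view, so the rename here (and in the PHP, Sass, and Lasso
hunks below) loses nothing. A standalone sketch of the copy-the-token-lists
idiom:

    base = {'root': [('a', 1)], 'string': [('b', 2)]}
    copies = dict((k, l[:]) for (k, l) in base.items())
    copies['root'].append(('c', 3))
    assert base['root'] == [('a', 1)]   # the l[:] slice keeps the copy independent
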
@@ -244,12 +244,12 @@ class lookahead(object):
def send(self, i):
self._nextitem = i
return i
def next(self):
def __next__(self):
if self._nextitem is not None:
ni = self._nextitem
self._nextitem = None
return ni
return self.iter.next()
return next(self.iter)
class PostgresConsoleLexer(Lexer):
@@ -277,7 +277,7 @@ class PostgresConsoleLexer(Lexer):
insertions = []
while 1:
try:
line = lines.next()
line = next(lines)
except StopIteration:
# allow the emission of partially collected items
# the repl loop will be broken below
@@ -314,7 +314,7 @@ class PostgresConsoleLexer(Lexer):
# Emit the output lines
out_token = Generic.Output
while 1:
line = lines.next()
line = next(lines)
mprompt = re_prompt.match(line)
if mprompt is not None:
# push the line back to have it processed by the prompt
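
Python 3 renamed the iterator method next() to __next__() and routes all
calls through the next() builtin, which is exactly what the lookahead class
and the lines.next() call sites above needed. A standalone sketch of the
protocol:

    class Countdown:
        def __init__(self, n):
            self.n = n
        def __iter__(self):
            return self
        def __next__(self):          # Python 2 spelled this 'next'
            if self.n == 0:
                raise StopIteration
            self.n -= 1
            return self.n

    it = Countdown(3)
    assert next(it) == 2             # next(obj) calls obj.__next__()
    assert list(it) == [1, 0]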

@@ -697,8 +697,8 @@ class RstLexer(RegexLexer):
yield item
# from docutils.parsers.rst.states
closers = u'\'")]}>\u2019\u201d\xbb!?'
unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
closers = '\'")]}>\u2019\u201d\xbb!?'
unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0'
end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
% (re.escape(unicode_delimiters),
re.escape(closers)))
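
2to3 strips the u'' prefix because every Python 3 str literal is Unicode
(the prefix was even a SyntaxError in 3.0-3.2 before PEP 414 re-allowed
it). The \u escapes keep working unchanged; a standalone check:

    closers = '\'")]}>\u2019\u201d\xbb!?'
    assert '\u2019' in closers    # RIGHT SINGLE QUOTATION MARK
    assert '\xbb' == '»'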

@@ -886,7 +886,7 @@ class PhpLexer(RegexLexer):
self._functions = set()
if self.funcnamehighlighting:
from pygments.lexers._phpbuiltins import MODULES
for key, value in MODULES.iteritems():
for key, value in MODULES.items():
if key not in self.disabledmodules:
self._functions.update(value)
RegexLexer.__init__(self, **options)
@@ -2350,7 +2350,7 @@ class SassLexer(ExtendedRegexLexer):
(r"\*/", Comment, '#pop'),
],
}
for group, common in common_sass_tokens.iteritems():
for group, common in common_sass_tokens.items():
tokens[group] = copy.copy(common)
tokens['value'].append((r'\n', Text, 'root'))
tokens['selector'].append((r'\n', Text, 'root'))
@@ -2397,7 +2397,7 @@ class ScssLexer(RegexLexer):
(r"\*/", Comment, '#pop'),
],
}
for group, common in common_sass_tokens.iteritems():
for group, common in common_sass_tokens.items():
tokens[group] = copy.copy(common)
tokens['value'].extend([(r'\n', Text), (r'[;{}]', Punctuation, 'root')])
tokens['selector'].extend([(r'\n', Text), (r'[;{}]', Punctuation, 'root')])
@@ -3284,7 +3284,7 @@ class XQueryLexer(ExtendedRegexLexer):
'xml_comment': [
(r'(-->)', popstate_xmlcomment_callback),
(r'[^-]{1,2}', Literal),
(ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
(r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
unirange(0x10000, 0x10ffff), Literal),
],
'processing_instruction': [
@@ -3294,12 +3294,12 @@ class XQueryLexer(ExtendedRegexLexer):
],
'processing_instruction_content': [
(r'\?>', String.Doc, '#pop'),
(ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
(r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
unirange(0x10000, 0x10ffff), Literal),
],
'cdata_section': [
(r']]>', String.Doc, '#pop'),
(ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
(r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
unirange(0x10000, 0x10ffff), Literal),
],
'start_tag': [
@@ -3368,7 +3368,7 @@ class XQueryLexer(ExtendedRegexLexer):
],
'pragmacontents': [
(r'#\)', Punctuation, 'operator'),
(ur'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
(r'\t|\r|\n|[\u0020-\uD7FF]|[\uE000-\uFFFD]|' +
unirange(0x10000, 0x10ffff), Literal),
(r'(\s+)', Text),
],
@@ -3941,9 +3941,9 @@ class LassoLexer(RegexLexer):
self._members = set()
if self.builtinshighlighting:
from pygments.lexers._lassobuiltins import BUILTINS, MEMBERS
for key, value in BUILTINS.iteritems():
for key, value in BUILTINS.items():
self._builtins.update(value)
for key, value in MEMBERS.iteritems():
for key, value in MEMBERS.items():
self._members.update(value)
RegexLexer.__init__(self, **options)

@@ -104,10 +104,7 @@ class StyleMeta(type):
return len(cls._styles)
class Style(object):
    __metaclass__ = StyleMeta
class Style(object, metaclass=StyleMeta):
    #: overall background color (``None`` means transparent)
    background_color = '#ffffff'
    #: highlight background color
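
Python 3 dropped the __metaclass__ class attribute in favor of the
metaclass= keyword in the class header. A from-scratch sketch of the
registering-metaclass pattern (not the real StyleMeta):

    class Registry(type):
        styles = []
        def __new__(mcs, name, bases, ns):
            cls = super().__new__(mcs, name, bases, ns)
            mcs.styles.append(cls)
            return cls

    class Style(metaclass=Registry):   # Python 2: __metaclass__ = Registry in the body
        background_color = '#ffffff'

    assert Style in Registry.styles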

File diff suppressed because one or more lines are too long

@@ -52,7 +52,7 @@ def get_bool_opt(options, optname, default=None):
return string
elif isinstance(string, int):
return bool(string)
elif not isinstance(string, basestring):
elif not isinstance(string, str):
raise OptionError('Invalid type %r for option %s; use '
'1/0, yes/no, true/false, on/off' % (
string, optname))
@@ -82,7 +82,7 @@ def get_int_opt(options, optname, default=None):
def get_list_opt(options, optname, default=None):
val = options.get(optname, default)
if isinstance(val, basestring):
if isinstance(val, str):
return val.split()
elif isinstance(val, (list, tuple)):
return list(val)
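
basestring, the common ancestor of Python 2's str and unicode, no longer
exists; in Python 3, str is the only text type and bytes is deliberately
not a string. A standalone sketch of the same option handling:

    def as_list(val):
        if isinstance(val, str):
            return val.split()
        elif isinstance(val, (list, tuple)):
            return list(val)
        raise TypeError('expected string or sequence, got %r' % (val,))

    assert as_list('lower upper') == ['lower', 'upper']
    assert as_list(('lower',)) == ['lower']
    assert not isinstance(b'raw', str)   # bytes no longer passes a str check
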
@@ -222,7 +222,7 @@ def unirange(a, b):
if sys.maxunicode > 0xffff:
# wide build
return u'[%s-%s]' % (unichr(a), unichr(b))
return '[%s-%s]' % (chr(a), chr(b))
else:
# narrow build stores surrogates, and the 're' module handles them
# (incorrectly) as characters. Since there is still ordering among
@@ -236,29 +236,29 @@ def unirange(a, b):
ah, al = _surrogatepair(a)
bh, bl = _surrogatepair(b)
if ah == bh:
return u'(?:%s[%s-%s])' % (unichr(ah), unichr(al), unichr(bl))
return '(?:%s[%s-%s])' % (chr(ah), chr(al), chr(bl))
else:
buf = []
buf.append(u'%s[%s-%s]' %
(unichr(ah), unichr(al),
ah == bh and unichr(bl) or unichr(0xdfff)))
buf.append('%s[%s-%s]' %
(chr(ah), chr(al),
ah == bh and chr(bl) or chr(0xdfff)))
if bh - ah > 1:
    buf.append(u'[%s-%s][%s-%s]' %
               (unichr(ah+1), unichr(bh-1), unichr(0xdc00), unichr(0xdfff)))
    buf.append('[%s-%s][%s-%s]' %
               (chr(ah+1), chr(bh-1), chr(0xdc00), chr(0xdfff)))
if ah != bh:
buf.append(u'%s[%s-%s]' %
(unichr(bh), unichr(0xdc00), unichr(bl)))
buf.append('%s[%s-%s]' %
(chr(bh), chr(0xdc00), chr(bl)))
return u'(?:' + u'|'.join(buf) + u')'
return '(?:' + '|'.join(buf) + ')'
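
unichr() is gone because Python 3's chr() covers the full range, and since
PEP 393 (Python 3.3) there are no narrow builds, so sys.maxunicode is
always 0x10FFFF and the surrogate-pair branch above is effectively dead
code on Python 3. A standalone check, assuming 3.3+:

    import re, sys
    assert sys.maxunicode == 0x10FFFF
    pattern = '[%s-%s]' % (chr(0x10000), chr(0x103FF))
    assert re.match(pattern, chr(0x10200))   # one class, no surrogate pairs
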
# Python 2/3 compatibility
if sys.version_info < (3,0):
b = bytes = str
u_prefix = 'u'
import StringIO, cStringIO
BytesIO = cStringIO.StringIO
StringIO = StringIO.StringIO
import io
BytesIO = io.BytesIO
StringIO = io.StringIO
uni_open = codecs.open
else:
import builtins
@@ -266,7 +266,7 @@ else:
u_prefix = ''
def b(s):
if isinstance(s, str):
return bytes(map(ord, s))
return bytes(list(map(ord, s)))
elif isinstance(s, bytes):
return s
else: