vim-rana-local/plugin/packages/wakatime/packages/pygments_py2/pygments/filters/__init__.py


# -*- coding: utf-8 -*-
"""
pygments.filters
~~~~~~~~~~~~~~~~
Module containing filter lookup functions and default
filters.
2014-12-01 06:19:45 +00:00
:copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
2013-09-22 23:22:11 +00:00
:license: BSD, see LICENSE for details.
"""
import re
from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
string_to_tokentype
from pygments.filter import Filter
from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
    get_choice_opt, ClassNotFound, OptionError, text_type, string_types
from pygments.plugin import find_plugin_filters


def find_filter_class(filtername):
    """Lookup a filter by name. Return None if not found."""
if filtername in FILTERS:
return FILTERS[filtername]
for name, cls in find_plugin_filters():
if name == filtername:
return cls
return None


def get_filter_by_name(filtername, **options):
    """Return an instantiated filter.

    Options are passed to the filter initializer if given.
    Raise a ClassNotFound if no filter is found.
    """
cls = find_filter_class(filtername)
if cls:
return cls(**options)
else:
raise ClassNotFound('filter %r not found' % filtername)
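

# A minimal usage sketch: looking up a builtin filter by its registered name,
# with options forwarded to the filter's initializer.
#
#     uppercase = get_filter_by_name('keywordcase', case='upper')
#
# The returned instance can then be attached to any lexer via
# ``lexer.add_filter(uppercase)``.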


def get_all_filters():
    """Return a generator of all filter names."""
for name in FILTERS:
yield name
for name, _ in find_plugin_filters():
yield name
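

# Helper shared by CodeTagFilter and VisibleWhitespaceFilter below: it splits
# ``value`` on every match of ``regex``, re-emitting the matched spans with
# ``specialttype`` (optionally transformed by ``replacefunc``) and the
# unmatched spans with the original ``ttype``.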
def _replace_special(ttype, value, regex, specialttype,
replacefunc=lambda x: x):
last = 0
for match in regex.finditer(value):
start, end = match.start(), match.end()
if start != last:
yield ttype, value[last:start]
yield specialttype, replacefunc(value[start:end])
last = end
if last != len(value):
yield ttype, value[last:]


class CodeTagFilter(Filter):
    """Highlight special code tags in comments and docstrings.

    Options accepted:
`codetags` : list of strings
A list of strings that are flagged as code tags. The default is to
highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
"""

    def __init__(self, **options):
Filter.__init__(self, **options)
tags = get_list_opt(options, 'codetags',
['XXX', 'TODO', 'BUG', 'NOTE'])
self.tag_re = re.compile(r'\b(%s)\b' % '|'.join([
re.escape(tag) for tag in tags if tag
]))

    def filter(self, lexer, stream):
regex = self.tag_re
for ttype, value in stream:
if ttype in String.Doc or \
ttype in Comment and \
ttype not in Comment.Preproc:
for sttype, svalue in _replace_special(ttype, value, regex,
Comment.Special):
yield sttype, svalue
else:
yield ttype, value
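

# A minimal usage sketch, assuming a lexer instance: flagged tags are
# re-emitted with the Comment.Special token type, which most styles render
# distinctly.
#
#     from pygments.lexers import PythonLexer
#     lexer = PythonLexer()
#     lexer.add_filter(CodeTagFilter(codetags=['TODO', 'FIXME', 'HACK']))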


class KeywordCaseFilter(Filter):
    """Convert keywords to lowercase or uppercase or capitalize them, which
    means first letter uppercase, rest lowercase.

    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your styleguide.

    Options accepted:
`case` : string
The casing to convert keywords to. Must be one of ``'lower'``,
``'upper'`` or ``'capitalize'``. The default is ``'lower'``.
"""

    def __init__(self, **options):
Filter.__init__(self, **options)
        case = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')
        self.convert = getattr(text_type, case)

    def filter(self, lexer, stream):
for ttype, value in stream:
if ttype in Keyword:
yield ttype, self.convert(value)
else:
yield ttype, value
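

# A minimal usage sketch: uppercasing every keyword, e.g. to match a Pascal
# style guide.
#
#     from pygments.lexers import DelphiLexer
#     lexer = DelphiLexer()
#     lexer.add_filter(KeywordCaseFilter(case='upper'))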


class NameHighlightFilter(Filter):
    """Highlight a normal Name (and Name.*) token with a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions. `Name.Function` is the default token type.

    Options accepted:
`names` : list of strings
A list of names that should be given the different token type.
There is no default.
`tokentype` : TokenType or string
A token type or a string containing a token type name that is
used for highlighting the strings in `names`. The default is
`Name.Function`.
"""

    def __init__(self, **options):
Filter.__init__(self, **options)
self.names = set(get_list_opt(options, 'names', []))
tokentype = options.get('tokentype')
if tokentype:
self.tokentype = string_to_tokentype(tokentype)
else:
self.tokentype = Name.Function

    def filter(self, lexer, stream):
for ttype, value in stream:
if ttype in Name and value in self.names:
yield self.tokentype, value
else:
yield ttype, value
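

# Besides direct instantiation as in the docstring example, the filter can be
# attached by its registered name from the FILTERS table below (assuming a
# ``lexer`` instance as in the sketches above):
#
#     lexer.add_filter('highlight', names=['foo', 'bar', 'baz'],
#                      tokentype='Name.Function')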


class ErrorToken(Exception):
    pass


class RaiseOnErrorTokenFilter(Filter):
    """Raise an exception when the lexer generates an error token.

Options accepted:
`excclass` : Exception class
The exception class to raise.
The default is `pygments.filters.ErrorToken`.

    .. versionadded:: 0.8
    """

def __init__(self, **options):
Filter.__init__(self, **options)
self.exception = options.get('excclass', ErrorToken)
try:
# issubclass() will raise TypeError if first argument is not a class
if not issubclass(self.exception, Exception):
raise TypeError
except TypeError:
raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
for ttype, value in stream:
if ttype is Error:
raise self.exception(value)
yield ttype, value
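

# A minimal usage sketch: fail fast on input the lexer does not fully
# understand, instead of silently emitting Error tokens.
#
#     lexer.add_filter(RaiseOnErrorTokenFilter(excclass=ValueError))
#
# Iterating over ``lexer.get_tokens(code)`` then raises ValueError at the
# first Error token.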


class VisibleWhitespaceFilter(Filter):
    """Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:
    `spaces` : string or bool
        If this is a one-character string, spaces will be replaced by this
        string. If it is another true value, spaces will be replaced by ``·``
        (unicode MIDDLE DOT). If it is a false value, spaces will not be
        replaced. The default is ``False``.
`tabs` : string or bool
The same as for `spaces`, but the default replacement character is ``»``
(unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK). The default value
is ``False``. Note: this will not work if the `tabsize` option for the
lexer is nonzero, as tabs will already have been expanded then.
`tabsize` : int
If tabs are to be replaced by this filter (see the `tabs` option), this
is the total number of characters that a tab should be expanded to.
The default is ``8``.
    `newlines` : string or bool
        The same as for `spaces`, but the default replacement character is
        ``¶`` (unicode PILCROW SIGN). The default value is ``False``.
`wstokentype` : bool
If true, give whitespace the special `Whitespace` token type. This allows
styling the visible whitespace differently (e.g. greyed out), but it can
disrupt background colors. The default is ``True``.

    .. versionadded:: 0.8
    """

def __init__(self, **options):
Filter.__init__(self, **options)
        for name, default in [('spaces', u'·'),
                              ('tabs', u'»'),
                              ('newlines', u'¶')]:
            opt = options.get(name, False)
            if isinstance(opt, string_types) and len(opt) == 1:
                setattr(self, name, opt)
else:
setattr(self, name, (opt and default or ''))
tabsize = get_int_opt(options, 'tabsize', 8)
if self.tabs:
            self.tabs += ' ' * (tabsize - 1)
if self.newlines:
self.newlines += '\n'
self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
if self.wstt:
            spaces = self.spaces or u' '
            tabs = self.tabs or u'\t'
            newlines = self.newlines or u'\n'
            regex = re.compile(r'\s')

def replacefunc(wschar):
if wschar == ' ':
return spaces
elif wschar == '\t':
return tabs
elif wschar == '\n':
return newlines
return wschar
for ttype, value in stream:
for sttype, svalue in _replace_special(ttype, value, regex,
Whitespace, replacefunc):
yield sttype, svalue
else:
spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
# simpler processing
for ttype, value in stream:
if spaces:
value = value.replace(' ', spaces)
if tabs:
value = value.replace('\t', tabs)
if newlines:
value = value.replace('\n', newlines)
yield ttype, value
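

# A minimal usage sketch: make tabs and newlines visible with their default
# replacement characters while leaving plain spaces untouched.
#
#     lexer.add_filter(VisibleWhitespaceFilter(tabs=True, newlines=True))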


class GobbleFilter(Filter):
    """Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code. This
may be useful when the source code fed to the lexer is indented by a fixed
amount of space that isn't desired in the output.

    Options accepted:
`n` : int
The number of characters to gobble.

    .. versionadded:: 1.2
    """

def __init__(self, **options):
Filter.__init__(self, **options)
self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
if left < len(value):
return value[left:], 0
else:
            return u'', left - len(value)

    def filter(self, lexer, stream):
n = self.n
left = n # How many characters left to gobble.
for ttype, value in stream:
# Remove ``left`` tokens from first line, ``n`` from all others.
parts = value.split('\n')
(parts[0], left) = self.gobble(parts[0], left)
for i in range(1, len(parts)):
(parts[i], left) = self.gobble(parts[i], n)
            value = u'\n'.join(parts)
if value != '':
yield ttype, value
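

# A minimal usage sketch: strip a uniform four-space indent from every line
# before it reaches the formatter.
#
#     lexer.add_filter(GobbleFilter(n=4))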


class TokenMergeFilter(Filter):
    """Merges consecutive tokens with the same token type in the output
    stream of a lexer.

    .. versionadded:: 1.2
    """

def __init__(self, **options):
Filter.__init__(self, **options)

    def filter(self, lexer, stream):
current_type = None
current_value = None
for ttype, value in stream:
if ttype is current_type:
current_value += value
else:
if current_type is not None:
yield current_type, current_value
current_type = ttype
current_value = value
if current_type is not None:
yield current_type, current_value
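

# A minimal usage sketch: merging adjacent same-typed tokens can shrink
# formatter output (fewer markup spans) without changing its appearance.
#
#     lexer.add_filter(TokenMergeFilter())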


FILTERS = {
'codetagify': CodeTagFilter,
'keywordcase': KeywordCaseFilter,
'highlight': NameHighlightFilter,
'raiseonerror': RaiseOnErrorTokenFilter,
'whitespace': VisibleWhitespaceFilter,
'gobble': GobbleFilter,
'tokenmerge': TokenMergeFilter,
}
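
# Filters beyond this builtin table are discovered through setuptools entry
# points (see find_plugin_filters above). A third-party package can register
# one under the ``pygments.filters`` entry point group, e.g. in its setup.py
# (``mypackage`` and ``MyFilter`` are placeholder names):
#
#     entry_points={
#         'pygments.filters': ['myfilter = mypackage:MyFilter'],
#     }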