# -*- coding: utf-8 -*- """ pygments.lexers.python ~~~~~~~~~~~~~~~~~~~~~~ Lexers for Python and related languages. :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import re from pygments.lexer import Lexer, RegexLexer, include, bygroups, using, \ default, words, combined, do_insertions from pygments.util import get_bool_opt, shebang_matches from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic, Other, Error from pygments import unistring as uni __all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer', 'Python3Lexer', 'Python3TracebackLexer', 'CythonLexer', 'DgLexer', 'NumPyLexer'] line_re = re.compile('.*?\n') class PythonLexer(RegexLexer): """ For `Python `_ source code. """ name = 'Python' aliases = ['python', 'py', 'sage'] filenames = ['*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac', '*.sage'] mimetypes = ['text/x-python', 'application/x-python'] def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsux%]', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%', ttype), # newlines are an error (use "nl" state) ] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")', bygroups(Text, String.Affix, String.Doc)), (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')", bygroups(Text, String.Affix, String.Doc)), (r'[^\S\n]+', Text), (r'\A#!.+$', Comment.Hashbang), (r'#.*$', Comment.Single), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator), include('keywords'), (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'), (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'), (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'fromimport'), (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text), 'import'), include('builtins'), include('magicfuncs'), include('magicvars'), include('backtick'), ('([rR]|[uUbB][rR]|[rR][uUbB])(""")', bygroups(String.Affix, String.Double), 'tdqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(''')", bygroups(String.Affix, String.Single), 'tsqs'), ('([rR]|[uUbB][rR]|[rR][uUbB])(")', bygroups(String.Affix, String.Double), 'dqs'), ("([rR]|[uUbB][rR]|[rR][uUbB])(')", bygroups(String.Affix, String.Single), 'sqs'), ('([uUbB]?)(""")', bygroups(String.Affix, String.Double), combined('stringescape', 'tdqs')), ("([uUbB]?)(''')", bygroups(String.Affix, String.Single), combined('stringescape', 'tsqs')), ('([uUbB]?)(")', bygroups(String.Affix, String.Double), combined('stringescape', 'dqs')), ("([uUbB]?)(')", bygroups(String.Affix, String.Single), combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (words(( 'assert', 'break', 'continue', 'del', 'elif', 'else', 'except', 'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'print', 'raise', 'return', 'try', 'while', 'yield', 'yield from', 'as', 'with'), suffix=r'\b'), Keyword), ], 'builtins': [ (words(( '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'vars', 'xrange', 'zip'), prefix=r'(?`_ source code (version 3.0). .. versionadded:: 0.10 """ name = 'Python 3' aliases = ['python3', 'py3'] filenames = [] # Nothing until Python 3 gets widespread mimetypes = ['text/x-python3', 'application/x-python3'] flags = re.MULTILINE | re.UNICODE uni_name = "[%s][%s]*" % (uni.xid_start, uni.xid_continue) def innerstring_rules(ttype): return [ # the old style '%s' % (...) string formatting (still valid in Py3) (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[E-GXc-giorsux%]', String.Interpol), # the new style '{}'.format(...) string formatting (r'\{' '((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name '(\![sra])?' # conversion '(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?' '\}', String.Interpol), # backslashes, quotes and formatting signs must be parsed one at a time (r'[^\\\'"%{\n]+', ttype), (r'[\'"\\]', ttype), # unhandled string formatting sign (r'%|(\{{1,2})', ttype) # newlines are an error (use "nl" state) ] tokens = PythonLexer.tokens.copy() tokens['keywords'] = [ (words(( 'assert', 'async', 'await', 'break', 'continue', 'del', 'elif', 'else', 'except', 'finally', 'for', 'global', 'if', 'lambda', 'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield', 'yield from', 'as', 'with'), suffix=r'\b'), Keyword), (words(( 'True', 'False', 'None'), suffix=r'\b'), Keyword.Constant), ] tokens['builtins'] = [ (words(( '__import__', 'abs', 'all', 'any', 'bin', 'bool', 'bytearray', 'bytes', 'chr', 'classmethod', 'cmp', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'filter', 'float', 'format', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'map', 'max', 'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'print', 'property', 'range', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'vars', 'zip'), prefix=r'(?>> a = 'foo' >>> print a foo >>> 1 / 0 Traceback (most recent call last): File "", line 1, in ZeroDivisionError: integer division or modulo by zero Additional options: `python3` Use Python 3 lexer for code. Default is ``False``. .. versionadded:: 1.0 """ name = 'Python console session' aliases = ['pycon'] mimetypes = ['text/x-python-doctest'] def __init__(self, **options): self.python3 = get_bool_opt(options, 'python3', False) Lexer.__init__(self, **options) def get_tokens_unprocessed(self, text): if self.python3: pylexer = Python3Lexer(**self.options) tblexer = Python3TracebackLexer(**self.options) else: pylexer = PythonLexer(**self.options) tblexer = PythonTracebackLexer(**self.options) curcode = '' insertions = [] curtb = '' tbindex = 0 tb = 0 for match in line_re.finditer(text): line = match.group() if line.startswith(u'>>> ') or line.startswith(u'... '): tb = 0 insertions.append((len(curcode), [(0, Generic.Prompt, line[:4])])) curcode += line[4:] elif line.rstrip() == u'...' and not tb: # only a new >>> prompt can end an exception block # otherwise an ellipsis in place of the traceback frames # will be mishandled insertions.append((len(curcode), [(0, Generic.Prompt, u'...')])) curcode += line[3:] else: if curcode: for item in do_insertions( insertions, pylexer.get_tokens_unprocessed(curcode)): yield item curcode = '' insertions = [] if (line.startswith(u'Traceback (most recent call last):') or re.match(u' File "[^"]+", line \\d+\\n$', line)): tb = 1 curtb = line tbindex = match.start() elif line == 'KeyboardInterrupt\n': yield match.start(), Name.Class, line elif tb: curtb += line if not (line.startswith(' ') or line.strip() == u'...'): tb = 0 for i, t, v in tblexer.get_tokens_unprocessed(curtb): yield tbindex+i, t, v curtb = '' else: yield match.start(), Generic.Output, line if curcode: for item in do_insertions(insertions, pylexer.get_tokens_unprocessed(curcode)): yield item if curtb: for i, t, v in tblexer.get_tokens_unprocessed(curtb): yield tbindex+i, t, v class PythonTracebackLexer(RegexLexer): """ For Python tracebacks. .. versionadded:: 0.7 """ name = 'Python Traceback' aliases = ['pytb'] filenames = ['*.pytb'] mimetypes = ['text/x-python-traceback'] tokens = { 'root': [ # Cover both (most recent call last) and (innermost last) # The optional ^C allows us to catch keyboard interrupt signals. (r'^(\^C)?(Traceback.*\n)', bygroups(Text, Generic.Traceback), 'intb'), # SyntaxError starts with this. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), (r'^.*\n', Other), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), (r'^( File )("[^"]+")(, line )(\d+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(PythonLexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } class Python3TracebackLexer(RegexLexer): """ For Python 3.0 tracebacks, with support for chained exceptions. .. versionadded:: 1.0 """ name = 'Python 3.0 Traceback' aliases = ['py3tb'] filenames = ['*.py3tb'] mimetypes = ['text/x-python3-traceback'] tokens = { 'root': [ (r'\n', Text), (r'^Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'), (r'^During handling of the above exception, another ' r'exception occurred:\n\n', Generic.Traceback), (r'^The above exception was the direct cause of the ' r'following exception:\n\n', Generic.Traceback), (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'), ], 'intb': [ (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text, Name, Text)), (r'^( File )("[^"]+")(, line )(\d+)(\n)', bygroups(Text, Name.Builtin, Text, Number, Text)), (r'^( )(.+)(\n)', bygroups(Text, using(Python3Lexer), Text)), (r'^([ \t]*)(\.\.\.)(\n)', bygroups(Text, Comment, Text)), # for doctests... (r'^([^:]+)(: )(.+)(\n)', bygroups(Generic.Error, Text, Name, Text), '#pop'), (r'^([a-zA-Z_]\w*)(:?\n)', bygroups(Generic.Error, Text), '#pop') ], } class CythonLexer(RegexLexer): """ For Pyrex and `Cython `_ source code. .. versionadded:: 1.1 """ name = 'Cython' aliases = ['cython', 'pyx', 'pyrex'] filenames = ['*.pyx', '*.pxd', '*.pxi'] mimetypes = ['text/x-cython', 'application/x-cython'] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), (r'[^\S\n]+', Text), (r'#.*$', Comment), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'(<)([a-zA-Z0-9.?]+)(>)', bygroups(Punctuation, Keyword.Type, Punctuation)), (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', bygroups(Keyword, Number.Integer, Operator, Name, Operator, Name, Punctuation)), include('keywords'), (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), # (should actually start a block with only cdefs) (r'(cdef)(:)', bygroups(Keyword, Punctuation)), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), include('builtins'), include('backtick'), ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), ('[uU]?"""', String, combined('stringescape', 'tdqs')), ("[uU]?'''", String, combined('stringescape', 'tsqs')), ('[uU]?"', String, combined('stringescape', 'dqs')), ("[uU]?'", String, combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (words(( 'assert', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif', 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil', 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print', 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'), Keyword), (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), ], 'builtins': [ (words(( '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned', 'vars', 'xrange', 'zip'), prefix=r'(?`_, a functional and object-oriented programming language running on the CPython 3 VM. .. versionadded:: 1.6 """ name = 'dg' aliases = ['dg'] filenames = ['*.dg'] mimetypes = ['text/x-dg'] tokens = { 'root': [ (r'\s+', Text), (r'#.*?$', Comment.Single), (r'(?i)0b[01]+', Number.Bin), (r'(?i)0o[0-7]+', Number.Oct), (r'(?i)0x[0-9a-f]+', Number.Hex), (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float), (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float), (r'(?i)[+-]?[0-9]+j?', Number.Integer), (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')), (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')), (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')), (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')), (r"`\w+'*`", Operator), (r'\b(and|in|is|or|where)\b', Operator.Word), (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator), (words(( 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'', 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object', 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str', 'super', 'tuple', 'tuple\'', 'type'), prefix=r'(?