# -*- coding: utf-8 -*- """ pygments.lexers.text ~~~~~~~~~~~~~~~~~~~~ Lexers for non-source code file types. :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import re from bisect import bisect from pygments.lexer import Lexer, LexerContext, RegexLexer, ExtendedRegexLexer, \ bygroups, include, using, this, do_insertions from pygments.token import Punctuation, Text, Comment, Keyword, Name, String, \ Generic, Operator, Number, Whitespace, Literal from pygments.util import get_bool_opt, ClassNotFound from pygments.lexers.other import BashLexer __all__ = ['IniLexer', 'PropertiesLexer', 'SourcesListLexer', 'BaseMakefileLexer', 'MakefileLexer', 'DiffLexer', 'IrcLogsLexer', 'TexLexer', 'GroffLexer', 'ApacheConfLexer', 'BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'VimLexer', 'GettextLexer', 'SquidConfLexer', 'DebianControlLexer', 'DarcsPatchLexer', 'YamlLexer', 'LighttpdConfLexer', 'NginxConfLexer', 'CMakeLexer', 'HttpLexer', 'PyPyLogLexer', 'RegeditLexer', 'HxmlLexer', 'EbnfLexer'] class IniLexer(RegexLexer): """ Lexer for configuration files in INI style. """ name = 'INI' aliases = ['ini', 'cfg', 'dosini'] filenames = ['*.ini', '*.cfg'] mimetypes = ['text/x-ini'] tokens = { 'root': [ (r'\s+', Text), (r'[;#].*', Comment.Single), (r'\[.*?\]$', Keyword), (r'(.*?)([ \t]*)(=)([ \t]*)(.*(?:\n[ \t].+)*)', bygroups(Name.Attribute, Text, Operator, Text, String)) ] } def analyse_text(text): npos = text.find('\n') if npos < 3: return False return text[0] == '[' and text[npos-1] == ']' class RegeditLexer(RegexLexer): """ Lexer for `Windows Registry `_ files produced by regedit. *New in Pygments 1.6.* """ name = 'reg' aliases = ['registry'] filenames = ['*.reg'] mimetypes = ['text/x-windows-registry'] tokens = { 'root': [ (r'Windows Registry Editor.*', Text), (r'\s+', Text), (r'[;#].*', Comment.Single), (r'(\[)(-?)(HKEY_[A-Z_]+)(.*?\])$', bygroups(Keyword, Operator, Name.Builtin, Keyword)), # String keys, which obey somewhat normal escaping (r'("(?:\\"|\\\\|[^"])+")([ \t]*)(=)([ \t]*)', bygroups(Name.Attribute, Text, Operator, Text), 'value'), # Bare keys (includes @) (r'(.*?)([ \t]*)(=)([ \t]*)', bygroups(Name.Attribute, Text, Operator, Text), 'value'), ], 'value': [ (r'-', Operator, '#pop'), # delete value (r'(dword|hex(?:\([0-9a-fA-F]\))?)(:)([0-9a-fA-F,]+)', bygroups(Name.Variable, Punctuation, Number), '#pop'), # As far as I know, .reg files do not support line continuation. (r'.*', String, '#pop'), ] } def analyse_text(text): return text.startswith('Windows Registry Editor') class PropertiesLexer(RegexLexer): """ Lexer for configuration files in Java's properties format. *New in Pygments 1.4.* """ name = 'Properties' aliases = ['properties', 'jproperties'] filenames = ['*.properties'] mimetypes = ['text/x-java-properties'] tokens = { 'root': [ (r'\s+', Text), (r'(?:[;#]|//).*$', Comment), (r'(.*?)([ \t]*)([=:])([ \t]*)(.*(?:(?<=\\)\n.*)*)', bygroups(Name.Attribute, Text, Operator, Text, String)), ], } class SourcesListLexer(RegexLexer): """ Lexer that highlights debian sources.list files. 
*New in Pygments 0.7.* """ name = 'Debian Sourcelist' aliases = ['sourceslist', 'sources.list', 'debsources'] filenames = ['sources.list'] mimetype = ['application/x-debian-sourceslist'] tokens = { 'root': [ (r'\s+', Text), (r'#.*?$', Comment), (r'^(deb(?:-src)?)(\s+)', bygroups(Keyword, Text), 'distribution') ], 'distribution': [ (r'#.*?$', Comment, '#pop'), (r'\$\(ARCH\)', Name.Variable), (r'[^\s$[]+', String), (r'\[', String.Other, 'escaped-distribution'), (r'\$', String), (r'\s+', Text, 'components') ], 'escaped-distribution': [ (r'\]', String.Other, '#pop'), (r'\$\(ARCH\)', Name.Variable), (r'[^\]$]+', String.Other), (r'\$', String.Other) ], 'components': [ (r'#.*?$', Comment, '#pop:2'), (r'$', Text, '#pop:2'), (r'\s+', Text), (r'\S+', Keyword.Pseudo), ] } def analyse_text(text): for line in text.split('\n'): line = line.strip() if not (line.startswith('#') or line.startswith('deb ') or line.startswith('deb-src ') or not line): return False return True class MakefileLexer(Lexer): """ Lexer for BSD and GNU make extensions (lenient enough to handle both in the same file even). *Rewritten in Pygments 0.10.* """ name = 'Makefile' aliases = ['make', 'makefile', 'mf', 'bsdmake'] filenames = ['*.mak', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'] mimetypes = ['text/x-makefile'] r_special = re.compile(r'^(?:' # BSD Make r'\.\s*(include|undef|error|warning|if|else|elif|endif|for|endfor)|' # GNU Make r'\s*(ifeq|ifneq|ifdef|ifndef|else|endif|-?include|define|endef|:))(?=\s)') r_comment = re.compile(r'^\s*@?#') def get_tokens_unprocessed(self, text): ins = [] lines = text.splitlines(True) done = '' lex = BaseMakefileLexer(**self.options) backslashflag = False for line in lines: if self.r_special.match(line) or backslashflag: ins.append((len(done), [(0, Comment.Preproc, line)])) backslashflag = line.strip().endswith('\\') elif self.r_comment.match(line): ins.append((len(done), [(0, Comment, line)])) else: done += line for item in do_insertions(ins, lex.get_tokens_unprocessed(done)): yield item class BaseMakefileLexer(RegexLexer): """ Lexer for simple Makefiles (no preprocessing). *New in Pygments 0.10.* """ name = 'Base Makefile' aliases = ['basemake'] filenames = [] mimetypes = [] tokens = { 'root': [ (r'^(?:[\t ]+.*\n|\n)+', using(BashLexer)), (r'\$\((?:.*\\\n|.*\n)+', using(BashLexer)), (r'\s+', Text), (r'#.*?\n', Comment), (r'(export)(\s+)(?=[a-zA-Z0-9_${}\t -]+\n)', bygroups(Keyword, Text), 'export'), (r'export\s+', Keyword), # assignment (r'([a-zA-Z0-9_${}.-]+)(\s*)([!?:+]?=)([ \t]*)((?:.*\\\n)+|.*\n)', bygroups(Name.Variable, Text, Operator, Text, using(BashLexer))), # strings (r'(?s)"(\\\\|\\.|[^"\\])*"', String.Double), (r"(?s)'(\\\\|\\.|[^'\\])*'", String.Single), # targets (r'([^\n:]+)(:+)([ \t]*)', bygroups(Name.Function, Operator, Text), 'block-header'), # TODO: add paren handling (grr) ], 'export': [ (r'[a-zA-Z0-9_${}-]+', Name.Variable), (r'\n', Text, '#pop'), (r'\s+', Text), ], 'block-header': [ (r'[^,\\\n#]+', Number), (r',', Punctuation), (r'#.*?\n', Comment), (r'\\\n', Text), # line continuation (r'\\.', Text), (r'(?:[\t ]+.*\n|\n)+', using(BashLexer), '#pop'), ], } class DiffLexer(RegexLexer): """ Lexer for unified or context-style diffs or patches. 
""" name = 'Diff' aliases = ['diff', 'udiff'] filenames = ['*.diff', '*.patch'] mimetypes = ['text/x-diff', 'text/x-patch'] tokens = { 'root': [ (r' .*\n', Text), (r'\+.*\n', Generic.Inserted), (r'-.*\n', Generic.Deleted), (r'!.*\n', Generic.Strong), (r'@.*\n', Generic.Subheading), (r'([Ii]ndex|diff).*\n', Generic.Heading), (r'=.*\n', Generic.Heading), (r'.*\n', Text), ] } def analyse_text(text): if text[:7] == 'Index: ': return True if text[:5] == 'diff ': return True if text[:4] == '--- ': return 0.9 DPATCH_KEYWORDS = ['hunk', 'addfile', 'adddir', 'rmfile', 'rmdir', 'move', 'replace'] class DarcsPatchLexer(RegexLexer): """ DarcsPatchLexer is a lexer for the various versions of the darcs patch format. Examples of this format are derived by commands such as ``darcs annotate --patch`` and ``darcs send``. *New in Pygments 0.10.* """ name = 'Darcs Patch' aliases = ['dpatch'] filenames = ['*.dpatch', '*.darcspatch'] tokens = { 'root': [ (r'<', Operator), (r'>', Operator), (r'{', Operator), (r'}', Operator), (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)(\])', bygroups(Operator, Keyword, Name, Text, Name, Operator, Literal.Date, Text, Operator)), (r'(\[)((?:TAG )?)(.*)(\n)(.*)(\*\*)(\d+)(\s?)', bygroups(Operator, Keyword, Name, Text, Name, Operator, Literal.Date, Text), 'comment'), (r'New patches:', Generic.Heading), (r'Context:', Generic.Heading), (r'Patch bundle hash:', Generic.Heading), (r'(\s*)(%s)(.*\n)' % '|'.join(DPATCH_KEYWORDS), bygroups(Text, Keyword, Text)), (r'\+', Generic.Inserted, "insert"), (r'-', Generic.Deleted, "delete"), (r'.*\n', Text), ], 'comment': [ (r'[^\]].*\n', Comment), (r'\]', Operator, "#pop"), ], 'specialText': [ # darcs add [_CODE_] special operators for clarity (r'\n', Text, "#pop"), # line-based (r'\[_[^_]*_]', Operator), ], 'insert': [ include('specialText'), (r'\[', Generic.Inserted), (r'[^\n\[]+', Generic.Inserted), ], 'delete': [ include('specialText'), (r'\[', Generic.Deleted), (r'[^\n\[]+', Generic.Deleted), ], } class IrcLogsLexer(RegexLexer): """ Lexer for IRC logs in *irssi*, *xchat* or *weechat* style. """ name = 'IRC logs' aliases = ['irc'] filenames = ['*.weechatlog'] mimetypes = ['text/x-irclog'] flags = re.VERBOSE | re.MULTILINE timestamp = r""" ( # irssi / xchat and others (?: \[|\()? # Opening bracket or paren for the timestamp (?: # Timestamp (?: (?:\d{1,4} [-/]?)+ # Date as - or /-separated groups of digits [T ])? # Date/time separator: T or space (?: \d?\d [:.]?)+ # Time as :/.-separated groups of 1 or 2 digits ) (?: \]|\))?\s+ # Closing bracket or paren for the timestamp | # weechat \d{4}\s\w{3}\s\d{2}\s # Date \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace | # xchat \w{3}\s\d{2}\s # Date \d{2}:\d{2}:\d{2}\s+ # Time + Whitespace )? """ tokens = { 'root': [ # log start/end (r'^\*\*\*\*(.*)\*\*\*\*$', Comment), # hack ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)), # normal msgs ("^" + timestamp + r""" (\s*<.*?>\s*) # Nick """, bygroups(Comment.Preproc, Name.Tag), 'msg'), # /me msgs ("^" + timestamp + r""" (\s*[*]\s+) # Star (\S+\s+.*?\n) # Nick + rest of message """, bygroups(Comment.Preproc, Keyword, Generic.Inserted)), # join/part msgs ("^" + timestamp + r""" (\s*(?:\*{3}|?)\s*) # Star(s) or symbols (\S+\s+) # Nick + Space (.*?\n) # Rest of message """, bygroups(Comment.Preproc, Keyword, String, Comment)), (r"^.*?\n", Text), ], 'msg': [ (r"\S+:(?!//)", Name.Attribute), # Prefix (r".*\n", Text, '#pop'), ], } class BBCodeLexer(RegexLexer): """ A lexer that highlights BBCode(-like) syntax. 
*New in Pygments 0.6.* """ name = 'BBCode' aliases = ['bbcode'] mimetypes = ['text/x-bbcode'] tokens = { 'root': [ (r'[^[]+', Text), # tag/end tag begin (r'\[/?\w+', Keyword, 'tag'), # stray bracket (r'\[', Text), ], 'tag': [ (r'\s+', Text), # attribute with value (r'(\w+)(=)("?[^\s"\]]+"?)', bygroups(Name.Attribute, Operator, String)), # tag argument (a la [color=green]) (r'(=)("?[^\s"\]]+"?)', bygroups(Operator, String)), # tag end (r'\]', Keyword, '#pop'), ], } class TexLexer(RegexLexer): """ Lexer for the TeX and LaTeX typesetting languages. """ name = 'TeX' aliases = ['tex', 'latex'] filenames = ['*.tex', '*.aux', '*.toc'] mimetypes = ['text/x-tex', 'text/x-latex'] tokens = { 'general': [ (r'%.*?\n', Comment), (r'[{}]', Name.Builtin), (r'[&_^]', Name.Builtin), ], 'root': [ (r'\\\[', String.Backtick, 'displaymath'), (r'\\\(', String, 'inlinemath'), (r'\$\$', String.Backtick, 'displaymath'), (r'\$', String, 'inlinemath'), (r'\\([a-zA-Z]+|.)', Keyword, 'command'), include('general'), (r'[^\\$%&_^{}]+', Text), ], 'math': [ (r'\\([a-zA-Z]+|.)', Name.Variable), include('general'), (r'[0-9]+', Number), (r'[-=!+*/()\[\]]', Operator), (r'[^=!+*/()\[\]\\$%&_^{}0-9-]+', Name.Builtin), ], 'inlinemath': [ (r'\\\)', String, '#pop'), (r'\$', String, '#pop'), include('math'), ], 'displaymath': [ (r'\\\]', String, '#pop'), (r'\$\$', String, '#pop'), (r'\$', Name.Builtin), include('math'), ], 'command': [ (r'\[.*?\]', Name.Attribute), (r'\*', Keyword), (r'', Text, '#pop'), ], } def analyse_text(text): for start in ("\\documentclass", "\\input", "\\documentstyle", "\\relax"): if text[:len(start)] == start: return True class GroffLexer(RegexLexer): """ Lexer for the (g)roff typesetting language, supporting groff extensions. Mainly useful for highlighting manpage sources. *New in Pygments 0.6.* """ name = 'Groff' aliases = ['groff', 'nroff', 'man'] filenames = ['*.[1234567]', '*.man'] mimetypes = ['application/x-troff', 'text/troff'] tokens = { 'root': [ (r'(\.)(\w+)', bygroups(Text, Keyword), 'request'), (r'\.', Punctuation, 'request'), # Regular characters, slurp till we find a backslash or newline (r'[^\\\n]*', Text, 'textline'), ], 'textline': [ include('escapes'), (r'[^\\\n]+', Text), (r'\n', Text, '#pop'), ], 'escapes': [ # groff has many ways to write escapes. (r'\\"[^\n]*', Comment), (r'\\[fn]\w', String.Escape), (r'\\\(.{2}', String.Escape), (r'\\.\[.*\]', String.Escape), (r'\\.', String.Escape), (r'\\\n', Text, 'request'), ], 'request': [ (r'\n', Text, '#pop'), include('escapes'), (r'"[^\n"]+"', String.Double), (r'\d+', Number), (r'\S+', String), (r'\s+', Text), ], } def analyse_text(text): if text[:1] != '.': return False if text[:3] == '.\\"': return True if text[:4] == '.TH ': return True if text[1:3].isalnum() and text[3].isspace(): return 0.9 class ApacheConfLexer(RegexLexer): """ Lexer for configuration files following the Apache config file format. 
*New in Pygments 0.6.* """ name = 'ApacheConf' aliases = ['apacheconf', 'aconf', 'apache'] filenames = ['.htaccess', 'apache.conf', 'apache2.conf'] mimetypes = ['text/x-apacheconf'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'\s+', Text), (r'(#.*?)$', Comment), (r'(<[^\s>]+)(?:(\s+)(.*?))?(>)', bygroups(Name.Tag, Text, String, Name.Tag)), (r'([a-zA-Z][a-zA-Z0-9_]*)(\s+)', bygroups(Name.Builtin, Text), 'value'), (r'\.+', Text), ], 'value': [ (r'$', Text, '#pop'), (r'[^\S\n]+', Text), (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number), (r'\d+', Number), (r'/([a-zA-Z0-9][a-zA-Z0-9_./-]+)', String.Other), (r'(on|off|none|any|all|double|email|dns|min|minimal|' r'os|productonly|full|emerg|alert|crit|error|warn|' r'notice|info|debug|registry|script|inetd|standalone|' r'user|group)\b', Keyword), (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double), (r'[^\s"]+', Text) ] } class MoinWikiLexer(RegexLexer): """ For MoinMoin (and Trac) Wiki markup. *New in Pygments 0.7.* """ name = 'MoinMoin/Trac Wiki markup' aliases = ['trac-wiki', 'moin'] filenames = [] mimetypes = ['text/x-trac-wiki'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'^#.*$', Comment), (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next # Titles (r'^(=+)([^=]+)(=+)(\s*#.+)?$', bygroups(Generic.Heading, using(this), Generic.Heading, String)), # Literal code blocks, with optional shebang (r'({{{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting # Lists (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), # Other Formatting (r'\[\[\w+.*?\]\]', Keyword), # Macro (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', bygroups(Keyword, String, Keyword)), # Link (r'^----+$', Keyword), # Horizontal rules (r'[^\n\'\[{!_~^,|]+', Text), (r'\n', Text), (r'.', Text), ], 'codeblock': [ (r'}}}', Name.Builtin, '#pop'), # these blocks are allowed to be nested in Trac, but not MoinMoin (r'{{{', Text, '#push'), (r'[^{}]+', Comment.Preproc), # slurp boring text (r'.', Comment.Preproc), # allow loose { or } ], } class RstLexer(RegexLexer): """ For `reStructuredText `_ markup. *New in Pygments 0.7.* Additional options accepted: `handlecodeblocks` Highlight the contents of ``.. sourcecode:: langauge`` and ``.. code:: language`` directives with a lexer for the given language (default: ``True``). *New in Pygments 0.8.* """ name = 'reStructuredText' aliases = ['rst', 'rest', 'restructuredtext'] filenames = ['*.rst', '*.rest'] mimetypes = ["text/x-rst", "text/prs.fallenstein.rst"] flags = re.MULTILINE def _handle_sourcecode(self, match): from pygments.lexers import get_lexer_by_name # section header yield match.start(1), Punctuation, match.group(1) yield match.start(2), Text, match.group(2) yield match.start(3), Operator.Word, match.group(3) yield match.start(4), Punctuation, match.group(4) yield match.start(5), Text, match.group(5) yield match.start(6), Keyword, match.group(6) yield match.start(7), Text, match.group(7) # lookup lexer if wanted and existing lexer = None if self.handlecodeblocks: try: lexer = get_lexer_by_name(match.group(6).strip()) except ClassNotFound: pass indention = match.group(8) indention_size = len(indention) code = (indention + match.group(9) + match.group(10) + match.group(11)) # no lexer for this language. handle it like it was a code block if lexer is None: yield match.start(8), String, code return # highlight the lines with the lexer. 
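        # The loop below dedents the code block before lexing it: for every
        # line that extends past the directive's indentation, the stripped
        # prefix is recorded as an insertion so that do_insertions() can
        # splice it back into the sub-lexer's token stream as plain Text,
        # keeping token offsets aligned with the original reST source.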
ins = [] codelines = code.splitlines(True) code = '' for line in codelines: if len(line) > indention_size: ins.append((len(code), [(0, Text, line[:indention_size])])) code += line[indention_size:] else: code += line for item in do_insertions(ins, lexer.get_tokens_unprocessed(code)): yield item # from docutils.parsers.rst.states closers = '\'")]}>\u2019\u201d\xbb!?' unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0' end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' % (re.escape(unicode_delimiters), re.escape(closers))) tokens = { 'root': [ # Heading with overline (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' r'(.+)(\n)(\1)(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text, Generic.Heading, Text)), # Plain heading (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', bygroups(Generic.Heading, Text, Generic.Heading, Text)), # Bulleted lists (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', bygroups(Text, Number, using(this, state='inline'))), # Numbered lists (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', bygroups(Text, Number, using(this, state='inline'))), (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', bygroups(Text, Number, using(this, state='inline'))), # Numbered, but keep words at BOL from becoming lists (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', bygroups(Text, Number, using(this, state='inline'))), (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', bygroups(Text, Number, using(this, state='inline'))), # Line blocks (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', bygroups(Text, Operator, using(this, state='inline'))), # Sourcecode directives (r'^( *\.\.)(\s*)((?:source)?code)(::)([ \t]*)([^\n]+)' r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*|)\n)+)', _handle_sourcecode), # A directive (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, using(this, state='inline'))), # A reference target (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), # A footnote/citation target (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), # A substitution def (r'^( *\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, Punctuation, Text, using(this, state='inline'))), # Comments (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), # Field list (r'^( *)(:[a-zA-Z-]+:)(\s*)$', bygroups(Text, Name.Class, Text)), (r'^( *)(:.*?:)([ \t]+)(.*?)$', bygroups(Text, Name.Class, Text, Name.Function)), # Definition list (r'^([^ ].*(?)(`__?)', # reference with inline target bygroups(String, String.Interpol, String)), (r'`.+?`__?', String), # reference (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', bygroups(Name.Variable, Name.Attribute)), # role (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', bygroups(Name.Attribute, Name.Variable)), # role (content first) (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis (r'\*.+?\*', Generic.Emph), # Emphasis (r'\[.*?\]_', String), # Footnote or citation (r'<.+?>', Name.Tag), # Hyperlink (r'[^\\\n\[*`:]+', Text), (r'.', Text), ], 'literal': [ (r'[^`]+', String), (r'``' + end_string_suffix, String, '#pop'), (r'`', String), ] } def __init__(self, **options): self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) RegexLexer.__init__(self, **options) def analyse_text(text): if text[:2] == '..' 
and text[2:3] != '.': return 0.3 p1 = text.find("\n") p2 = text.find("\n", p1 + 1) if (p2 > -1 and # has two lines p1 * 2 + 1 == p2 and # they are the same length text[p1+1] in '-=' and # the next line both starts and ends with text[p1+1] == text[p2-1]): # ...a sufficiently high header return 0.5 class VimLexer(RegexLexer): """ Lexer for VimL script files. *New in Pygments 0.8.* """ name = 'VimL' aliases = ['vim'] filenames = ['*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'] mimetypes = ['text/x-vim'] flags = re.MULTILINE tokens = { 'root': [ (r'^\s*".*', Comment), (r'[ \t]+', Text), # TODO: regexes can have other delims (r'/(\\\\|\\/|[^\n/])*/', String.Regex), (r'"(\\\\|\\"|[^\n"])*"', String.Double), (r"'(\\\\|\\'|[^\n'])*'", String.Single), # Who decided that doublequote was a good comment character?? (r'(?<=\s)"[^\-:.%#=*].*', Comment), (r'-?\d+', Number), (r'#[0-9a-f]{6}', Number.Hex), (r'^:', Punctuation), (r'[()<>+=!|,~-]', Punctuation), # Inexact list. Looks decent. (r'\b(let|if|else|endif|elseif|fun|function|endfunction)\b', Keyword), (r'\b(NONE|bold|italic|underline|dark|light)\b', Name.Builtin), (r'\b\w+\b', Name.Other), # These are postprocessed below (r'.', Text), ], } def __init__(self, **options): from pygments.lexers._vimbuiltins import command, option, auto self._cmd = command self._opt = option self._aut = auto RegexLexer.__init__(self, **options) def is_in(self, w, mapping): r""" It's kind of difficult to decide if something might be a keyword in VimL because it allows you to abbreviate them. In fact, 'ab[breviate]' is a good example. :ab, :abbre, or :abbreviate are valid ways to call it so rather than making really awful regexps like:: \bab(?:b(?:r(?:e(?:v(?:i(?:a(?:t(?:e)?)?)?)?)?)?)?)?\b we match `\b\w+\b` and then call is_in() on those tokens. See `scripts/get_vimkw.py` for how the lists are extracted. """ p = bisect(mapping, (w,)) if p > 0: if mapping[p-1][0] == w[:len(mapping[p-1][0])] and \ mapping[p-1][1][:len(w)] == w: return True if p < len(mapping): return mapping[p][0] == w[:len(mapping[p][0])] and \ mapping[p][1][:len(w)] == w return False def get_tokens_unprocessed(self, text): # TODO: builtins are only subsequent tokens on lines # and 'keywords' only happen at the beginning except # for :au ones for index, token, value in \ RegexLexer.get_tokens_unprocessed(self, text): if token is Name.Other: if self.is_in(value, self._cmd): yield index, Keyword, value elif self.is_in(value, self._opt) or \ self.is_in(value, self._aut): yield index, Name.Builtin, value else: yield index, Text, value else: yield index, token, value class GettextLexer(RegexLexer): """ Lexer for Gettext catalog files. *New in Pygments 0.9.* """ name = 'Gettext Catalog' aliases = ['pot', 'po'] filenames = ['*.pot', '*.po'] mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext'] tokens = { 'root': [ (r'^#,\s.*?$', Keyword.Type), (r'^#:\s.*?$', Keyword.Declaration), #(r'^#$', Comment), (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single), (r'^(")([A-Za-z-]+:)(.*")$', bygroups(String, Name.Property, String)), (r'^".*"$', String), (r'^(msgid|msgid_plural|msgstr)(\s+)(".*")$', bygroups(Name.Variable, Text, String)), (r'^(msgstr\[)(\d)(\])(\s+)(".*")$', bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)), ] } class SquidConfLexer(RegexLexer): """ Lexer for `squid `_ configuration files. 
*New in Pygments 0.9.* """ name = 'SquidConf' aliases = ['squidconf', 'squid.conf', 'squid'] filenames = ['squid.conf'] mimetypes = ['text/x-squidconf'] flags = re.IGNORECASE keywords = [ "access_log", "acl", "always_direct", "announce_host", "announce_period", "announce_port", "announce_to", "anonymize_headers", "append_domain", "as_whois_server", "auth_param_basic", "authenticate_children", "authenticate_program", "authenticate_ttl", "broken_posts", "buffered_logs", "cache_access_log", "cache_announce", "cache_dir", "cache_dns_program", "cache_effective_group", "cache_effective_user", "cache_host", "cache_host_acl", "cache_host_domain", "cache_log", "cache_mem", "cache_mem_high", "cache_mem_low", "cache_mgr", "cachemgr_passwd", "cache_peer", "cache_peer_access", "cahce_replacement_policy", "cache_stoplist", "cache_stoplist_pattern", "cache_store_log", "cache_swap", "cache_swap_high", "cache_swap_log", "cache_swap_low", "client_db", "client_lifetime", "client_netmask", "connect_timeout", "coredump_dir", "dead_peer_timeout", "debug_options", "delay_access", "delay_class", "delay_initial_bucket_level", "delay_parameters", "delay_pools", "deny_info", "dns_children", "dns_defnames", "dns_nameservers", "dns_testnames", "emulate_httpd_log", "err_html_text", "fake_user_agent", "firewall_ip", "forwarded_for", "forward_snmpd_port", "fqdncache_size", "ftpget_options", "ftpget_program", "ftp_list_width", "ftp_passive", "ftp_user", "half_closed_clients", "header_access", "header_replace", "hierarchy_stoplist", "high_response_time_warning", "high_page_fault_warning", "hosts_file", "htcp_port", "http_access", "http_anonymizer", "httpd_accel", "httpd_accel_host", "httpd_accel_port", "httpd_accel_uses_host_header", "httpd_accel_with_proxy", "http_port", "http_reply_access", "icp_access", "icp_hit_stale", "icp_port", "icp_query_timeout", "ident_lookup", "ident_lookup_access", "ident_timeout", "incoming_http_average", "incoming_icp_average", "inside_firewall", "ipcache_high", "ipcache_low", "ipcache_size", "local_domain", "local_ip", "logfile_rotate", "log_fqdn", "log_icp_queries", "log_mime_hdrs", "maximum_object_size", "maximum_single_addr_tries", "mcast_groups", "mcast_icp_query_timeout", "mcast_miss_addr", "mcast_miss_encode_key", "mcast_miss_port", "memory_pools", "memory_pools_limit", "memory_replacement_policy", "mime_table", "min_http_poll_cnt", "min_icp_poll_cnt", "minimum_direct_hops", "minimum_object_size", "minimum_retry_timeout", "miss_access", "negative_dns_ttl", "negative_ttl", "neighbor_timeout", "neighbor_type_domain", "netdb_high", "netdb_low", "netdb_ping_period", "netdb_ping_rate", "never_direct", "no_cache", "passthrough_proxy", "pconn_timeout", "pid_filename", "pinger_program", "positive_dns_ttl", "prefer_direct", "proxy_auth", "proxy_auth_realm", "query_icmp", "quick_abort", "quick_abort", "quick_abort_max", "quick_abort_min", "quick_abort_pct", "range_offset_limit", "read_timeout", "redirect_children", "redirect_program", "redirect_rewrites_host_header", "reference_age", "reference_age", "refresh_pattern", "reload_into_ims", "request_body_max_size", "request_size", "request_timeout", "shutdown_lifetime", "single_parent_bypass", "siteselect_timeout", "snmp_access", "snmp_incoming_address", "snmp_port", "source_ping", "ssl_proxy", "store_avg_object_size", "store_objects_per_bucket", "strip_query_terms", "swap_level1_dirs", "swap_level2_dirs", "tcp_incoming_address", "tcp_outgoing_address", "tcp_recv_bufsize", "test_reachability", "udp_hit_obj", "udp_hit_obj_size", 
"udp_incoming_address", "udp_outgoing_address", "unique_hostname", "unlinkd_program", "uri_whitespace", "useragent_log", "visible_hostname", "wais_relay", "wais_relay_host", "wais_relay_port", ] opts = [ "proxy-only", "weight", "ttl", "no-query", "default", "round-robin", "multicast-responder", "on", "off", "all", "deny", "allow", "via", "parent", "no-digest", "heap", "lru", "realm", "children", "q1", "q2", "credentialsttl", "none", "disable", "offline_toggle", "diskd", ] actions = [ "shutdown", "info", "parameter", "server_list", "client_list", r'squid\.conf', ] actions_stats = [ "objects", "vm_objects", "utilization", "ipcache", "fqdncache", "dns", "redirector", "io", "reply_headers", "filedescriptors", "netdb", ] actions_log = ["status", "enable", "disable", "clear"] acls = [ "url_regex", "urlpath_regex", "referer_regex", "port", "proto", "req_mime_type", "rep_mime_type", "method", "browser", "user", "src", "dst", "time", "dstdomain", "ident", "snmp_community", ] ip_re = ( r'(?:(?:(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|0x0*[0-9a-f]{1,2}|' r'0+[1-3]?[0-7]{0,2})(?:\.(?:[3-9]\d?|2(?:5[0-5]|[0-4]?\d)?|1\d{0,2}|' r'0x0*[0-9a-f]{1,2}|0+[1-3]?[0-7]{0,2})){3})|(?!.*::.*::)(?:(?!:)|' r':(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?`` outputs. *New in Pygments 0.9.* """ name = 'Debian Control file' aliases = ['control', 'debcontrol'] filenames = ['control'] tokens = { 'root': [ (r'^(Description)', Keyword, 'description'), (r'^(Maintainer)(:\s*)', bygroups(Keyword, Text), 'maintainer'), (r'^((Build-)?Depends)', Keyword, 'depends'), (r'^((?:Python-)?Version)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^((?:Installed-)?Size)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^(MD5Sum|SHA1|SHA256)(:\s*)(\S+)$', bygroups(Keyword, Text, Number)), (r'^([a-zA-Z\-0-9\.]*?)(:\s*)(.*?)$', bygroups(Keyword, Whitespace, String)), ], 'maintainer': [ (r'<[^>]+>', Generic.Strong), (r'<[^>]+>$', Generic.Strong, '#pop'), (r',\n?', Text), (r'.', Text), ], 'description': [ (r'(.*)(Homepage)(: )(\S+)', bygroups(Text, String, Name, Name.Class)), (r':.*\n', Generic.Strong), (r' .*\n', Text), ('', Text, '#pop'), ], 'depends': [ (r':\s*', Text), (r'(\$)(\{)(\w+\s*:\s*\w+)', bygroups(Operator, Text, Name.Entity)), (r'\(', Text, 'depend_vers'), (r',', Text), (r'\|', Operator), (r'[\s]+', Text), (r'[}\)]\s*$', Text, '#pop'), (r'}', Text), (r'[^,]$', Name.Function, '#pop'), (r'([\+\.a-zA-Z0-9-])(\s*)', bygroups(Name.Function, Text)), (r'\[.*?\]', Name.Entity), ], 'depend_vers': [ (r'\),', Text, '#pop'), (r'\)[^,]', Text, '#pop:2'), (r'([><=]+)(\s*)([^\)]+)', bygroups(Operator, Text, Number)) ] } class YamlLexerContext(LexerContext): """Indentation context for the YAML lexer.""" def __init__(self, *args, **kwds): super(YamlLexerContext, self).__init__(*args, **kwds) self.indent_stack = [] self.indent = -1 self.next_indent = 0 self.block_scalar_indent = None class YamlLexer(ExtendedRegexLexer): """ Lexer for `YAML `_, a human-friendly data serialization language. 
*New in Pygments 0.11.* """ name = 'YAML' aliases = ['yaml'] filenames = ['*.yaml', '*.yml'] mimetypes = ['text/x-yaml'] def something(token_class): """Do not produce empty tokens.""" def callback(lexer, match, context): text = match.group() if not text: return yield match.start(), token_class, text context.pos = match.end() return callback def reset_indent(token_class): """Reset the indentation levels.""" def callback(lexer, match, context): text = match.group() context.indent_stack = [] context.indent = -1 context.next_indent = 0 context.block_scalar_indent = None yield match.start(), token_class, text context.pos = match.end() return callback def save_indent(token_class, start=False): """Save a possible indentation level.""" def callback(lexer, match, context): text = match.group() extra = '' if start: context.next_indent = len(text) if context.next_indent < context.indent: while context.next_indent < context.indent: context.indent = context.indent_stack.pop() if context.next_indent > context.indent: extra = text[context.indent:] text = text[:context.indent] else: context.next_indent += len(text) if text: yield match.start(), token_class, text if extra: yield match.start()+len(text), token_class.Error, extra context.pos = match.end() return callback def set_indent(token_class, implicit=False): """Set the previously saved indentation level.""" def callback(lexer, match, context): text = match.group() if context.indent < context.next_indent: context.indent_stack.append(context.indent) context.indent = context.next_indent if not implicit: context.next_indent += len(text) yield match.start(), token_class, text context.pos = match.end() return callback def set_block_scalar_indent(token_class): """Set an explicit indentation level for a block scalar.""" def callback(lexer, match, context): text = match.group() context.block_scalar_indent = None if not text: return increment = match.group(1) if increment: current_indent = max(context.indent, 0) increment = int(increment) context.block_scalar_indent = current_indent + increment if text: yield match.start(), token_class, text context.pos = match.end() return callback def parse_block_scalar_empty_line(indent_token_class, content_token_class): """Process an empty line in a block scalar.""" def callback(lexer, match, context): text = match.group() if (context.block_scalar_indent is None or len(text) <= context.block_scalar_indent): if text: yield match.start(), indent_token_class, text else: indentation = text[:context.block_scalar_indent] content = text[context.block_scalar_indent:] yield match.start(), indent_token_class, indentation yield (match.start()+context.block_scalar_indent, content_token_class, content) context.pos = match.end() return callback def parse_block_scalar_indent(token_class): """Process indentation spaces in a block scalar.""" def callback(lexer, match, context): text = match.group() if context.block_scalar_indent is None: if len(text) <= max(context.indent, 0): context.stack.pop() context.stack.pop() return context.block_scalar_indent = len(text) else: if len(text) < context.block_scalar_indent: context.stack.pop() context.stack.pop() return if text: yield match.start(), token_class, text context.pos = match.end() return callback def parse_plain_scalar_indent(token_class): """Process indentation spaces in a plain scalar.""" def callback(lexer, match, context): text = match.group() if len(text) <= context.indent: context.stack.pop() context.stack.pop() return if text: yield match.start(), token_class, text context.pos = 
match.end() return callback tokens = { # the root rules 'root': [ # ignored whitespaces (r'[ ]+(?=#|$)', Text), # line breaks (r'\n+', Text), # a comment (r'#[^\n]*', Comment.Single), # the '%YAML' directive (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'), # the %TAG directive (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'), # document start and document end indicators (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace), 'block-line'), # indentation spaces (r'[ ]*(?![ \t\n\r\f\v]|$)', save_indent(Text, start=True), ('block-line', 'indentation')), ], # trailing whitespaces after directives or a block scalar indicator 'ignored-line': [ # ignored whitespaces (r'[ ]+(?=#|$)', Text), # a comment (r'#[^\n]*', Comment.Single), # line break (r'\n', Text, '#pop:2'), ], # the %YAML directive 'yaml-directive': [ # the version number (r'([ ]+)([0-9]+\.[0-9]+)', bygroups(Text, Number), 'ignored-line'), ], # the %YAG directive 'tag-directive': [ # a tag handle and the corresponding prefix (r'([ ]+)(!|![0-9A-Za-z_-]*!)' r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)', bygroups(Text, Keyword.Type, Text, Keyword.Type), 'ignored-line'), ], # block scalar indicators and indentation spaces 'indentation': [ # trailing whitespaces are ignored (r'[ ]*$', something(Text), '#pop:2'), # whitespaces preceeding block collection indicators (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)), # block collection indicators (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)), # the beginning a block line (r'[ ]*', save_indent(Text), '#pop'), ], # an indented line in the block context 'block-line': [ # the line end (r'[ ]*(?=#|$)', something(Text), '#pop'), # whitespaces separating tokens (r'[ ]+', Text), # tags, anchors and aliases, include('descriptors'), # block collections and scalars include('block-nodes'), # flow collections and quoted scalars include('flow-nodes'), # a plain scalar (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])', something(Name.Variable), 'plain-scalar-in-block-context'), ], # tags, anchors, aliases 'descriptors' : [ # a full-form tag (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Keyword.Type), # a tag in the form '!', '!suffix' or '!handle!suffix' (r'!(?:[0-9A-Za-z_-]+)?' 
r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Keyword.Type), # an anchor (r'&[0-9A-Za-z_-]+', Name.Label), # an alias (r'\*[0-9A-Za-z_-]+', Name.Variable), ], # block collections and scalars 'block-nodes': [ # implicit key (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)), # literal and folded scalars (r'[|>]', Punctuation.Indicator, ('block-scalar-content', 'block-scalar-header')), ], # flow collections and quoted scalars 'flow-nodes': [ # a flow sequence (r'\[', Punctuation.Indicator, 'flow-sequence'), # a flow mapping (r'\{', Punctuation.Indicator, 'flow-mapping'), # a single-quoted scalar (r'\'', String, 'single-quoted-scalar'), # a double-quoted scalar (r'\"', String, 'double-quoted-scalar'), ], # the content of a flow collection 'flow-collection': [ # whitespaces (r'[ ]+', Text), # line breaks (r'\n+', Text), # a comment (r'#[^\n]*', Comment.Single), # simple indicators (r'[?:,]', Punctuation.Indicator), # tags, anchors and aliases include('descriptors'), # nested collections and quoted scalars include('flow-nodes'), # a plain scalar (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])', something(Name.Variable), 'plain-scalar-in-flow-context'), ], # a flow sequence indicated by '[' and ']' 'flow-sequence': [ # include flow collection rules include('flow-collection'), # the closing indicator (r'\]', Punctuation.Indicator, '#pop'), ], # a flow mapping indicated by '{' and '}' 'flow-mapping': [ # include flow collection rules include('flow-collection'), # the closing indicator (r'\}', Punctuation.Indicator, '#pop'), ], # block scalar lines 'block-scalar-content': [ # line break (r'\n', Text), # empty line (r'^[ ]+$', parse_block_scalar_empty_line(Text, Name.Constant)), # indentation spaces (we may leave the state here) (r'^[ ]*', parse_block_scalar_indent(Text)), # line content (r'[^\n\r\f\v]+', Name.Constant), ], # the content of a literal or folded scalar 'block-scalar-header': [ # indentation indicator followed by chomping flag (r'([1-9])?[+-]?(?=[ ]|$)', set_block_scalar_indent(Punctuation.Indicator), 'ignored-line'), # chomping flag followed by indentation indicator (r'[+-]?([1-9])?(?=[ ]|$)', set_block_scalar_indent(Punctuation.Indicator), 'ignored-line'), ], # ignored and regular whitespaces in quoted scalars 'quoted-scalar-whitespaces': [ # leading and trailing whitespaces are ignored (r'^[ ]+', Text), (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text), # other whitespaces are a part of the value (r'[ ]+', Name.Variable), ], # single-quoted scalars 'single-quoted-scalar': [ # include whitespace and line break rules include('quoted-scalar-whitespaces'), # escaping of the quote character (r'\'\'', String.Escape), # regular non-whitespace characters (r'[^ \t\n\r\f\v\']+', String), # the closing quote (r'\'', String, '#pop'), ], # double-quoted scalars 'double-quoted-scalar': [ # include whitespace and line break rules include('quoted-scalar-whitespaces'), # escaping of special characters (r'\\[0abt\tn\nvfre "\\N_LP]', String), # escape codes (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})', String.Escape), # regular non-whitespace characters (r'[^ \t\n\r\f\v\"\\]+', String), # the closing quote (r'"', String, '#pop'), ], # the beginning of a new line while scanning a plain scalar 'plain-scalar-in-block-context-new-line': [ # empty lines (r'^[ ]+$', Text), # line breaks (r'\n+', Text), # document start and document end indicators (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'), # indentation spaces (we may leave the block line state here) (r'^[ 
]*', parse_plain_scalar_indent(Text), '#pop'), ], # a plain scalar in the block context 'plain-scalar-in-block-context': [ # the scalar ends with the ':' indicator (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'), # the scalar ends with whitespaces followed by a comment (r'[ ]+(?=#)', Text, '#pop'), # trailing whitespaces are ignored (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text, 'plain-scalar-in-block-context-new-line'), # other whitespaces are a part of the value (r'[ ]+', Literal.Scalar.Plain), # regular non-whitespace characters (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+', Literal.Scalar.Plain), ], # a plain scalar is the flow context 'plain-scalar-in-flow-context': [ # the scalar ends with an indicator character (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'), # the scalar ends with a comment (r'[ ]+(?=#)', Text, '#pop'), # leading and trailing whitespaces are ignored (r'^[ ]+', Text), (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text), # other whitespaces are a part of the value (r'[ ]+', Name.Variable), # regular non-whitespace characters (r'[^ \t\n\r\f\v,:?\[\]{}]+', Name.Variable), ], } def get_tokens_unprocessed(self, text=None, context=None): if context is None: context = YamlLexerContext(text, 0) return super(YamlLexer, self).get_tokens_unprocessed(text, context) class LighttpdConfLexer(RegexLexer): """ Lexer for `Lighttpd `_ configuration files. *New in Pygments 0.11.* """ name = 'Lighttpd configuration file' aliases = ['lighty', 'lighttpd'] filenames = [] mimetypes = ['text/x-lighttpd-conf'] tokens = { 'root': [ (r'#.*\n', Comment.Single), (r'/\S*', Name), # pathname (r'[a-zA-Z._-]+', Keyword), (r'\d+\.\d+\.\d+\.\d+(?:/\d+)?', Number), (r'[0-9]+', Number), (r'=>|=~|\+=|==|=|\+', Operator), (r'\$[A-Z]+', Name.Builtin), (r'[(){}\[\],]', Punctuation), (r'"([^"\\]*(?:\\.[^"\\]*)*)"', String.Double), (r'\s+', Text), ], } class NginxConfLexer(RegexLexer): """ Lexer for `Nginx `_ configuration files. *New in Pygments 0.11.* """ name = 'Nginx configuration file' aliases = ['nginx'] filenames = [] mimetypes = ['text/x-nginx-conf'] tokens = { 'root': [ (r'(include)(\s+)([^\s;]+)', bygroups(Keyword, Text, Name)), (r'[^\s;#]+', Keyword, 'stmt'), include('base'), ], 'block': [ (r'}', Punctuation, '#pop:2'), (r'[^\s;#]+', Keyword.Namespace, 'stmt'), include('base'), ], 'stmt': [ (r'{', Punctuation, 'block'), (r';', Punctuation, '#pop'), include('base'), ], 'base': [ (r'#.*\n', Comment.Single), (r'on|off', Name.Constant), (r'\$[^\s;#()]+', Name.Variable), (r'([a-z0-9.-]+)(:)([0-9]+)', bygroups(Name, Punctuation, Number.Integer)), (r'[a-z-]+/[a-z-+]+', String), # mimetype #(r'[a-zA-Z._-]+', Keyword), (r'[0-9]+[km]?\b', Number.Integer), (r'(~)(\s*)([^\s{]+)', bygroups(Punctuation, Text, String.Regex)), (r'[:=~]', Punctuation), (r'[^\s;#{}$]+', String), # catch all (r'/[^\s;#]*', Name), # pathname (r'\s+', Text), (r'[$;]', Text), # leftover characters ], } class CMakeLexer(RegexLexer): """ Lexer for `CMake `_ files. 
*New in Pygments 1.2.* """ name = 'CMake' aliases = ['cmake'] filenames = ['*.cmake', 'CMakeLists.txt'] mimetypes = ['text/x-cmake'] tokens = { 'root': [ #(r'(ADD_CUSTOM_COMMAND|ADD_CUSTOM_TARGET|ADD_DEFINITIONS|' # r'ADD_DEPENDENCIES|ADD_EXECUTABLE|ADD_LIBRARY|ADD_SUBDIRECTORY|' # r'ADD_TEST|AUX_SOURCE_DIRECTORY|BUILD_COMMAND|BUILD_NAME|' # r'CMAKE_MINIMUM_REQUIRED|CONFIGURE_FILE|CREATE_TEST_SOURCELIST|' # r'ELSE|ELSEIF|ENABLE_LANGUAGE|ENABLE_TESTING|ENDFOREACH|' # r'ENDFUNCTION|ENDIF|ENDMACRO|ENDWHILE|EXEC_PROGRAM|' # r'EXECUTE_PROCESS|EXPORT_LIBRARY_DEPENDENCIES|FILE|FIND_FILE|' # r'FIND_LIBRARY|FIND_PACKAGE|FIND_PATH|FIND_PROGRAM|FLTK_WRAP_UI|' # r'FOREACH|FUNCTION|GET_CMAKE_PROPERTY|GET_DIRECTORY_PROPERTY|' # r'GET_FILENAME_COMPONENT|GET_SOURCE_FILE_PROPERTY|' # r'GET_TARGET_PROPERTY|GET_TEST_PROPERTY|IF|INCLUDE|' # r'INCLUDE_DIRECTORIES|INCLUDE_EXTERNAL_MSPROJECT|' # r'INCLUDE_REGULAR_EXPRESSION|INSTALL|INSTALL_FILES|' # r'INSTALL_PROGRAMS|INSTALL_TARGETS|LINK_DIRECTORIES|' # r'LINK_LIBRARIES|LIST|LOAD_CACHE|LOAD_COMMAND|MACRO|' # r'MAKE_DIRECTORY|MARK_AS_ADVANCED|MATH|MESSAGE|OPTION|' # r'OUTPUT_REQUIRED_FILES|PROJECT|QT_WRAP_CPP|QT_WRAP_UI|REMOVE|' # r'REMOVE_DEFINITIONS|SEPARATE_ARGUMENTS|SET|' # r'SET_DIRECTORY_PROPERTIES|SET_SOURCE_FILES_PROPERTIES|' # r'SET_TARGET_PROPERTIES|SET_TESTS_PROPERTIES|SITE_NAME|' # r'SOURCE_GROUP|STRING|SUBDIR_DEPENDS|SUBDIRS|' # r'TARGET_LINK_LIBRARIES|TRY_COMPILE|TRY_RUN|UNSET|' # r'USE_MANGLED_MESA|UTILITY_SOURCE|VARIABLE_REQUIRES|' # r'VTK_MAKE_INSTANTIATOR|VTK_WRAP_JAVA|VTK_WRAP_PYTHON|' # r'VTK_WRAP_TCL|WHILE|WRITE_FILE|' # r'COUNTARGS)\b', Name.Builtin, 'args'), (r'\b(\w+)([ \t]*)(\()', bygroups(Name.Builtin, Text, Punctuation), 'args'), include('keywords'), include('ws') ], 'args': [ (r'\(', Punctuation, '#push'), (r'\)', Punctuation, '#pop'), (r'(\${)(.+?)(})', bygroups(Operator, Name.Variable, Operator)), (r'(?s)".*?"', String.Double), (r'\\\S+', String), (r'[^\)$"# \t\n]+', String), (r'\n', Text), # explicitly legal include('keywords'), include('ws') ], 'string': [ ], 'keywords': [ (r'\b(WIN32|UNIX|APPLE|CYGWIN|BORLAND|MINGW|MSVC|MSVC_IDE|MSVC60|' r'MSVC70|MSVC71|MSVC80|MSVC90)\b', Keyword), ], 'ws': [ (r'[ \t]+', Text), (r'#.+\n', Comment), ] } class HttpLexer(RegexLexer): """ Lexer for HTTP sessions. 
*New in Pygments 1.5.* """ name = 'HTTP' aliases = ['http'] flags = re.DOTALL def header_callback(self, match): if match.group(1).lower() == 'content-type': content_type = match.group(5).strip() if ';' in content_type: content_type = content_type[:content_type.find(';')].strip() self.content_type = content_type yield match.start(1), Name.Attribute, match.group(1) yield match.start(2), Text, match.group(2) yield match.start(3), Operator, match.group(3) yield match.start(4), Text, match.group(4) yield match.start(5), Literal, match.group(5) yield match.start(6), Text, match.group(6) def continuous_header_callback(self, match): yield match.start(1), Text, match.group(1) yield match.start(2), Literal, match.group(2) yield match.start(3), Text, match.group(3) def content_callback(self, match): content_type = getattr(self, 'content_type', None) content = match.group() offset = match.start() if content_type: from pygments.lexers import get_lexer_for_mimetype try: lexer = get_lexer_for_mimetype(content_type) except ClassNotFound: pass else: for idx, token, value in lexer.get_tokens_unprocessed(content): yield offset + idx, token, value return yield offset, Text, content tokens = { 'root': [ (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH)( +)([^ ]+)( +)' r'(HTTP)(/)(1\.[01])(\r?\n|$)', bygroups(Name.Function, Text, Name.Namespace, Text, Keyword.Reserved, Operator, Number, Text), 'headers'), (r'(HTTP)(/)(1\.[01])( +)(\d{3})( +)([^\r\n]+)(\r?\n|$)', bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text, Name.Exception, Text), 'headers'), ], 'headers': [ (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|$)', header_callback), (r'([\t ]+)([^\r\n]+)(\r?\n|$)', continuous_header_callback), (r'\r?\n', Text, 'content') ], 'content': [ (r'.+', content_callback) ] } class PyPyLogLexer(RegexLexer): """ Lexer for PyPy log files. 
*New in Pygments 1.5.* """ name = "PyPy Log" aliases = ["pypylog", "pypy"] filenames = ["*.pypylog"] mimetypes = ['application/x-pypylog'] tokens = { "root": [ (r"\[\w+\] {jit-log-.*?$", Keyword, "jit-log"), (r"\[\w+\] {jit-backend-counts$", Keyword, "jit-backend-counts"), include("extra-stuff"), ], "jit-log": [ (r"\[\w+\] jit-log-.*?}$", Keyword, "#pop"), (r"^\+\d+: ", Comment), (r"--end of the loop--", Comment), (r"[ifp]\d+", Name), (r"ptr\d+", Name), (r"(\()(\w+(?:\.\w+)?)(\))", bygroups(Punctuation, Name.Builtin, Punctuation)), (r"[\[\]=,()]", Punctuation), (r"(\d+\.\d+|inf|-inf)", Number.Float), (r"-?\d+", Number.Integer), (r"'.*'", String), (r"(None|descr|ConstClass|ConstPtr|TargetToken)", Name), (r"<.*?>+", Name.Builtin), (r"(label|debug_merge_point|jump|finish)", Name.Class), (r"(int_add_ovf|int_add|int_sub_ovf|int_sub|int_mul_ovf|int_mul|" r"int_floordiv|int_mod|int_lshift|int_rshift|int_and|int_or|" r"int_xor|int_eq|int_ne|int_ge|int_gt|int_le|int_lt|int_is_zero|" r"int_is_true|" r"uint_floordiv|uint_ge|uint_lt|" r"float_add|float_sub|float_mul|float_truediv|float_neg|" r"float_eq|float_ne|float_ge|float_gt|float_le|float_lt|float_abs|" r"ptr_eq|ptr_ne|instance_ptr_eq|instance_ptr_ne|" r"cast_int_to_float|cast_float_to_int|" r"force_token|quasiimmut_field|same_as|virtual_ref_finish|" r"virtual_ref|mark_opaque_ptr|" r"call_may_force|call_assembler|call_loopinvariant|" r"call_release_gil|call_pure|call|" r"new_with_vtable|new_array|newstr|newunicode|new|" r"arraylen_gc|" r"getarrayitem_gc_pure|getarrayitem_gc|setarrayitem_gc|" r"getarrayitem_raw|setarrayitem_raw|getfield_gc_pure|" r"getfield_gc|getinteriorfield_gc|setinteriorfield_gc|" r"getfield_raw|setfield_gc|setfield_raw|" r"strgetitem|strsetitem|strlen|copystrcontent|" r"unicodegetitem|unicodesetitem|unicodelen|" r"guard_true|guard_false|guard_value|guard_isnull|" r"guard_nonnull_class|guard_nonnull|guard_class|guard_no_overflow|" r"guard_not_forced|guard_no_exception|guard_not_invalidated)", Name.Builtin), include("extra-stuff"), ], "jit-backend-counts": [ (r"\[\w+\] jit-backend-counts}$", Keyword, "#pop"), (r":", Punctuation), (r"\d+", Number), include("extra-stuff"), ], "extra-stuff": [ (r"\s+", Text), (r"#.*?$", Comment), ], } class HxmlLexer(RegexLexer): """ Lexer for `haXe build `_ files. 
*New in Pygments 1.6.* """ name = 'Hxml' aliases = ['haxeml', 'hxml'] filenames = ['*.hxml'] tokens = { 'root': [ # Seperator (r'(--)(next)', bygroups(Punctuation, Generic.Heading)), # Compiler switches with one dash (r'(-)(prompt|debug|v)', bygroups(Punctuation, Keyword.Keyword)), # Compilerswitches with two dashes (r'(--)(neko-source|flash-strict|flash-use-stage|no-opt|no-traces|' r'no-inline|times|no-output)', bygroups(Punctuation, Keyword)), # Targets and other options that take an argument (r'(-)(cpp|js|neko|x|as3|swf9?|swf-lib|php|xml|main|lib|D|resource|' r'cp|cmd)( +)(.+)', bygroups(Punctuation, Keyword, Whitespace, String)), # Options that take only numerical arguments (r'(-)(swf-version)( +)(\d+)', bygroups(Punctuation, Keyword, Number.Integer)), # An Option that defines the size, the fps and the background # color of an flash movie (r'(-)(swf-header)( +)(\d+)(:)(\d+)(:)(\d+)(:)([A-Fa-f0-9]{6})', bygroups(Punctuation, Keyword, Whitespace, Number.Integer, Punctuation, Number.Integer, Punctuation, Number.Integer, Punctuation, Number.Hex)), # options with two dashes that takes arguments (r'(--)(js-namespace|php-front|php-lib|remap|gen-hx-classes)( +)' r'(.+)', bygroups(Punctuation, Keyword, Whitespace, String)), # Single line comment, multiline ones are not allowed. (r'#.*', Comment.Single) ] } class EbnfLexer(RegexLexer): """ Lexer for `ISO/IEC 14977 EBNF `_ grammars. *New in Pygments 1.7.* """ name = 'EBNF' aliases = ['ebnf'] filenames = ['*.ebnf'] mimetypes = ['text/x-ebnf'] tokens = { 'root': [ include('whitespace'), include('comment_start'), include('identifier'), (r'=', Operator, 'production'), ], 'production': [ include('whitespace'), include('comment_start'), include('identifier'), (r'"[^"]*"', String.Double), (r"'[^']*'", String.Single), (r'(\?[^?]*\?)', Name.Entity), (r'[\[\]{}(),|]', Punctuation), (r'-', Operator), (r';', Punctuation, '#pop'), ], 'whitespace': [ (r'\s+', Text), ], 'comment_start': [ (r'\(\*', Comment.Multiline, 'comment'), ], 'comment': [ (r'[^*)]', Comment.Multiline), include('comment_start'), (r'\*\)', Comment.Multiline, '#pop'), (r'[*)]', Comment.Multiline), ], 'identifier': [ (r'([a-zA-Z][a-zA-Z0-9 \-]*)', Keyword), ], }
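

# ----------------------------------------------------------------------------
# Illustrative, hedged usage sketch (not part of the library API): running
# this module directly exercises a few of the lexers defined above.  The
# sample inputs are invented for demonstration purposes; the Pygments helpers
# used here (highlight, TerminalFormatter, Lexer.get_tokens) are the standard
# public entry points.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    # (lexer instance, invented sample text) pairs
    samples = [
        (IniLexer(), "[server]\nhost = example.org  ; demo comment\nport = 8080\n"),
        (DiffLexer(), "--- a/file.txt\n+++ b/file.txt\n@@ -1 +1 @@\n-old line\n+new line\n"),
        (YamlLexer(), "name: demo\nitems:\n  - one\n  - two\n"),
    ]

    formatter = TerminalFormatter()
    for lexer, sample in samples:
        print('== %s ==' % lexer.name)
        # highlight() runs the lexer over the text and renders ANSI output
        print(highlight(sample, lexer, formatter))
        # get_tokens() yields (token type, value) pairs for raw inspection
        for ttype, value in lexer.get_tokens(sample):
            if value.strip():
                print('%-30s %r' % (ttype, value))
        print('')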