# -*- coding: utf-8 -*- """ pygments.lexers.erlang ~~~~~~~~~~~~~~~~~~~~~~ Lexers for Erlang. :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import re from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \ include, default from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Generic __all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer', 'ElixirLexer'] line_re = re.compile('.*?\n') class ErlangLexer(RegexLexer): """ For the Erlang functional programming language. Blame Jeremy Thurgood (http://jerith.za.net/). .. versionadded:: 0.9 """ name = 'Erlang' aliases = ['erlang'] filenames = ['*.erl', '*.hrl', '*.es', '*.escript'] mimetypes = ['text/x-erlang'] keywords = ( 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', 'query', 'receive', 'try', 'when', ) builtins = ( # See erlang(3) man page 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module', 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', 'float', 'float_to_list', 'fun_info', 'fun_to_list', 'function_exported', 'garbage_collect', 'get', 'get_keys', 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5', 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor', 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', 'pid_to_list', 'port_close', 'port_command', 'port_connect', 'port_control', 'port_call', 'port_info', 'port_to_list', 'process_display', 'process_flag', 'process_info', 'purge_module', 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', 'spawn_opt', 'split_binary', 'start_timer', 'statistics', 'suspend_process', 'system_flag', 'system_info', 'system_monitor', 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' ) operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)' word_operators = ( 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', 'div', 'not', 'or', 'orelse', 'rem', 'xor' ) atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')" variable_re = r'(?:[A-Z_]\w*)' escape_re = r'(?:\\(?:[bdefnrstv\'"\\/]|[0-7][0-7]?[0-7]?|\^[a-zA-Z]))' macro_re = r'(?:'+variable_re+r'|'+atom_re+r')' base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' tokens = { 'root': [ (r'\s+', Text), (r'%.*\n', Comment), (words(keywords, suffix=r'\b'), Keyword), (words(builtins, suffix=r'\b'), Name.Builtin), (words(word_operators, suffix=r'\b'), Operator.Word), (r'^-', Punctuation, 'directive'), (operators, Operator), (r'"', String, 'string'), (r'<<', Name.Label), (r'>>', Name.Label), ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)), ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()', bygroups(Name.Function, Text, Punctuation)), (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer), (r'[+-]?\d+', Number.Integer), (r'[+-]?\d+.\d+', Number.Float), (r'[]\[:_@\".{}()|;,]', Punctuation), (variable_re, Name.Variable), (atom_re, Name), (r'\?'+macro_re, Name.Constant), (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char), (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label), ], 'string': [ (escape_re, String.Escape), (r'"', String, '#pop'), (r'~[0-9.*]*[~#+bBcdefginpPswWxX]', String.Interpol), (r'[^"\\~]+', String), (r'~', String), ], 'directive': [ (r'(define)(\s*)(\()('+macro_re+r')', bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'), (r'(record)(\s*)(\()('+macro_re+r')', bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'), (atom_re, Name.Entity, '#pop'), ], } class ErlangShellLexer(Lexer): """ Shell sessions in erl (for Erlang code). .. versionadded:: 1.1 """ name = 'Erlang erl session' aliases = ['erl'] filenames = ['*.erl-sh'] mimetypes = ['text/x-erl-shellsession'] _prompt_re = re.compile(r'\d+>(?=\s|\Z)') def get_tokens_unprocessed(self, text): erlexer = ErlangLexer(**self.options) curcode = '' insertions = [] for match in line_re.finditer(text): line = match.group() m = self._prompt_re.match(line) if m is not None: end = m.end() insertions.append((len(curcode), [(0, Generic.Prompt, line[:end])])) curcode += line[end:] else: if curcode: for item in do_insertions(insertions, erlexer.get_tokens_unprocessed(curcode)): yield item curcode = '' insertions = [] if line.startswith('*'): yield match.start(), Generic.Traceback, line else: yield match.start(), Generic.Output, line if curcode: for item in do_insertions(insertions, erlexer.get_tokens_unprocessed(curcode)): yield item def gen_elixir_string_rules(name, symbol, token): states = {} states['string_' + name] = [ (r'[^#%s\\]+' % (symbol,), token), include('escapes'), (r'\\.', token), (r'(%s)' % (symbol,), bygroups(token), "#pop"), include('interpol') ] return states def gen_elixir_sigstr_rules(term, token, interpol=True): if interpol: return [ (r'[^#%s\\]+' % (term,), token), include('escapes'), (r'\\.', token), (r'%s[a-zA-Z]*' % (term,), token, '#pop'), include('interpol') ] else: return [ (r'[^%s\\]+' % (term,), token), (r'\\.', token), (r'%s[a-zA-Z]*' % (term,), token, '#pop'), ] class ElixirLexer(RegexLexer): """ For the `Elixir language `_. .. versionadded:: 1.5 """ name = 'Elixir' aliases = ['elixir', 'ex', 'exs'] filenames = ['*.ex', '*.exs'] mimetypes = ['text/x-elixir'] KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch') KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in') BUILTIN = ( 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise', 'quote', 'unquote', 'unquote_splicing', 'throw', 'super' ) BUILTIN_DECLARATION = ( 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop', 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback' ) BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias') CONSTANT = ('nil', 'true', 'false') PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__') OPERATORS3 = ( '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==', '~>>', '<~>', '|~>', '<|>', ) OPERATORS2 = ( '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~', '->', '<-', '|', '.', '=', '~>', '<~', ) OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&') PUNCTUATION = ( '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']' ) def get_tokens_unprocessed(self, text): for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): if token is Name: if value in self.KEYWORD: yield index, Keyword, value elif value in self.KEYWORD_OPERATOR: yield index, Operator.Word, value elif value in self.BUILTIN: yield index, Keyword, value elif value in self.BUILTIN_DECLARATION: yield index, Keyword.Declaration, value elif value in self.BUILTIN_NAMESPACE: yield index, Keyword.Namespace, value elif value in self.CONSTANT: yield index, Name.Constant, value elif value in self.PSEUDO_VAR: yield index, Name.Builtin.Pseudo, value else: yield index, token, value else: yield index, token, value def gen_elixir_sigil_rules(): # all valid sigil terminators (excluding heredocs) terminators = [ (r'\{', r'\}', 'cb'), (r'\[', r'\]', 'sb'), (r'\(', r'\)', 'pa'), (r'<', r'>', 'ab'), (r'/', r'/', 'slas'), (r'\|', r'\|', 'pipe'), ('"', '"', 'quot'), ("'", "'", 'apos'), ] # heredocs have slightly different rules triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')] token = String.Other states = {'sigils': []} for term, name in triquotes: states['sigils'] += [ (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-intp')), (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc), (name + '-end', name + '-no-intp')), ] states[name + '-end'] = [ (r'[a-zA-Z]+', token, '#pop'), default('#pop'), ] states[name + '-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_interpol'), ] states[name + '-no-intp'] = [ (r'^\s*' + term, String.Heredoc, '#pop'), include('heredoc_no_interpol'), ] for lterm, rterm, name in terminators: states['sigils'] += [ (r'~[a-z]' + lterm, token, name + '-intp'), (r'~[A-Z]' + lterm, token, name + '-no-intp'), ] states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token) states[name + '-no-intp'] = \ gen_elixir_sigstr_rules(rterm, token, interpol=False) return states op3_re = "|".join(re.escape(s) for s in OPERATORS3) op2_re = "|".join(re.escape(s) for s in OPERATORS2) op1_re = "|".join(re.escape(s) for s in OPERATORS1) ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re) punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION) alnum = '\w' name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum} complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re) special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})' long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})' hex_char_re = r'(\\x[\da-fA-F]{1,2})' escape_char_re = r'(\\[abdefnrstv])' tokens = { 'root': [ (r'\s+', Text), (r'#.*$', Comment.Single), # Various kinds of characters (r'(\?)' + long_hex_char_re, bygroups(String.Char, String.Escape, Number.Hex, String.Escape)), (r'(\?)' + hex_char_re, bygroups(String.Char, String.Escape)), (r'(\?)' + escape_char_re, bygroups(String.Char, String.Escape)), (r'\?\\?.', String.Char), # '::' has to go before atoms (r':::', String.Symbol), (r'::', Operator), # atoms (r':' + special_atom_re, String.Symbol), (r':' + complex_name_re, String.Symbol), (r':"', String.Symbol, 'string_double_atom'), (r":'", String.Symbol, 'string_single_atom'), # [keywords: ...] (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re), bygroups(String.Symbol, Punctuation)), # @attributes (r'@' + name_re, Name.Attribute), # identifiers (name_re, Name), (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)), # operators and punctuation (op3_re, Operator), (op2_re, Operator), (punctuation_re, Punctuation), (r'&\d', Name.Entity), # anon func arguments (op1_re, Operator), # numbers (r'0b[01]+', Number.Bin), (r'0o[0-7]+', Number.Oct), (r'0x[\da-fA-F]+', Number.Hex), (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float), (r'\d(_?\d)*', Number.Integer), # strings and heredocs (r'"""\s*', String.Heredoc, 'heredoc_double'), (r"'''\s*$", String.Heredoc, 'heredoc_single'), (r'"', String.Double, 'string_double'), (r"'", String.Single, 'string_single'), include('sigils'), (r'%\{', Punctuation, 'map_key'), (r'\{', Punctuation, 'tuple'), ], 'heredoc_double': [ (r'^\s*"""', String.Heredoc, '#pop'), include('heredoc_interpol'), ], 'heredoc_single': [ (r"^\s*'''", String.Heredoc, '#pop'), include('heredoc_interpol'), ], 'heredoc_interpol': [ (r'[^#\\\n]+', String.Heredoc), include('escapes'), (r'\\.', String.Heredoc), (r'\n+', String.Heredoc), include('interpol'), ], 'heredoc_no_interpol': [ (r'[^\\\n]+', String.Heredoc), (r'\\.', String.Heredoc), (r'\n+', String.Heredoc), ], 'escapes': [ (long_hex_char_re, bygroups(String.Escape, Number.Hex, String.Escape)), (hex_char_re, String.Escape), (escape_char_re, String.Escape), ], 'interpol': [ (r'#\{', String.Interpol, 'interpol_string'), ], 'interpol_string': [ (r'\}', String.Interpol, "#pop"), include('root') ], 'map_key': [ include('root'), (r':', Punctuation, 'map_val'), (r'=>', Punctuation, 'map_val'), (r'\}', Punctuation, '#pop'), ], 'map_val': [ include('root'), (r',', Punctuation, '#pop'), (r'(?=\})', Punctuation, '#pop'), ], 'tuple': [ include('root'), (r'\}', Punctuation, '#pop'), ], } tokens.update(gen_elixir_string_rules('double', '"', String.Double)) tokens.update(gen_elixir_string_rules('single', "'", String.Single)) tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol)) tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol)) tokens.update(gen_elixir_sigil_rules()) class ElixirConsoleLexer(Lexer): """ For Elixir interactive console (iex) output like: .. sourcecode:: iex iex> [head | tail] = [1,2,3] [1,2,3] iex> head 1 iex> tail [2,3] iex> [head | tail] [1,2,3] iex> length [head | tail] 3 .. versionadded:: 1.5 """ name = 'Elixir iex session' aliases = ['iex'] mimetypes = ['text/x-elixir-shellsession'] _prompt_re = re.compile('(iex|\.{3})(\(\d+\))?> ') def get_tokens_unprocessed(self, text): exlexer = ElixirLexer(**self.options) curcode = '' in_error = False insertions = [] for match in line_re.finditer(text): line = match.group() if line.startswith(u'** '): in_error = True insertions.append((len(curcode), [(0, Generic.Error, line[:-1])])) curcode += line[-1:] else: m = self._prompt_re.match(line) if m is not None: in_error = False end = m.end() insertions.append((len(curcode), [(0, Generic.Prompt, line[:end])])) curcode += line[end:] else: if curcode: for item in do_insertions( insertions, exlexer.get_tokens_unprocessed(curcode)): yield item curcode = '' insertions = [] token = Generic.Error if in_error else Generic.Output yield match.start(), token, line if curcode: for item in do_insertions( insertions, exlexer.get_tokens_unprocessed(curcode)): yield item