# -*- coding: utf-8 -*-
"""
    pygments.lexers
    ~~~~~~~~~~~~~~~

    Pygments lexers.

    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
import sys
import types
import fnmatch
from os.path import basename

from pygments.lexers._mapping import LEXERS
from pygments.modeline import get_filetype_from_buffer
from pygments.plugin import find_plugin_lexers
from pygments.util import ClassNotFound, itervalues, guess_decode

__all__ = ['get_lexer_by_name', 'get_lexer_for_filename', 'find_lexer_class',
           'guess_lexer'] + list(LEXERS)

_lexer_cache = {}
_pattern_cache = {}


def _fn_matches(fn, glob):
    """Return whether the supplied file name fn matches pattern filename."""
    if glob not in _pattern_cache:
        pattern = _pattern_cache[glob] = re.compile(fnmatch.translate(glob))
        return pattern.match(fn)
    return _pattern_cache[glob].match(fn)


def _load_lexers(module_name):
    """Load a lexer (and all others in the module too)."""
    mod = __import__(module_name, None, None, ['__all__'])
    for lexer_name in mod.__all__:
        cls = getattr(mod, lexer_name)
        _lexer_cache[cls.name] = cls


def get_all_lexers():
    """Return a generator of tuples in the form ``(name, aliases,
    filenames, mimetypes)`` of all known lexers.
    """
    for item in itervalues(LEXERS):
        yield item[1:]
    for lexer in find_plugin_lexers():
        yield lexer.name, lexer.aliases, lexer.filenames, lexer.mimetypes


def find_lexer_class(name):
    """Lookup a lexer class by name.

    Return None if not found.
    """
    if name in _lexer_cache:
        return _lexer_cache[name]
    # lookup builtin lexers
    for module_name, lname, aliases, _, _ in itervalues(LEXERS):
        if name == lname:
            _load_lexers(module_name)
            return _lexer_cache[name]
    # continue with lexers from setuptools entrypoints
    for cls in find_plugin_lexers():
        if cls.name == name:
            return cls


def get_lexer_by_name(_alias, **options):
    """Get a lexer by an alias.

    Raises ClassNotFound if not found.
    """
    if not _alias:
        raise ClassNotFound('no lexer for alias %r found' % _alias)

    # lookup builtin lexers
    for module_name, name, aliases, _, _ in itervalues(LEXERS):
        if _alias.lower() in aliases:
            if name not in _lexer_cache:
                _load_lexers(module_name)
            return _lexer_cache[name](**options)
    # continue with lexers from setuptools entrypoints
    for cls in find_plugin_lexers():
        if _alias.lower() in cls.aliases:
            return cls(**options)
    raise ClassNotFound('no lexer for alias %r found' % _alias)
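

# Illustrative usage of the alias lookup above (a sketch, not part of the
# module itself; it assumes a standard Pygments install where 'python' is a
# registered alias):
#
#     >>> from pygments.lexers import get_lexer_by_name
#     >>> get_lexer_by_name('python').name
#     'Python'
#     >>> get_lexer_by_name('no-such-alias')
#     Traceback (most recent call last):
#       ...
#     pygments.util.ClassNotFound: no lexer for alias 'no-such-alias' found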


def find_lexer_class_for_filename(_fn, code=None):
    """Get a lexer for a filename.

    If multiple lexers match the filename pattern, use ``analyse_text()``
    to figure out which one is more appropriate.

    Returns None if not found.
    """
    matches = []
    fn = basename(_fn)
    for modname, name, _, filenames, _ in itervalues(LEXERS):
        for filename in filenames:
            if _fn_matches(fn, filename):
                if name not in _lexer_cache:
                    _load_lexers(modname)
                matches.append((_lexer_cache[name], filename))
    for cls in find_plugin_lexers():
        for filename in cls.filenames:
            if _fn_matches(fn, filename):
                matches.append((cls, filename))

    if sys.version_info > (3,) and isinstance(code, bytes):
        # decode it, since all analyse_text functions expect unicode
        code = guess_decode(code)

    def get_rating(info):
        cls, filename = info
        # explicit patterns get a bonus
        bonus = '*' not in filename and 0.5 or 0
        # The class _always_ defines analyse_text because it's included in
        # the Lexer class.  The default implementation returns None which
        # gets turned into 0.0.  Run scripts/detect_missing_analyse_text.py
        # to find lexers which need it overridden.
        if code:
            return cls.analyse_text(code) + bonus
        return cls.priority + bonus

    if matches:
        matches.sort(key=get_rating)
        # print "Possible lexers, after sort:", matches
        return matches[-1][0]


def get_lexer_for_filename(_fn, code=None, **options):
    """Get a lexer for a filename.

    If multiple lexers match the filename pattern, use ``analyse_text()``
    to figure out which one is more appropriate.

    Raises ClassNotFound if not found.
    """
    res = find_lexer_class_for_filename(_fn, code)
    if not res:
        raise ClassNotFound('no lexer for filename %r found' % _fn)
    return res(**options)


def get_lexer_for_mimetype(_mime, **options):
    """Get a lexer for a mimetype.

    Raises ClassNotFound if not found.
    """
    for modname, name, _, _, mimetypes in itervalues(LEXERS):
        if _mime in mimetypes:
            if name not in _lexer_cache:
                _load_lexers(modname)
            return _lexer_cache[name](**options)
    for cls in find_plugin_lexers():
        if _mime in cls.mimetypes:
            return cls(**options)
    raise ClassNotFound('no lexer for mimetype %r found' % _mime)
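

# A brief illustration of the filename and mimetype lookups above (a sketch,
# not part of the module; it assumes the stock Pygments registrations where
# '*.py' and 'text/x-python' both map to the Python lexer):
#
#     >>> from pygments.lexers import get_lexer_for_filename, get_lexer_for_mimetype
#     >>> get_lexer_for_filename('setup.py').name
#     'Python'
#     >>> get_lexer_for_mimetype('text/x-python').name
#     'Python'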

def _iter_lexerclasses(plugins=True):
    """Return an iterator over all lexer classes."""
    for key in sorted(LEXERS):
        module_name, name = LEXERS[key][:2]
        if name not in _lexer_cache:
            _load_lexers(module_name)
        yield _lexer_cache[name]
    if plugins:
        for lexer in find_plugin_lexers():
            yield lexer


def guess_lexer_for_filename(_fn, _text, **options):
    """
    Look up all lexers that handle the given filename as a primary pattern
    (``filenames``) or as a secondary one (``alias_filenames``). Then run a
    text analysis for those lexers and choose the best result.

    usage::

        >>> from pygments.lexers import guess_lexer_for_filename
        >>> guess_lexer_for_filename('hello.html', '<%= @foo %>')
        >>> guess_lexer_for_filename('hello.html', '<h1>{{ title|e }}</h1>')
        >>> guess_lexer_for_filename('style.css', 'a { color: <?= $link ?> }')
    """
    fn = basename(_fn)
    primary = {}
    matching_lexers = set()
    for lexer in _iter_lexerclasses():
        for filename in lexer.filenames:
            if _fn_matches(fn, filename):
                matching_lexers.add(lexer)
                primary[lexer] = True
        for filename in lexer.alias_filenames:
            if _fn_matches(fn, filename):
                matching_lexers.add(lexer)
                primary[lexer] = False
    if not matching_lexers:
        raise ClassNotFound('no lexer for filename %r found' % fn)
    if len(matching_lexers) == 1:
        return matching_lexers.pop()(**options)
    result = []
    for lexer in matching_lexers:
        rv = lexer.analyse_text(_text)
        if rv == 1.0:
            return lexer(**options)
        result.append((rv, lexer))

    def type_sort(t):
        # sort by:
        # - analyse score
        # - is primary filename pattern?
        # - priority
        # - last resort: class name
        return (t[0], primary[t[1]], t[1].priority, t[1].__name__)
    result.sort(key=type_sort)

    return result[-1][1](**options)


def guess_lexer(_text, **options):
    """Guess a lexer by strong distinctions in the text (e.g., shebang)."""

    # try to get a vim modeline first
    ft = get_filetype_from_buffer(_text)

    if ft is not None:
        try:
            return get_lexer_by_name(ft, **options)
        except ClassNotFound:
            pass

    best_lexer = [0.0, None]
    for lexer in _iter_lexerclasses():
        rv = lexer.analyse_text(_text)
        if rv == 1.0:
            return lexer(**options)
        if rv > best_lexer[0]:
            best_lexer[:] = (rv, lexer)
    if not best_lexer[0] or best_lexer[1] is None:
        raise ClassNotFound('no lexer matching the text found')
    return best_lexer[1](**options)
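

# Content-based guessing, illustrated (a sketch, not part of the module; it
# assumes the Python lexer's analyse_text recognizes the shebang line, which
# is how a standard Pygments install behaves):
#
#     >>> from pygments.lexers import guess_lexer
#     >>> guess_lexer('#!/usr/bin/env python\nprint("Hello")\n').name
#     'Python'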

class _automodule(types.ModuleType):
    """Automatically import lexers."""

    def __getattr__(self, name):
        info = LEXERS.get(name)
        if info:
            _load_lexers(info[0])
            cls = _lexer_cache[info[1]]
            setattr(self, name, cls)
            return cls
        raise AttributeError(name)


oldmod = sys.modules[__name__]
newmod = _automodule(__name__)
newmod.__dict__.update(oldmod.__dict__)
sys.modules[__name__] = newmod
del newmod.newmod, newmod.oldmod, newmod.sys, newmod.types
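
# The module swap above makes lexer classes lazy attributes: accessing a name
# from the LEXERS mapping triggers _automodule.__getattr__, which imports the
# defining lexer module on demand and caches the class.  Illustrative usage
# (a sketch, assuming a standard Pygments install):
#
#     >>> from pygments.lexers import PythonLexer   # imported lazily via __getattr__
#     >>> PythonLexer().name
#     'Python'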