From 258a676759b79239e287e535f797b9fe5313b9a0 Mon Sep 17 00:00:00 2001 From: Alan Hamlett Date: Mon, 15 Jun 2015 18:27:58 -0700 Subject: [PATCH] limit language detection to known file extensions, unless file contents has a vim modeline --- plugin/packages/wakatime/stats.py | 56 ++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/plugin/packages/wakatime/stats.py b/plugin/packages/wakatime/stats.py index 874cf00..7a4c91b 100644 --- a/plugin/packages/wakatime/stats.py +++ b/plugin/packages/wakatime/stats.py @@ -20,13 +20,14 @@ if sys.version_info[0] == 2: sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py2')) else: sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py3')) -from pygments.lexers import guess_lexer, guess_lexer_for_filename +from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename +from pygments.modeline import get_filetype_from_buffer log = logging.getLogger('WakaTime') -# force file name extensions to be recognized as a certain language +# extensions taking priority over lexer EXTENSIONS = { 'j2': 'HTML', 'markdown': 'Markdown', @@ -34,6 +35,8 @@ EXTENSIONS = { 'mdown': 'Markdown', 'twig': 'Twig', } + +# lexers to human readable languages TRANSLATIONS = { 'CSS+Genshi Text': 'CSS', 'CSS+Lasso': 'CSS', @@ -45,6 +48,11 @@ TRANSLATIONS = { 'RHTML': 'HTML', } +# extensions for when no lexer is found +AUXILIARY_EXTENSIONS = { + 'vb': 'VB.net', +} + def guess_language(file_name): """Guess lexer and language for a file. @@ -58,11 +66,17 @@ def guess_language(file_name): # guess language from file extension if file_name: - language = guess_language_from_extension(file_name.rsplit('.', 1)[-1]) + language = get_language_from_extension(file_name, EXTENSIONS) # get language from lexer if we didn't have a hard-coded extension rule if language is None and lexer: - language = translate_language(u(lexer.name)) + language = u(lexer.name) + + if language is None: + language = get_language_from_extension(file_name, AUXILIARY_EXTENSIONS) + + if language is not None: + language = translate_language(language) return language, lexer @@ -79,11 +93,11 @@ def smart_guess_lexer(file_name): text = get_file_contents(file_name) try: - guess_1 = guess_lexer(text) + guess_1 = guess_lexer_for_filename(file_name, text) except: guess_1 = None try: - guess_2 = guess_lexer_for_filename(file_name, text) + guess_2 = guess_lexer_using_modeline(text) except: guess_2 = None try: @@ -104,15 +118,33 @@ def smart_guess_lexer(file_name): return lexer -def guess_language_from_extension(extension): - """Checks hard-coded extension map for a matching language. +def get_language_from_extension(file_name, extension_map): + """Returns a matching language for the given file_name using extension_map. """ + extension = file_name.rsplit('.', 1)[-1] if len(file_name.rsplit('.', 1)) > 1 else None + if extension: - if extension in EXTENSIONS: - return EXTENSIONS[extension] - if extension.lower() in EXTENSIONS: - return EXTENSIONS[extension.lower()] + if extension in extension_map: + return extension_map[extension] + if extension.lower() in extension_map: + return extension_map[extension.lower()] + + return None + + +def guess_lexer_using_modeline(text): + """Guess lexer for given text using Vim modeline. + """ + + file_type = get_filetype_from_buffer(text) + + if file_type is not None: + try: + return get_lexer_by_name(file_type) + except: + pass + return None