From 6e9e8a9ce2ccb98772eb216b275353afdb5ce71e Mon Sep 17 00:00:00 2001 From: Alan Hamlett Date: Mon, 10 Aug 2015 11:33:39 -0700 Subject: [PATCH] move language rules to server, detect header files - move language translations to api server - move extension rules to api server - detect correct header file language based on presence of .cpp or .c files named the same as the .h file --- wakatime/stats.py | 87 ++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 66 deletions(-) diff --git a/wakatime/stats.py b/wakatime/stats.py index 39699a6..a862f8d 100644 --- a/wakatime/stats.py +++ b/wakatime/stats.py @@ -28,56 +28,19 @@ from pygments.util import ClassNotFound log = logging.getLogger('WakaTime') -# extensions taking priority over lexer -EXTENSIONS = { - 'j2': 'HTML', - 'markdown': 'Markdown', - 'md': 'Markdown', - 'mdown': 'Markdown', - 'twig': 'Twig', -} - -# lexers to human readable languages -TRANSLATIONS = { - 'CSS+Genshi Text': 'CSS', - 'CSS+Lasso': 'CSS', - 'HTML+Django/Jinja': 'HTML', - 'HTML+Lasso': 'HTML', - 'JavaScript+Genshi Text': 'JavaScript', - 'JavaScript+Lasso': 'JavaScript', - 'Perl6': 'Perl', - 'RHTML': 'HTML', -} - -# extensions for when no lexer is found -AUXILIARY_EXTENSIONS = { - 'vb': 'VB.net', -} - - def guess_language(file_name): """Guess lexer and language for a file. Returns (language, lexer) tuple where language is a unicode string. """ + language = get_language_from_extension(file_name) + if language: + return language, None + lexer = smart_guess_lexer(file_name) - language = None - - # guess language from file extension - if file_name: - language = get_language_from_extension(file_name, EXTENSIONS) - - # get language from lexer if we didn't have a hard-coded extension rule - if language is None and lexer: - language = u(lexer.name) - - if language is None: - language = get_language_from_extension(file_name, AUXILIARY_EXTENSIONS) - - if language is not None: - language = translate_language(language) + language = u(lexer.name) return language, lexer @@ -93,14 +56,14 @@ def smart_guess_lexer(file_name): text = get_file_contents(file_name) - lexer_1, accuracy_1 = guess_lexer_using_filename(file_name, text) - lexer_2, accuracy_2 = guess_lexer_using_modeline(text) + lexer1, accuracy1 = guess_lexer_using_filename(file_name, text) + lexer2, accuracy2 = guess_lexer_using_modeline(text) - if lexer_1: - lexer = lexer_1 - if (lexer_2 and accuracy_2 and - (not accuracy_1 or accuracy_2 > accuracy_1)): - lexer = lexer_2 + if lexer1: + lexer = lexer1 + if (lexer2 and accuracy2 and + (not accuracy1 or accuracy2 > accuracy1)): + lexer = lexer2 return lexer @@ -156,30 +119,22 @@ def guess_lexer_using_modeline(text): return lexer, accuracy -def get_language_from_extension(file_name, extension_map): - """Returns a matching language for the given file_name using extension_map. +def get_language_from_extension(file_name): + """Returns a matching language for the given file extension. """ - extension = file_name.rsplit('.', 1)[-1] if len(file_name.rsplit('.', 1)) > 1 else None + extension = file_name.rsplit('.', 1)[-1] if len(file_name.rsplit('.', 1)) > 1 else '' + file_minus_extension = file_name.rsplit('.', 1)[0] if len(file_name.rsplit('.', 1)) > 1 else file_name - if extension: - if extension in extension_map: - return extension_map[extension] - if extension.lower() in extension_map: - return extension_map[extension.lower()] + if extension.lower() == 'h': + if os.path.isfile(file_minus_extension + '.cpp'): + return 'C++' + if os.path.isfile(file_minus_extension + '.c'): + return 'C' return None -def translate_language(language): - """Turns Pygments lexer class name string into human-readable language. - """ - - if language in TRANSLATIONS: - language = TRANSLATIONS[language] - return language - - def number_lines_in_file(file_name): lines = 0 try: