From 9ce9d528fdb41290073f67b2d6ac24968cdc0f56 Mon Sep 17 00:00:00 2001 From: Alan Hamlett Date: Sun, 26 Feb 2017 16:24:43 -0800 Subject: [PATCH] detect non-C languages in folders with C/C++ files Improves on 2da75aa119bb47a803fbd91a2f765f9c65fa4930. --- .../{see.c => c_and_cpp/non_empty.cpp} | 0 .../{see.h => c_and_cpp/non_empty.h} | 0 .../{seeplusplus.h => c_and_python/foo.c} | 0 tests/samples/codefiles/c_and_python/see.h | 0 tests/samples/codefiles/c_and_python/see.py | 0 .../{seeplusplus.cpp => c_only/non_empty.c} | 0 tests/samples/codefiles/c_only/non_empty.h | 0 .../codefiles/{ => csharp}/seesharp.cs | 0 tests/test_dependencies.py | 12 +++--- tests/test_languages.py | 41 ++++++++----------- wakatime/stats.py | 26 ++++++++---- 11 files changed, 39 insertions(+), 40 deletions(-) rename tests/samples/codefiles/{see.c => c_and_cpp/non_empty.cpp} (100%) rename tests/samples/codefiles/{see.h => c_and_cpp/non_empty.h} (100%) rename tests/samples/codefiles/{seeplusplus.h => c_and_python/foo.c} (100%) create mode 100644 tests/samples/codefiles/c_and_python/see.h create mode 100644 tests/samples/codefiles/c_and_python/see.py rename tests/samples/codefiles/{seeplusplus.cpp => c_only/non_empty.c} (100%) create mode 100644 tests/samples/codefiles/c_only/non_empty.h rename tests/samples/codefiles/{ => csharp}/seesharp.cs (100%) diff --git a/tests/samples/codefiles/see.c b/tests/samples/codefiles/c_and_cpp/non_empty.cpp similarity index 100% rename from tests/samples/codefiles/see.c rename to tests/samples/codefiles/c_and_cpp/non_empty.cpp diff --git a/tests/samples/codefiles/see.h b/tests/samples/codefiles/c_and_cpp/non_empty.h similarity index 100% rename from tests/samples/codefiles/see.h rename to tests/samples/codefiles/c_and_cpp/non_empty.h diff --git a/tests/samples/codefiles/seeplusplus.h b/tests/samples/codefiles/c_and_python/foo.c similarity index 100% rename from tests/samples/codefiles/seeplusplus.h rename to tests/samples/codefiles/c_and_python/foo.c diff --git a/tests/samples/codefiles/c_and_python/see.h b/tests/samples/codefiles/c_and_python/see.h new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/c_and_python/see.py b/tests/samples/codefiles/c_and_python/see.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/seeplusplus.cpp b/tests/samples/codefiles/c_only/non_empty.c similarity index 100% rename from tests/samples/codefiles/seeplusplus.cpp rename to tests/samples/codefiles/c_only/non_empty.c diff --git a/tests/samples/codefiles/c_only/non_empty.h b/tests/samples/codefiles/c_only/non_empty.h new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/seesharp.cs b/tests/samples/codefiles/csharp/seesharp.cs similarity index 100% rename from tests/samples/codefiles/seesharp.cs rename to tests/samples/codefiles/csharp/seesharp.cs diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index d85b72c..04e86d2 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -42,7 +42,7 @@ class DependenciesTestCase(utils.TestCase): def test_token_parser(self): with self.assertRaises(NotYetImplemented): - source_file = 'tests/samples/codefiles/see.h' + source_file = 'tests/samples/codefiles/c_only/non_empty.h' parser = TokenParser(source_file) parser.parse() @@ -458,7 +458,7 @@ class DependenciesTestCase(utils.TestCase): self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response with utils.TemporaryDirectory() as tempdir: - entity = 'tests/samples/codefiles/see.c' + entity = 'tests/samples/codefiles/c_only/non_empty.c' shutil.copy(entity, os.path.join(tempdir, 'see.c')) entity = os.path.realpath(os.path.join(tempdir, 'see.c')) @@ -510,9 +510,9 @@ class DependenciesTestCase(utils.TestCase): self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response with utils.TemporaryDirectory() as tempdir: - entity = 'tests/samples/codefiles/seeplusplus.cpp' - shutil.copy(entity, os.path.join(tempdir, 'seeplusplus.cpp')) - entity = os.path.realpath(os.path.join(tempdir, 'seeplusplus.cpp')) + entity = 'tests/samples/codefiles/c_and_cpp/non_empty.cpp' + shutil.copy(entity, os.path.join(tempdir, 'non_empty.cpp')) + entity = os.path.realpath(os.path.join(tempdir, 'non_empty.cpp')) now = u(int(time.time())) config = 'tests/samples/configs/good_config.cfg' @@ -562,7 +562,7 @@ class DependenciesTestCase(utils.TestCase): self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response with utils.TemporaryDirectory() as tempdir: - entity = 'tests/samples/codefiles/seesharp.cs' + entity = 'tests/samples/codefiles/csharp/seesharp.cs' shutil.copy(entity, os.path.join(tempdir, 'seesharp.cs')) entity = os.path.realpath(os.path.join(tempdir, 'seesharp.cs')) diff --git a/tests/test_languages.py b/tests/test_languages.py index 7ea9bc0..af865dc 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -23,31 +23,6 @@ class LanguagesTestCase(utils.TestCase): ['wakatime.session_cache.SessionCache.connect', None], ] - def test_language_detected_for_header_file(self): - response = Response() - response.status_code = 500 - self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response - - now = u(int(time.time())) - config = 'tests/samples/configs/good_config.cfg' - entity = 'tests/samples/codefiles/see.h' - args = ['--file', entity, '--config', config, '--time', now] - - retval = execute(args) - self.assertEquals(retval, 102) - - language = u('C') - self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language) - - entity = 'tests/samples/codefiles/seeplusplus.h' - args[1] = entity - - retval = execute(args) - self.assertEquals(retval, 102) - - language = u('C++') - self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language) - def test_c_language_detected_for_header_with_c_files_in_folder(self): response = Response() response.status_code = 500 @@ -80,6 +55,22 @@ class LanguagesTestCase(utils.TestCase): language = u('C++') self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language) + def test_c_not_detected_for_non_header_with_c_files_in_folder(self): + response = Response() + response.status_code = 500 + self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response + + now = u(int(time.time())) + config = 'tests/samples/configs/good_config.cfg' + entity = 'tests/samples/codefiles/c_and_python/see.py' + args = ['--file', entity, '--config', config, '--time', now] + + retval = execute(args) + self.assertEquals(retval, 102) + + language = u('Python') + self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language) + def test_guess_language(self): with utils.mock.patch('wakatime.stats.smart_guess_lexer') as mock_guess_lexer: mock_guess_lexer.return_value = None diff --git a/wakatime/stats.py b/wakatime/stats.py index 269d02f..c15ebd5 100644 --- a/wakatime/stats.py +++ b/wakatime/stats.py @@ -11,6 +11,7 @@ import logging import os +import re import sys from .compat import u, open @@ -64,13 +65,18 @@ def get_file_stats(file_name, entity_type='file', lineno=None, cursorpos=None, def guess_language(file_name): """Guess lexer and language for a file. - Returns (language, lexer) tuple where language is a unicode string. + Returns a tuple of (language_str, lexer_obj). """ + lexer = None + language = get_language_from_extension(file_name) - lexer = smart_guess_lexer(file_name) - if language is None and lexer is not None: - language = u(lexer.name) + if language: + lexer = get_lexer(language) + else: + lexer = smart_guess_lexer(file_name) + if lexer: + language = u(lexer.name) return language, lexer @@ -151,15 +157,17 @@ def guess_lexer_using_modeline(text): def get_language_from_extension(file_name): """Returns a matching language for the given file extension. + + When guessed_language is 'C', does not restrict to known file extensions. """ filepart, extension = os.path.splitext(file_name) - if os.path.exists(u('{0}{1}').format(u(filepart), u('.c'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.C'))): - return 'C' + if re.match(r'\.h.*', extension, re.IGNORECASE) or re.match(r'\.c.*', extension, re.IGNORECASE): + + if os.path.exists(u('{0}{1}').format(u(filepart), u('.c'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.C'))): + return 'C' - extension = extension.lower() - if extension == '.h': directory = os.path.dirname(file_name) available_files = os.listdir(directory) available_extensions = list(zip(*map(os.path.splitext, available_files)))[1] @@ -191,7 +199,7 @@ def number_lines_in_file(file_name): def standardize_language(language, plugin): """Maps a string to the equivalent Pygments language. - Returns a tuple of (language_name, lexer_object). + Returns a tuple of (language_str, lexer_obj). """ if not language: