From cb67d6bb2f5600c8f59903d949ea032952c6e7a6 Mon Sep 17 00:00:00 2001 From: Alan Hamlett Date: Thu, 15 Mar 2018 01:15:16 -0700 Subject: [PATCH] Improved Matlab vs Objective-C language detection --- .../codefiles/{matlab => c_and_cpp}/empty.m | 0 tests/samples/codefiles/c_and_cpp/empty.mm | 0 .../codefiles/matlab/with_headers/empty.m | 0 .../codefiles/matlab/with_headers/matlab.m | 10 ++++ .../codefiles/matlab/with_headers/random.h | 0 .../matlab/with_mat_files/objective-c.h | 0 .../matlab/with_mat_files/objective-c.m | 0 .../codefiles/matlab/without_headers/empty.m | 0 tests/test_languages.py | 55 +++++++++++++++++-- wakatime/exceptions.py | 5 ++ wakatime/heartbeat.py | 18 ++++-- wakatime/stats.py | 31 ++++++++++- 12 files changed, 104 insertions(+), 15 deletions(-) rename tests/samples/codefiles/{matlab => c_and_cpp}/empty.m (100%) create mode 100644 tests/samples/codefiles/c_and_cpp/empty.mm create mode 100644 tests/samples/codefiles/matlab/with_headers/empty.m create mode 100755 tests/samples/codefiles/matlab/with_headers/matlab.m create mode 100644 tests/samples/codefiles/matlab/with_headers/random.h create mode 100644 tests/samples/codefiles/matlab/with_mat_files/objective-c.h create mode 100644 tests/samples/codefiles/matlab/with_mat_files/objective-c.m create mode 100644 tests/samples/codefiles/matlab/without_headers/empty.m diff --git a/tests/samples/codefiles/matlab/empty.m b/tests/samples/codefiles/c_and_cpp/empty.m similarity index 100% rename from tests/samples/codefiles/matlab/empty.m rename to tests/samples/codefiles/c_and_cpp/empty.m diff --git a/tests/samples/codefiles/c_and_cpp/empty.mm b/tests/samples/codefiles/c_and_cpp/empty.mm new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/matlab/with_headers/empty.m b/tests/samples/codefiles/matlab/with_headers/empty.m new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/matlab/with_headers/matlab.m b/tests/samples/codefiles/matlab/with_headers/matlab.m new file mode 100755 index 0000000..11259e5 --- /dev/null +++ b/tests/samples/codefiles/matlab/with_headers/matlab.m @@ -0,0 +1,10 @@ +function foo = bar(a, b, c) + +% This is a variable +avariables = 0; + +% ============================================================= +% This is a Matlab comment +% ============================================================= + +end diff --git a/tests/samples/codefiles/matlab/with_headers/random.h b/tests/samples/codefiles/matlab/with_headers/random.h new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/matlab/with_mat_files/objective-c.h b/tests/samples/codefiles/matlab/with_mat_files/objective-c.h new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/matlab/with_mat_files/objective-c.m b/tests/samples/codefiles/matlab/with_mat_files/objective-c.m new file mode 100644 index 0000000..e69de29 diff --git a/tests/samples/codefiles/matlab/without_headers/empty.m b/tests/samples/codefiles/matlab/without_headers/empty.m new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_languages.py b/tests/test_languages.py index 403d69d..a1f7d44 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -74,6 +74,18 @@ class LanguagesTestCase(utils.TestCase): entity='c_and_python/see.py', ) + def test_objectivec_language_detected_when_header_files_in_folder(self): + self.shared( + expected_language='Objective-C', + entity='c_and_cpp/empty.m', + ) + + def test_objectivecpp_language_detected_when_header_files_in_folder(self): + self.shared( + expected_language='Objective-C++', + entity='c_and_cpp/empty.mm', + ) + def test_guess_language(self): with utils.mock.patch('wakatime.stats.smart_guess_lexer') as mock_guess_lexer: mock_guess_lexer.return_value = None @@ -157,12 +169,6 @@ class LanguagesTestCase(utils.TestCase): entity='fsharp.fs', ) - def test_objectivec_detected_over_matlab_when_file_empty(self): - self.shared( - expected_language='Objective-C', - entity='matlab/empty.m', - ) - def test_matlab_detected(self): self.shared( expected_language='Matlab', @@ -175,6 +181,43 @@ class LanguagesTestCase(utils.TestCase): entity='matlab/with_mat_files/empty.m', ) + def test_objectivec_detected_over_matlab_with_matching_header(self): + self.shared( + expected_language='Objective-C', + entity='matlab/with_mat_files/objective-c.m', + ) + + def test_objectivec_detected_over_matlab_with_non_maching_headers_present(self): + self.shared( + expected_language='Objective-C', + entity='matlab/with_headers/empty.m', + ) + + def test_matlab_detected_over_objectivec_when_header_in_folder(self): + self.shared( + expected_language='Matlab', + entity='matlab/with_headers/matlab.m', + ) + + def test_heartbeat_skipped_when_matlab_same_accuracy(self): + self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = CustomResponse() + + entity = 'matlab/without_headers/empty.m' + + config = 'tests/samples/configs/good_config.cfg' + entity = os.path.join('tests/samples/codefiles', entity) + + now = u(int(time.time())) + args = ['--file', entity, '--config', config, '--time', now] + + retval = execute(args) + self.assertEquals(retval, SUCCESS) + self.assertNothingPrinted() + self.assertHeartbeatNotSent() + self.assertHeartbeatNotSavedOffline() + self.assertOfflineHeartbeatsSynced() + self.assertSessionCacheUntouched() + def test_mjs_javascript_module_extension_detected(self): self.shared( expected_language='JavaScript', diff --git a/wakatime/exceptions.py b/wakatime/exceptions.py index e99ba90..a0d5285 100644 --- a/wakatime/exceptions.py +++ b/wakatime/exceptions.py @@ -12,3 +12,8 @@ class NotYetImplemented(Exception): """This method needs to be implemented.""" + + +class SkipHeartbeat(Exception): + """Raised to prevent the current heartbeat from being sent.""" + pass diff --git a/wakatime/heartbeat.py b/wakatime/heartbeat.py index c0b2e02..757838e 100644 --- a/wakatime/heartbeat.py +++ b/wakatime/heartbeat.py @@ -12,6 +12,7 @@ import logging import re from .compat import u, json +from .exceptions import SkipHeartbeat from .project import get_project_info from .stats import get_file_stats from .utils import get_user_agent, should_exclude, format_file_path, find_project_file @@ -77,12 +78,17 @@ class Heartbeat(object): self.project = project self.branch = branch - stats = get_file_stats(self.entity, - entity_type=self.type, - lineno=data.get('lineno'), - cursorpos=data.get('cursorpos'), - plugin=args.plugin, - language=data.get('language')) + try: + stats = get_file_stats(self.entity, + entity_type=self.type, + lineno=data.get('lineno'), + cursorpos=data.get('cursorpos'), + plugin=args.plugin, + language=data.get('language')) + except SkipHeartbeat as ex: + self.skip = u(ex) or 'Skipping' + return + else: self.project = data.get('project') self.branch = data.get('branch') diff --git a/wakatime/stats.py b/wakatime/stats.py index f58aed3..d97b50f 100644 --- a/wakatime/stats.py +++ b/wakatime/stats.py @@ -17,6 +17,7 @@ import sys from .compat import u, open from .constants import MAX_FILE_SIZE_SUPPORTED from .dependencies import DependencyParser +from .exceptions import SkipHeartbeat from .language_priorities import LANGUAGES from .packages.pygments.lexers import ( @@ -120,6 +121,8 @@ def guess_lexer_using_filename(file_name, text): try: lexer = custom_pygments_guess_lexer_for_filename(file_name, text) + except SkipHeartbeat as ex: + raise SkipHeartbeat(u(ex)) except: log.traceback(logging.DEBUG) @@ -169,7 +172,7 @@ def get_language_from_extension(file_name): filepart, extension = os.path.splitext(file_name) - if re.match(r'\.h.*', extension, re.IGNORECASE) or re.match(r'\.c.*', extension, re.IGNORECASE): + if re.match(r'\.h.*$', extension, re.IGNORECASE) or re.match(r'\.c.*$', extension, re.IGNORECASE): if os.path.exists(u('{0}{1}').format(u(filepart), u('.c'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.C'))): return 'C' @@ -180,6 +183,18 @@ def get_language_from_extension(file_name): if '.c' in available_extensions: return 'C' + if os.path.exists(u('{0}{1}').format(u(filepart), u('.m'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.M'))): + return 'Objective-C' + + if os.path.exists(u('{0}{1}').format(u(filepart), u('.mm'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.MM'))): + return 'Objective-C++' + + if re.match(r'\.m$', extension, re.IGNORECASE) and (os.path.exists(u('{0}{1}').format(u(filepart), u('.h'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.H')))): + return 'Objective-C' + + if re.match(r'\.mm$', extension, re.IGNORECASE) and (os.path.exists(u('{0}{1}').format(u(filepart), u('.h'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.H')))): + return 'Objective-C++' + return None @@ -308,6 +323,12 @@ def custom_pygments_guess_lexer_for_filename(_fn, _text, **options): return lexer(**options) result.append(customize_lexer_priority(_fn, rv, lexer)) + matlab = list(filter(lambda x: x[2].name.lower() == 'matlab', result)) + if len(matlab) > 0: + objc = list(filter(lambda x: x[2].name.lower() == 'objective-c', result)) + if objc and objc[0][0] == matlab[0][0]: + raise SkipHeartbeat('Skipping because not enough language accuracy.') + def type_sort(t): # sort by: # - analyse score @@ -330,8 +351,12 @@ def customize_lexer_priority(file_name, accuracy, lexer): priority = LANGUAGES[lexer_name] elif lexer_name == 'matlab': available_extensions = extensions_in_same_folder(file_name) - if '.mat' in available_extensions: - priority = 0.06 + if '.mat' in available_extensions or '.h' not in available_extensions: + priority = 0.6 + elif lexer_name == 'objective-c': + available_extensions = extensions_in_same_folder(file_name) + if '.mat' in available_extensions or '.h' not in available_extensions: + priority = 0.0 return (accuracy, priority, lexer)