Improve Matlab language detection

This commit is contained in:
Alan Hamlett 2017-06-07 23:32:03 -07:00
parent be80465193
commit 87f205484f
9 changed files with 95 additions and 15 deletions

View file

@ -3,4 +3,5 @@ mock==2.0.0
nose==1.3.7
nose-capturestderr==1.2
nose-exclude==0.5.0
nose-watch==0.9.1
testfixtures==5.0.0

View file

View file

@ -0,0 +1,10 @@
function foo = bar(a, b, c)
% BAR Minimal sample function.
% NOTE(review): presumably a fixture for Matlab language detection - confirm.
% NOTE(review): in the lines shown, the inputs a, b, c are unused and the
% output foo is never assigned.
% This is a variable
avariables = 0;
% =============================================================
% This is a Matlab comment
% =============================================================
end

View file

@ -237,3 +237,51 @@ class LanguagesTestCase(utils.TestCase):
language = u('F#')
self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language)
def test_objectivec_detected_over_matlab_when_file_empty(self):
    # The patched HTTPAdapter.send returns an HTTP 500 response.
    response = Response()
    response.status_code = 500
    self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response

    now = u(int(time.time()))
    config = 'tests/samples/configs/good_config.cfg'
    # An empty .m file (NOTE(review): presumably with no .mat file next to
    # it - confirm against the samples folder).
    entity = 'tests/samples/codefiles/matlab/empty.m'
    args = ['--file', entity, '--config', config, '--time', now]

    retval = execute(args)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(retval, 102)

    # The detected language is read from the first positional argument
    # passed to the patched offline-queue push.
    language = u('Objective-C')
    self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language)
def test_matlab_detected(self):
    # The patched HTTPAdapter.send returns an HTTP 500 response.
    response = Response()
    response.status_code = 500
    self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response

    now = u(int(time.time()))
    config = 'tests/samples/configs/good_config.cfg'
    entity = 'tests/samples/codefiles/matlab/matlab.m'
    args = ['--file', entity, '--config', config, '--time', now]

    retval = execute(args)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(retval, 102)

    # The detected language is read from the first positional argument
    # passed to the patched offline-queue push.
    language = u('Matlab')
    self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language)
def test_matlab_detected_over_objectivec_when_mat_file_in_folder(self):
    # The patched HTTPAdapter.send returns an HTTP 500 response.
    response = Response()
    response.status_code = 500
    self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response

    now = u(int(time.time()))
    config = 'tests/samples/configs/good_config.cfg'
    # An empty .m file (NOTE(review): the with_mat_files folder presumably
    # also holds a .mat file - confirm against the samples folder).
    entity = 'tests/samples/codefiles/matlab/with_mat_files/empty.m'
    args = ['--file', entity, '--config', config, '--time', now]

    retval = execute(args)
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(retval, 102)

    # The detected language is read from the first positional argument
    # passed to the patched offline-queue push.
    language = u('Matlab')
    self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language)

View file

@ -11,8 +11,8 @@
# Priority overrides, looked up by lower-cased lexer name with 'sharp'
# spelled as '#'. Each key appears exactly once.
LANGUAGES = {
    'f#': 0.01,
    'perl': 0.01,
    'perl6': 0.01,
    'typescript': 0.01,
}

View file

@ -134,9 +134,9 @@ class MatlabLexer(RegexLexer):
}
def analyse_text(text):
    """Return 0.2 when ``text`` looks like Matlab, otherwise ``None``.

    The text qualifies when any line starts (after optional whitespace)
    with ``%``, or when any line starts with ``!`` followed by word
    characters.
    """
    # re.search is required here: re.match only tries position 0 of the
    # string, even with re.M, so markers on later lines would be missed.
    if re.search(r'^\s*%', text, re.M):  # comment
        return 0.2
    elif re.search(r'^!\w+', text, re.M):  # system cmd
        return 0.2

View file

@ -172,10 +172,7 @@ def get_language_from_extension(file_name):
if os.path.exists(u('{0}{1}').format(u(filepart), u('.c'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.C'))):
return 'C'
directory = os.path.dirname(file_name)
available_files = os.listdir(directory)
available_extensions = list(zip(*map(os.path.splitext, available_files)))[1]
available_extensions = [ext.lower() for ext in available_extensions]
available_extensions = extensions_in_same_folder(file_name)
if '.cpp' in available_extensions:
return 'C++'
if '.c' in available_extensions:
@ -300,7 +297,7 @@ def custom_pygments_guess_lexer_for_filename(_fn, _text, **options):
rv = lexer.analyse_text(_text)
if rv == 1.0:
return lexer(**options)
result.append((rv, customize_priority(lexer)))
result.append(customize_lexer_priority(_fn, rv, lexer))
def type_sort(t):
# sort by:
@ -308,16 +305,40 @@ def custom_pygments_guess_lexer_for_filename(_fn, _text, **options):
# - is primary filename pattern?
# - priority
# - last resort: class name
return (t[0], primary[t[1]], t[1].priority, t[1].__name__)
return (t[0], primary[t[2]], t[1], t[2].__name__)
result.sort(key=type_sort)
return result[-1][1](**options)
return result[-1][2](**options)
def customize_priority(lexer):
    """Apply the custom priority from LANGUAGES to ``lexer`` and return it.

    When the normalized lexer name has an entry in ``LANGUAGES``, the
    lexer's ``priority`` attribute is overwritten in place; otherwise the
    lexer is returned unchanged.
    """
    lexer_name = lexer.name.lower().replace('sharp', '#')
    if lexer_name in LANGUAGES:
        lexer.priority = LANGUAGES[lexer_name]
    return lexer


def customize_lexer_priority(file_name, accuracy, lexer):
    """Return ``(accuracy, priority, lexer)`` with a customized priority.

    The priority starts as ``lexer.priority`` and is replaced by the
    ``LANGUAGES`` entry for the normalized lexer name when one exists.
    The lexer object itself is not modified.

    :param file_name: path of the file being analysed; its folder is
        inspected for the Matlab special case.
    :param accuracy: score to pass through unchanged as the first item.
    :param lexer: lexer whose ``name`` and ``priority`` are read.
    """
    priority = lexer.priority

    # Normalize the name so that e.g. 'FSharp' becomes 'f#'.
    lexer_name = lexer.name.lower().replace('sharp', '#')

    if lexer_name in LANGUAGES:
        priority = LANGUAGES[lexer_name]
    elif lexer_name == 'matlab':
        # A .mat file in the same folder raises the Matlab priority.
        # NOTE(review): 0.06 is presumably chosen to outrank the other
        # lexers competing for .m files - confirm.
        available_extensions = extensions_in_same_folder(file_name)
        if '.mat' in available_extensions:
            priority = 0.06

    return (accuracy, priority, lexer)
EXTENSION_CACHE = {}


def extensions_in_same_folder(file_name):
    """Return the set of file extensions found in the folder of file_name.

    Extensions are lower-cased and keep their leading dot (``'.mat'``);
    entries without an extension contribute the empty string. The result
    is stored in ``EXTENSION_CACHE`` under ``file_name`` and reused on
    later calls with the same ``file_name``.

    Raises ``OSError`` (from ``os.listdir``) when the folder cannot be
    listed.
    """
    if file_name in EXTENSION_CACHE:
        return EXTENSION_CACHE[file_name]

    # dirname() is '' for a bare file name; list the current directory in
    # that case instead of passing '' to os.listdir, which fails.
    directory = os.path.dirname(file_name) or os.curdir

    # Built directly from splitext so an empty folder yields an empty set
    # rather than failing on an index into an empty zip() result.
    extensions = set(
        os.path.splitext(name)[1].lower() for name in os.listdir(directory)
    )

    EXTENSION_CACHE[file_name] = extensions
    return extensions