Improved Matlab vs Objective-C language detection
This commit is contained in:
parent
f38b026e6a
commit
cb67d6bb2f
12 changed files with 104 additions and 15 deletions
|
@@ -12,3 +12,8 @@
|
|||
|
||||
class NotYetImplemented(Exception):
|
||||
"""This method needs to be implemented."""
|
||||
|
||||
|
||||
class SkipHeartbeat(Exception):
|
||||
"""Raised to prevent the current heartbeat from being sent."""
|
||||
pass
|
||||
|
|
|
@@ -12,6 +12,7 @@ import logging
|
|||
import re
|
||||
|
||||
from .compat import u, json
|
||||
from .exceptions import SkipHeartbeat
|
||||
from .project import get_project_info
|
||||
from .stats import get_file_stats
|
||||
from .utils import get_user_agent, should_exclude, format_file_path, find_project_file
|
||||
|
@@ -77,12 +78,17 @@ class Heartbeat(object):
|
|||
self.project = project
|
||||
self.branch = branch
|
||||
|
||||
stats = get_file_stats(self.entity,
|
||||
entity_type=self.type,
|
||||
lineno=data.get('lineno'),
|
||||
cursorpos=data.get('cursorpos'),
|
||||
plugin=args.plugin,
|
||||
language=data.get('language'))
|
||||
try:
|
||||
stats = get_file_stats(self.entity,
|
||||
entity_type=self.type,
|
||||
lineno=data.get('lineno'),
|
||||
cursorpos=data.get('cursorpos'),
|
||||
plugin=args.plugin,
|
||||
language=data.get('language'))
|
||||
except SkipHeartbeat as ex:
|
||||
self.skip = u(ex) or 'Skipping'
|
||||
return
|
||||
|
||||
else:
|
||||
self.project = data.get('project')
|
||||
self.branch = data.get('branch')
|
||||
|
|
|
@@ -17,6 +17,7 @@ import sys
|
|||
from .compat import u, open
|
||||
from .constants import MAX_FILE_SIZE_SUPPORTED
|
||||
from .dependencies import DependencyParser
|
||||
from .exceptions import SkipHeartbeat
|
||||
from .language_priorities import LANGUAGES
|
||||
|
||||
from .packages.pygments.lexers import (
|
||||
|
@@ -120,6 +121,8 @@ def guess_lexer_using_filename(file_name, text):
|
|||
|
||||
try:
|
||||
lexer = custom_pygments_guess_lexer_for_filename(file_name, text)
|
||||
except SkipHeartbeat as ex:
|
||||
raise SkipHeartbeat(u(ex))
|
||||
except:
|
||||
log.traceback(logging.DEBUG)
|
||||
|
||||
|
@@ -169,7 +172,7 @@ def get_language_from_extension(file_name):
|
|||
|
||||
filepart, extension = os.path.splitext(file_name)
|
||||
|
||||
if re.match(r'\.h.*', extension, re.IGNORECASE) or re.match(r'\.c.*', extension, re.IGNORECASE):
|
||||
if re.match(r'\.h.*$', extension, re.IGNORECASE) or re.match(r'\.c.*$', extension, re.IGNORECASE):
|
||||
|
||||
if os.path.exists(u('{0}{1}').format(u(filepart), u('.c'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.C'))):
|
||||
return 'C'
|
||||
|
@@ -180,6 +183,18 @@ def get_language_from_extension(file_name):
|
|||
if '.c' in available_extensions:
|
||||
return 'C'
|
||||
|
||||
if os.path.exists(u('{0}{1}').format(u(filepart), u('.m'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.M'))):
|
||||
return 'Objective-C'
|
||||
|
||||
if os.path.exists(u('{0}{1}').format(u(filepart), u('.mm'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.MM'))):
|
||||
return 'Objective-C++'
|
||||
|
||||
if re.match(r'\.m$', extension, re.IGNORECASE) and (os.path.exists(u('{0}{1}').format(u(filepart), u('.h'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.H')))):
|
||||
return 'Objective-C'
|
||||
|
||||
if re.match(r'\.mm$', extension, re.IGNORECASE) and (os.path.exists(u('{0}{1}').format(u(filepart), u('.h'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.H')))):
|
||||
return 'Objective-C++'
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
@@ -308,6 +323,12 @@ def custom_pygments_guess_lexer_for_filename(_fn, _text, **options):
|
|||
return lexer(**options)
|
||||
result.append(customize_lexer_priority(_fn, rv, lexer))
|
||||
|
||||
matlab = list(filter(lambda x: x[2].name.lower() == 'matlab', result))
|
||||
if len(matlab) > 0:
|
||||
objc = list(filter(lambda x: x[2].name.lower() == 'objective-c', result))
|
||||
if objc and objc[0][0] == matlab[0][0]:
|
||||
raise SkipHeartbeat('Skipping because not enough language accuracy.')
|
||||
|
||||
def type_sort(t):
|
||||
# sort by:
|
||||
# - analyse score
|
||||
|
@@ -330,8 +351,12 @@ def customize_lexer_priority(file_name, accuracy, lexer):
|
|||
priority = LANGUAGES[lexer_name]
|
||||
elif lexer_name == 'matlab':
|
||||
available_extensions = extensions_in_same_folder(file_name)
|
||||
if '.mat' in available_extensions:
|
||||
priority = 0.06
|
||||
if '.mat' in available_extensions or '.h' not in available_extensions:
|
||||
priority = 0.6
|
||||
elif lexer_name == 'objective-c':
|
||||
available_extensions = extensions_in_same_folder(file_name)
|
||||
if '.mat' in available_extensions or '.h' not in available_extensions:
|
||||
priority = 0.0
|
||||
|
||||
return (accuracy, priority, lexer)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue