ability to prioritize popular languages over uncommon languages
This commit is contained in:
parent
9ce9d528fd
commit
8c2685696c
4 changed files with 74 additions and 5 deletions
1
tests/samples/codefiles/perl.pl
Normal file
1
tests/samples/codefiles/perl.pl
Normal file
|
@ -0,0 +1 @@
|
|||
|
|
@ -190,7 +190,7 @@ class LanguagesTestCase(utils.TestCase):
|
|||
language = None
|
||||
self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language)
|
||||
|
||||
def test_typescript_detected_correctly(self):
|
||||
def test_typescript_detected_over_typoscript(self):
|
||||
response = Response()
|
||||
response.status_code = 500
|
||||
self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response
|
||||
|
@ -205,3 +205,19 @@ class LanguagesTestCase(utils.TestCase):
|
|||
|
||||
language = u('TypeScript')
|
||||
self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language)
|
||||
|
||||
def test_perl_detected_over_prolog(self):
|
||||
response = Response()
|
||||
response.status_code = 500
|
||||
self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response
|
||||
|
||||
now = u(int(time.time()))
|
||||
config = 'tests/samples/configs/good_config.cfg'
|
||||
entity = 'tests/samples/codefiles/perl.pl'
|
||||
args = ['--file', entity, '--config', config, '--time', now]
|
||||
|
||||
retval = execute(args)
|
||||
self.assertEquals(retval, 102)
|
||||
|
||||
language = u('Perl')
|
||||
self.assertEqual(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0].get('language'), language)
|
||||
|
|
|
@ -450,7 +450,6 @@ class TypeScriptLexer(RegexLexer):
|
|||
aliases = ['ts', 'typescript']
|
||||
filenames = ['*.ts', '*.tsx']
|
||||
mimetypes = ['text/x-typescript']
|
||||
priority = 0.11
|
||||
|
||||
flags = re.DOTALL | re.MULTILINE
|
||||
|
||||
|
|
|
@ -18,10 +18,12 @@ from .compat import u, open
|
|||
from .dependencies import DependencyParser
|
||||
|
||||
from .packages.pygments.lexers import (
|
||||
_iter_lexerclasses,
|
||||
_fn_matches,
|
||||
basename,
|
||||
ClassNotFound,
|
||||
find_lexer_class,
|
||||
get_lexer_by_name,
|
||||
guess_lexer_for_filename,
|
||||
)
|
||||
from .packages.pygments.modeline import get_filetype_from_buffer
|
||||
|
||||
|
@ -99,7 +101,7 @@ def smart_guess_lexer(file_name):
|
|||
lexer = lexer1
|
||||
if (lexer2 and accuracy2 and
|
||||
(not accuracy1 or accuracy2 > accuracy1)):
|
||||
lexer = lexer2 # pragma: nocover
|
||||
lexer = lexer2
|
||||
|
||||
return lexer
|
||||
|
||||
|
@ -113,7 +115,7 @@ def guess_lexer_using_filename(file_name, text):
|
|||
lexer, accuracy = None, None
|
||||
|
||||
try:
|
||||
lexer = guess_lexer_for_filename(file_name, text)
|
||||
lexer = custom_pygments_guess_lexer_for_filename(file_name, text)
|
||||
except:
|
||||
pass
|
||||
|
||||
|
@ -263,3 +265,54 @@ def get_file_head(file_name):
|
|||
except:
|
||||
log.traceback(logging.DEBUG)
|
||||
return text
|
||||
|
||||
|
||||
def custom_pygments_guess_lexer_for_filename(_fn, _text, **options):
|
||||
"""Overwrite pygments.lexers.guess_lexer_for_filename to customize the
|
||||
priority of different lexers based on popularity of languages."""
|
||||
|
||||
fn = basename(_fn)
|
||||
primary = {}
|
||||
matching_lexers = set()
|
||||
for lexer in _iter_lexerclasses():
|
||||
for filename in lexer.filenames:
|
||||
if _fn_matches(fn, filename):
|
||||
matching_lexers.add(lexer)
|
||||
primary[lexer] = True
|
||||
for filename in lexer.alias_filenames:
|
||||
if _fn_matches(fn, filename):
|
||||
matching_lexers.add(lexer)
|
||||
primary[lexer] = False
|
||||
if not matching_lexers:
|
||||
raise ClassNotFound('no lexer for filename %r found' % fn)
|
||||
if len(matching_lexers) == 1:
|
||||
return matching_lexers.pop()(**options)
|
||||
result = []
|
||||
for lexer in matching_lexers:
|
||||
rv = lexer.analyse_text(_text)
|
||||
if rv == 1.0:
|
||||
return lexer(**options)
|
||||
result.append((rv, customize_priority(lexer)))
|
||||
|
||||
def type_sort(t):
|
||||
# sort by:
|
||||
# - analyse score
|
||||
# - is primary filename pattern?
|
||||
# - priority
|
||||
# - last resort: class name
|
||||
return (t[0], primary[t[1]], t[1].priority, t[1].__name__)
|
||||
result.sort(key=type_sort)
|
||||
|
||||
return result[-1][1](**options)
|
||||
|
||||
CUSTOM_PRIORITIES = {
|
||||
'perl': 0.1,
|
||||
'perl6': 0.1,
|
||||
'typescript': 0.11,
|
||||
}
|
||||
def customize_priority(lexer):
|
||||
"""Return an integer priority for the given lexer object."""
|
||||
|
||||
if lexer.name.lower() in CUSTOM_PRIORITIES:
|
||||
lexer.priority = CUSTOM_PRIORITIES[lexer.name.lower()]
|
||||
return lexer
|
||||
|
|
Loading…
Reference in a new issue