limit language detection to known file extensions, unless file contents has a vim modeline
This commit is contained in:
parent
b2dcc7ccc2
commit
258a676759
1 changed files with 44 additions and 12 deletions
|
@ -20,13 +20,14 @@ if sys.version_info[0] == 2:
|
|||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py2'))
|
||||
else:
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py3'))
|
||||
from pygments.lexers import guess_lexer, guess_lexer_for_filename
|
||||
from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename
|
||||
from pygments.modeline import get_filetype_from_buffer
|
||||
|
||||
|
||||
log = logging.getLogger('WakaTime')
|
||||
|
||||
|
||||
# force file name extensions to be recognized as a certain language
|
||||
# extensions taking priority over lexer
|
||||
EXTENSIONS = {
|
||||
'j2': 'HTML',
|
||||
'markdown': 'Markdown',
|
||||
|
@ -34,6 +35,8 @@ EXTENSIONS = {
|
|||
'mdown': 'Markdown',
|
||||
'twig': 'Twig',
|
||||
}
|
||||
|
||||
# lexers to human readable languages
|
||||
TRANSLATIONS = {
|
||||
'CSS+Genshi Text': 'CSS',
|
||||
'CSS+Lasso': 'CSS',
|
||||
|
@ -45,6 +48,11 @@ TRANSLATIONS = {
|
|||
'RHTML': 'HTML',
|
||||
}
|
||||
|
||||
# extensions for when no lexer is found
|
||||
AUXILIARY_EXTENSIONS = {
|
||||
'vb': 'VB.net',
|
||||
}
|
||||
|
||||
|
||||
def guess_language(file_name):
|
||||
"""Guess lexer and language for a file.
|
||||
|
@ -58,11 +66,17 @@ def guess_language(file_name):
|
|||
|
||||
# guess language from file extension
|
||||
if file_name:
|
||||
language = guess_language_from_extension(file_name.rsplit('.', 1)[-1])
|
||||
language = get_language_from_extension(file_name, EXTENSIONS)
|
||||
|
||||
# get language from lexer if we didn't have a hard-coded extension rule
|
||||
if language is None and lexer:
|
||||
language = translate_language(u(lexer.name))
|
||||
language = u(lexer.name)
|
||||
|
||||
if language is None:
|
||||
language = get_language_from_extension(file_name, AUXILIARY_EXTENSIONS)
|
||||
|
||||
if language is not None:
|
||||
language = translate_language(language)
|
||||
|
||||
return language, lexer
|
||||
|
||||
|
@ -79,11 +93,11 @@ def smart_guess_lexer(file_name):
|
|||
text = get_file_contents(file_name)
|
||||
|
||||
try:
|
||||
guess_1 = guess_lexer(text)
|
||||
guess_1 = guess_lexer_for_filename(file_name, text)
|
||||
except:
|
||||
guess_1 = None
|
||||
try:
|
||||
guess_2 = guess_lexer_for_filename(file_name, text)
|
||||
guess_2 = guess_lexer_using_modeline(text)
|
||||
except:
|
||||
guess_2 = None
|
||||
try:
|
||||
|
@ -104,15 +118,33 @@ def smart_guess_lexer(file_name):
|
|||
return lexer
|
||||
|
||||
|
||||
def guess_language_from_extension(extension):
|
||||
"""Checks hard-coded extension map for a matching language.
|
||||
def get_language_from_extension(file_name, extension_map):
|
||||
"""Returns a matching language for the given file_name using extension_map.
|
||||
"""
|
||||
|
||||
extension = file_name.rsplit('.', 1)[-1] if len(file_name.rsplit('.', 1)) > 1 else None
|
||||
|
||||
if extension:
|
||||
if extension in EXTENSIONS:
|
||||
return EXTENSIONS[extension]
|
||||
if extension.lower() in EXTENSIONS:
|
||||
return EXTENSIONS[extension.lower()]
|
||||
if extension in extension_map:
|
||||
return extension_map[extension]
|
||||
if extension.lower() in extension_map:
|
||||
return extension_map[extension.lower()]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def guess_lexer_using_modeline(text):
|
||||
"""Guess lexer for given text using Vim modeline.
|
||||
"""
|
||||
|
||||
file_type = get_filetype_from_buffer(text)
|
||||
|
||||
if file_type is not None:
|
||||
try:
|
||||
return get_lexer_by_name(file_type)
|
||||
except:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue