Use the most accurate lexer from Pygments' content-based (modeline) and filename-based guessing functions
This commit is contained in:
parent
127fd115d0
commit
36f6bb9a21
1 changed files with 71 additions and 8 deletions
|
@ -20,7 +20,7 @@ if sys.version_info[0] == 2:
|
||||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py2'))
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py2'))
|
||||||
else:
|
else:
|
||||||
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py3'))
|
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py3'))
|
||||||
from pygments.lexers import guess_lexer_for_filename
|
from pygments.lexers import guess_lexer, guess_lexer_for_filename
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger('WakaTime')
|
log = logging.getLogger('WakaTime')
|
||||||
|
@ -47,20 +47,67 @@ TRANSLATIONS = {
|
||||||
|
|
||||||
|
|
||||||
def guess_language(file_name):
|
def guess_language(file_name):
|
||||||
language, lexer = None, None
|
"""Guess lexer and language for a file.
|
||||||
try:
|
|
||||||
with open(file_name, 'r', encoding='utf-8') as fh:
|
Returns (language, lexer) tuple where language is a unicode string.
|
||||||
lexer = guess_lexer_for_filename(file_name, fh.read(512000))
|
"""
|
||||||
except:
|
|
||||||
pass
|
lexer = smart_guess_lexer(file_name)
|
||||||
|
|
||||||
|
language = None
|
||||||
|
|
||||||
|
# guess language from file extension
|
||||||
if file_name:
|
if file_name:
|
||||||
language = guess_language_from_extension(file_name.rsplit('.', 1)[-1])
|
language = guess_language_from_extension(file_name.rsplit('.', 1)[-1])
|
||||||
if lexer and language is None:
|
|
||||||
|
# get language from lexer if we didn't have a hard-coded extension rule
|
||||||
|
if language is None and lexer:
|
||||||
language = translate_language(u(lexer.name))
|
language = translate_language(u(lexer.name))
|
||||||
|
|
||||||
return language, lexer
|
return language, lexer
|
||||||
|
|
||||||
|
|
||||||
|
def _try_guess(guesser, *args):
    """Call a Pygments guessing function, returning None if it fails."""
    try:
        return guesser(*args)
    except Exception:
        # Best effort: a failed guess simply means "no candidate".
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None


def _lexer_accuracy(lexer, text):
    """Return lexer.analyse_text(text), or None if there is no usable score."""
    if lexer is None:
        return None
    try:
        return lexer.analyse_text(text)
    except Exception:
        return None


def smart_guess_lexer(file_name):
    """Guess Pygments lexer for a file.

    Builds two candidates from the text returned by get_file_contents:

    1. guess_lexer(text), a guess from the contents alone (per the
       Pygments docs this is where a vim modeline is honoured).
    2. guess_lexer_for_filename(file_name, text), a guess restricted to
       lexers matching the file name.

    Each candidate is scored with its own analyse_text(text). The first
    candidate is used when its score is truthy; the second replaces it when
    its score is truthy and either the first has no score or the second
    scores strictly higher.

    Returns the chosen lexer, or None when neither candidate has a truthy
    score (including when guessing fails outright).
    """

    text = get_file_contents(file_name)

    guess_1 = _try_guess(guess_lexer, text)
    guess_2 = _try_guess(guess_lexer_for_filename, file_name, text)

    accuracy_1 = _lexer_accuracy(guess_1, text)
    accuracy_2 = _lexer_accuracy(guess_2, text)

    lexer = None
    if accuracy_1:
        lexer = guess_1
    if accuracy_2 and (not accuracy_1 or accuracy_2 > accuracy_1):
        lexer = guess_2

    return lexer
|
||||||
|
|
||||||
|
|
||||||
def guess_language_from_extension(extension):
|
def guess_language_from_extension(extension):
|
||||||
|
"""Checks hard-coded extension map for a matching language.
|
||||||
|
"""
|
||||||
|
|
||||||
if extension:
|
if extension:
|
||||||
if extension in EXTENSIONS:
|
if extension in EXTENSIONS:
|
||||||
return EXTENSIONS[extension]
|
return EXTENSIONS[extension]
|
||||||
|
@ -70,6 +117,9 @@ def guess_language_from_extension(extension):
|
||||||
|
|
||||||
|
|
||||||
def translate_language(language):
|
def translate_language(language):
|
||||||
|
"""Turns Pygments lexer class name string into human-readable language.
|
||||||
|
"""
|
||||||
|
|
||||||
if language in TRANSLATIONS:
|
if language in TRANSLATIONS:
|
||||||
language = TRANSLATIONS[language]
|
language = TRANSLATIONS[language]
|
||||||
return language
|
return language
|
||||||
|
@ -107,3 +157,16 @@ def get_file_stats(file_name, notfile=False, lineno=None, cursorpos=None):
|
||||||
'cursorpos': cursorpos,
|
'cursorpos': cursorpos,
|
||||||
}
|
}
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_contents(file_name):
    """Return the beginning of a file's contents as text.

    The file is opened in text mode and decoded as UTF-8, and at most
    512000 units are read via fh.read(512000). In text mode that limit
    counts decoded characters, not raw bytes.

    Returns None when the file cannot be opened, read, or decoded (for
    example a missing file, a non-path argument, or invalid UTF-8), so that
    callers can treat an unreadable file as having no contents.
    """

    try:
        with open(file_name, 'r', encoding='utf-8') as fh:
            return fh.read(512000)
    except (IOError, OSError, UnicodeError, TypeError, ValueError):
        # Best effort: the listed errors cover opening (IOError/OSError,
        # TypeError/ValueError for bad arguments) and decoding
        # (UnicodeError). Unlike a bare except, KeyboardInterrupt and
        # SystemExit still propagate.
        return None
|
||||||
|
|
Loading…
Reference in a new issue