move language rules to server, detect header files

- move language translations to api server
- move extension rules to api server
- detect the correct language for .h header files based on whether a
  .cpp or .c file with the same base name exists alongside the .h file
This commit is contained in:
Alan Hamlett 2015-08-10 11:33:39 -07:00
parent 01ed25f948
commit 6e9e8a9ce2

View file

@ -28,57 +28,20 @@ from pygments.util import ClassNotFound
log = logging.getLogger('WakaTime') log = logging.getLogger('WakaTime')
# extensions taking priority over lexer
EXTENSIONS = {
'j2': 'HTML',
'markdown': 'Markdown',
'md': 'Markdown',
'mdown': 'Markdown',
'twig': 'Twig',
}
# lexers to human readable languages
TRANSLATIONS = {
'CSS+Genshi Text': 'CSS',
'CSS+Lasso': 'CSS',
'HTML+Django/Jinja': 'HTML',
'HTML+Lasso': 'HTML',
'JavaScript+Genshi Text': 'JavaScript',
'JavaScript+Lasso': 'JavaScript',
'Perl6': 'Perl',
'RHTML': 'HTML',
}
# extensions for when no lexer is found
AUXILIARY_EXTENSIONS = {
'vb': 'VB.net',
}
def guess_language(file_name): def guess_language(file_name):
"""Guess lexer and language for a file. """Guess lexer and language for a file.
Returns (language, lexer) tuple where language is a unicode string. Returns (language, lexer) tuple where language is a unicode string.
""" """
language = get_language_from_extension(file_name)
if language:
return language, None
lexer = smart_guess_lexer(file_name) lexer = smart_guess_lexer(file_name)
language = None
# guess language from file extension
if file_name:
language = get_language_from_extension(file_name, EXTENSIONS)
# get language from lexer if we didn't have a hard-coded extension rule
if language is None and lexer:
language = u(lexer.name) language = u(lexer.name)
if language is None:
language = get_language_from_extension(file_name, AUXILIARY_EXTENSIONS)
if language is not None:
language = translate_language(language)
return language, lexer return language, lexer
@ -93,14 +56,14 @@ def smart_guess_lexer(file_name):
text = get_file_contents(file_name) text = get_file_contents(file_name)
lexer_1, accuracy_1 = guess_lexer_using_filename(file_name, text) lexer1, accuracy1 = guess_lexer_using_filename(file_name, text)
lexer_2, accuracy_2 = guess_lexer_using_modeline(text) lexer2, accuracy2 = guess_lexer_using_modeline(text)
if lexer_1: if lexer1:
lexer = lexer_1 lexer = lexer1
if (lexer_2 and accuracy_2 and if (lexer2 and accuracy2 and
(not accuracy_1 or accuracy_2 > accuracy_1)): (not accuracy1 or accuracy2 > accuracy1)):
lexer = lexer_2 lexer = lexer2
return lexer return lexer
@ -156,30 +119,22 @@ def guess_lexer_using_modeline(text):
return lexer, accuracy return lexer, accuracy
def get_language_from_extension(file_name, extension_map): def get_language_from_extension(file_name):
"""Returns a matching language for the given file_name using extension_map. """Returns a matching language for the given file extension.
""" """
extension = file_name.rsplit('.', 1)[-1] if len(file_name.rsplit('.', 1)) > 1 else None extension = file_name.rsplit('.', 1)[-1] if len(file_name.rsplit('.', 1)) > 1 else ''
file_minus_extension = file_name.rsplit('.', 1)[0] if len(file_name.rsplit('.', 1)) > 1 else file_name
if extension: if extension.lower() == 'h':
if extension in extension_map: if os.path.isfile(file_minus_extension + '.cpp'):
return extension_map[extension] return 'C++'
if extension.lower() in extension_map: if os.path.isfile(file_minus_extension + '.c'):
return extension_map[extension.lower()] return 'C'
return None return None
def translate_language(language):
"""Turns Pygments lexer class name string into human-readable language.
"""
if language in TRANSLATIONS:
language = TRANSLATIONS[language]
return language
def number_lines_in_file(file_name): def number_lines_in_file(file_name):
lines = 0 lines = 0
try: try: