2013-09-22 20:39:16 +00:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
|
|
wakatime.stats
|
|
|
|
~~~~~~~~~~~~~~
|
|
|
|
|
2013-09-22 20:41:13 +00:00
|
|
|
Stats about files
|
2013-09-22 20:39:16 +00:00
|
|
|
|
|
|
|
:copyright: (c) 2013 Alan Hamlett.
|
|
|
|
:license: BSD, see LICENSE for more details.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import logging
|
|
|
|
import os
|
2017-02-27 00:24:43 +00:00
|
|
|
import re
|
2013-09-22 20:39:16 +00:00
|
|
|
import sys
|
|
|
|
|
2014-09-30 16:09:30 +00:00
|
|
|
from .compat import u, open
|
2017-04-20 06:05:40 +00:00
|
|
|
from .constants import MAX_FILE_SIZE_SUPPORTED
|
2015-09-27 02:26:15 +00:00
|
|
|
from .dependencies import DependencyParser
|
2017-03-04 21:11:40 +00:00
|
|
|
from .language_priorities import LANGUAGES
|
2014-09-30 16:09:30 +00:00
|
|
|
|
2017-02-20 21:22:14 +00:00
|
|
|
from .packages.pygments.lexers import (
|
2017-02-27 01:05:55 +00:00
|
|
|
_iter_lexerclasses,
|
|
|
|
_fn_matches,
|
|
|
|
basename,
|
2017-02-20 21:22:14 +00:00
|
|
|
ClassNotFound,
|
2017-02-20 22:23:02 +00:00
|
|
|
find_lexer_class,
|
2016-04-18 21:27:41 +00:00
|
|
|
get_lexer_by_name,
|
|
|
|
)
|
2017-02-20 21:22:14 +00:00
|
|
|
from .packages.pygments.modeline import get_filetype_from_buffer
|
2013-09-22 20:39:16 +00:00
|
|
|
|
2016-04-19 10:30:44 +00:00
|
|
|
try:
|
|
|
|
from .packages import simplejson as json # pragma: nocover
|
|
|
|
except (ImportError, SyntaxError): # pragma: nocover
|
|
|
|
import json
|
|
|
|
|
2013-09-22 20:39:16 +00:00
|
|
|
|
2014-07-25 07:59:25 +00:00
|
|
|
# Module-wide logger shared by every function in this file.
# NOTE(review): the functions below call ``log.traceback(...)``, which the
# standard ``logging.Logger`` does not provide — presumably a custom logger
# class is registered elsewhere in the package; confirm before reusing this
# module standalone.
log = logging.getLogger('WakaTime')
|
2013-09-22 20:39:16 +00:00
|
|
|
|
|
|
|
|
2017-02-20 21:22:14 +00:00
|
|
|
def get_file_stats(file_name, entity_type='file', lineno=None, cursorpos=None,
                   plugin=None, language=None):
    """Collect language, dependency and line statistics for an entity.

    For any ``entity_type`` other than ``'file'`` nothing is inspected: the
    language and the line count are ``None`` and the dependency list is
    empty. For files, ``language`` is first standardized for ``plugin`` and
    only guessed from ``file_name`` when that yields no language.

    Returns a dict with the keys ``language``, ``dependencies``, ``lines``,
    ``lineno`` and ``cursorpos``; the last two are passed through unchanged.
    """
    if entity_type == 'file':
        detected, lexer = standardize_language(language, plugin)
        if not detected:
            detected, lexer = guess_language(file_name)
        dependencies = DependencyParser(file_name, lexer).parse()
        line_count = number_lines_in_file(file_name)
    else:
        detected, dependencies, line_count = None, [], None

    return {
        'language': detected,
        'dependencies': dependencies,
        'lines': line_count,
        'lineno': lineno,
        'cursorpos': cursorpos,
    }
|
|
|
|
|
|
|
|
|
2013-09-22 20:39:16 +00:00
|
|
|
def guess_language(file_name):
    """Work out the language and lexer for a file.

    The file extension is consulted first. Only when it gives no language
    is the lexer guessed from the file itself, in which case the language
    is taken from that lexer's name.

    Returns a tuple of (language_str, lexer_obj).
    """
    language = get_language_from_extension(file_name)
    if language:
        return language, get_lexer(language)

    lexer = smart_guess_lexer(file_name)
    if lexer:
        language = u(lexer.name)
    return language, lexer
|
2013-09-22 20:39:16 +00:00
|
|
|
|
|
|
|
|
2015-06-11 21:11:10 +00:00
|
|
|
def smart_guess_lexer(file_name):
    """Choose the most plausible Pygments lexer for a file.

    Two candidates are computed from the head of the file: one from the
    lexers matching the file name, one from a modeline in the contents.
    The modeline candidate is chosen only when it has a non-zero accuracy
    and the file-name candidate has no accuracy or a lower one; otherwise
    the file-name candidate is returned (or None when there is none).
    """
    text = get_file_head(file_name)

    by_filename, filename_accuracy = guess_lexer_using_filename(file_name, text)
    by_modeline, modeline_accuracy = guess_lexer_using_modeline(text)

    if by_modeline and modeline_accuracy:
        if not filename_accuracy or modeline_accuracy > filename_accuracy:
            return by_modeline

    if by_filename:
        return by_filename
    return None
|
|
|
|
|
|
|
|
|
2015-06-16 01:55:47 +00:00
|
|
|
def guess_lexer_using_filename(file_name, text):
    """Pick a lexer among those matching the file name and score it.

    Any failure while choosing the lexer or while analysing the text is
    logged at DEBUG level and leaves the corresponding value as None.

    Returns a tuple of (lexer, accuracy).
    """
    try:
        lexer = custom_pygments_guess_lexer_for_filename(file_name, text)
    except:
        lexer = None
        log.traceback(logging.DEBUG)

    if lexer is None:
        return None, None

    accuracy = None
    try:
        accuracy = lexer.analyse_text(text)
    except:
        log.traceback(logging.DEBUG)

    return lexer, accuracy
|
2013-10-26 04:28:59 +00:00
|
|
|
|
|
|
|
|
2015-06-16 01:26:10 +00:00
|
|
|
def guess_lexer_using_modeline(text):
    """Pick a lexer from a Vim modeline in the text and score it.

    The file type reported by ``get_filetype_from_buffer`` is turned into a
    lexer with ``get_lexer_by_name``. Failures at any step are logged at
    DEBUG level and leave the corresponding value as None.

    Returns a tuple of (lexer, accuracy).
    """
    try:
        file_type = get_filetype_from_buffer(text)
    except:  # pragma: nocover
        file_type = None
        log.traceback(logging.DEBUG)

    if file_type is None:
        return None, None

    try:
        lexer = get_lexer_by_name(file_type)
    except ClassNotFound:
        lexer = None
        log.traceback(logging.DEBUG)

    if lexer is None:
        return None, None

    accuracy = None
    try:
        accuracy = lexer.analyse_text(text)
    except:  # pragma: nocover
        log.traceback(logging.DEBUG)

    return lexer, accuracy
|
|
|
|
|
|
|
|
|
2015-08-10 18:33:39 +00:00
|
|
|
def get_language_from_extension(file_name):
    """Detect C or C++ from the file extension and neighbouring files.

    Only extensions beginning with ``.h`` or ``.c`` (case-insensitive) are
    considered. For those:

    * if a file with the same path but a ``.c`` or ``.C`` extension exists,
      the language is ``'C'``;
    * otherwise the extensions present in the same folder decide: a
      ``.cpp`` file means ``'C++'``, else a ``.c`` file means ``'C'``.

    Returns ``'C'``, ``'C++'`` or ``None`` when nothing can be inferred.
    """

    filepart, extension = os.path.splitext(file_name)

    # NOTE(review): this is a prefix test, so extensions such as ``.html``
    # or ``.css`` are accepted as well — behaviour kept as-is; confirm
    # whether that breadth is intended.
    if not extension.lower().startswith(('.h', '.c')):
        return None

    if (os.path.exists(u('{0}{1}').format(u(filepart), u('.c'))) or
            os.path.exists(u('{0}{1}').format(u(filepart), u('.C')))):
        return 'C'

    try:
        available_extensions = extensions_in_same_folder(file_name)
    except (OSError, IndexError):
        # The folder listing can fail (missing or unreadable directory, a
        # bare file name, an empty folder). That only means there are no
        # sibling files to infer from; it must not abort stats collection.
        return None

    if '.cpp' in available_extensions:
        return 'C++'
    if '.c' in available_extensions:
        return 'C'

    return None
|
|
|
|
|
|
|
|
|
2013-09-22 20:39:16 +00:00
|
|
|
def number_lines_in_file(file_name):
    """Count the lines of a text file.

    Files larger than ``MAX_FILE_SIZE_SUPPORTED`` are not read at all. The
    file is decoded as UTF-8 first and, if that fails, with the filesystem
    encoding. Each attempt counts from zero, so a decode error part-way
    through the first pass cannot inflate the result of the second.

    Returns the number of lines, or ``None`` when the file is too large or
    cannot be read with either encoding.
    """
    try:
        if os.path.getsize(file_name) > MAX_FILE_SIZE_SUPPORTED:
            return None
    except os.error:
        # Size could not be determined; let the read attempts below decide.
        pass

    try:
        return _count_lines_with_encoding(file_name, 'utf-8')
    except Exception:  # pragma: nocover
        try:
            return _count_lines_with_encoding(file_name,
                                              sys.getfilesystemencoding())
        except Exception:
            return None


def _count_lines_with_encoding(file_name, encoding):
    """Return the number of lines in file_name decoded with encoding."""
    with open(file_name, 'r', encoding=encoding) as fh:
        return sum(1 for _ in fh)
|
|
|
|
|
|
|
|
|
2016-04-19 10:30:44 +00:00
|
|
|
def standardize_language(language, plugin):
    """Translate a language string into its Pygments equivalent.

    The plugin-specific mapping is consulted first (when a plugin is
    given), then the ``default`` mapping.

    Returns a tuple of (language_str, lexer_obj); both are None when no
    language was supplied.
    """
    if not language:
        return None, None

    if plugin:
        # Reduce the plugin string to a bare name: last space-separated
        # token, then the part before the first '/', then before the
        # first '-'.
        last_token = plugin.split(' ')[-1]
        name_part = last_token.split('/')[0]
        plugin = name_part.split('-')[0]

        from_plugin = get_language_from_json(language, plugin)
        if from_plugin is not None:
            return from_plugin, get_lexer(from_plugin)

    # Nothing plugin-specific matched; fall back to the default mapping.
    from_default = get_language_from_json(language, 'default')
    return from_default, get_lexer(from_default)
|
|
|
|
|
|
|
|
|
|
|
|
def get_lexer(language):
    """Instantiate the Pygments lexer for a language name.

    Returns None when no language is given or no lexer class is found.
    """
    lexer_cls = find_lexer_class(language) if language else None
    if not lexer_cls:
        return None
    return lexer_cls()
|
2016-04-19 10:30:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_language_from_json(language, key):
    """Look up a language in the ``languages/<key>.json`` mapping.

    ``key`` selects the JSON file (lower-cased) inside the ``languages``
    folder next to this module; ``language`` is lower-cased before the
    lookup.

    Returns the mapped value, or ``None`` when the file does not exist,
    has no truthy entry for the language, or cannot be read or parsed (in
    which case the traceback is logged at DEBUG level).
    """

    # Format only the file name. The package directory may itself contain
    # '{' or '}', which str.format() would try to interpret as fields.
    file_name = os.path.join(
        os.path.dirname(__file__),
        'languages',
        '{0}.json'.format(key.lower()))

    if not os.path.exists(file_name):
        return None

    try:
        with open(file_name, 'r', encoding='utf-8') as fh:
            languages = json.loads(fh.read())
        # Empty or missing entries count as "not found".
        return languages.get(language.lower()) or None
    except Exception:
        log.traceback(logging.DEBUG)

    return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_file_head(file_name):
    """Return the beginning of a file's contents as text.

    Reads with ``read(512000)`` from a text-mode handle, decoding as UTF-8
    first and with the filesystem encoding if that fails. When both
    attempts fail, the traceback is logged at DEBUG level.

    Returns the text read, or None when nothing could be read.
    """
    limit = 512000
    head = None
    try:
        with open(file_name, 'r', encoding='utf-8') as utf8_file:
            head = utf8_file.read(limit)
    except:
        try:
            fallback_encoding = sys.getfilesystemencoding()
            with open(file_name, 'r', encoding=fallback_encoding) as fallback_file:
                head = fallback_file.read(limit)  # pragma: nocover
        except:
            log.traceback(logging.DEBUG)
    return head
|
2017-02-27 01:05:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
def custom_pygments_guess_lexer_for_filename(_fn, _text, **options):
    """Variant of pygments.lexers.guess_lexer_for_filename that ranks the
    candidate lexers with customized priorities.

    Every lexer class whose filename or alias-filename patterns match the
    base name of ``_fn`` is a candidate. A single candidate is returned
    straight away, as is any candidate whose text analysis scores 1.0.
    Otherwise the highest-ranked candidate is instantiated with
    ``options``. Raises ClassNotFound when no lexer matches the file name.
    """
    fn = basename(_fn)
    primary = {}
    matching_lexers = set()
    for lexer_cls in _iter_lexerclasses():
        # Alias patterns are checked last, so a lexer matching both kinds
        # of pattern ends up flagged as non-primary.
        for patterns, is_primary in ((lexer_cls.filenames, True),
                                     (lexer_cls.alias_filenames, False)):
            for pattern in patterns:
                if _fn_matches(fn, pattern):
                    matching_lexers.add(lexer_cls)
                    primary[lexer_cls] = is_primary

    if not matching_lexers:
        raise ClassNotFound('no lexer for filename %r found' % fn)
    if len(matching_lexers) == 1:
        return matching_lexers.pop()(**options)

    candidates = []
    for lexer_cls in matching_lexers:
        score = lexer_cls.analyse_text(_text)
        if score == 1.0:
            return lexer_cls(**options)
        candidates.append(customize_lexer_priority(_fn, score, lexer_cls))

    def rank(candidate):
        # Each candidate is (accuracy, priority, lexer class). Order by:
        # - analyse score
        # - is primary filename pattern?
        # - priority
        # - last resort: class name
        accuracy, priority, cls = candidate
        return (accuracy, primary[cls], priority, cls.__name__)

    best = sorted(candidates, key=rank)[-1]
    return best[2](**options)
|
2017-02-27 01:05:55 +00:00
|
|
|
|
2017-02-27 01:08:09 +00:00
|
|
|
|
2017-06-08 06:32:03 +00:00
|
|
|
def customize_lexer_priority(file_name, accuracy, lexer):
    """Attach a possibly overridden priority to a lexer candidate.

    The lexer's own priority is used unless its lower-cased name (with
    'sharp' replaced by '#') appears in LANGUAGES, or the lexer is
    'matlab' and a '.mat' file sits in the same folder as file_name.

    Returns a tuple of (accuracy, priority, lexer).
    """
    priority = lexer.priority
    lexer_name = lexer.name.lower().replace('sharp', '#')

    if lexer_name in LANGUAGES:
        return (accuracy, LANGUAGES[lexer_name], lexer)

    if lexer_name == 'matlab':
        if '.mat' in extensions_in_same_folder(file_name):
            priority = 0.06

    return (accuracy, priority, lexer)
|
|
|
|
|
|
|
|
|
|
|
|
def extensions_in_same_folder(file_name):
    """Return the set of file extensions found in file_name's folder.

    Extensions are lower-cased and keep their leading dot; entries without
    an extension contribute the empty string. An empty set is returned
    when the folder is empty or cannot be listed.
    """

    # dirname() is '' for a bare file name, and listdir('') raises, so
    # fall back to the current directory in that case.
    directory = os.path.dirname(file_name) or os.curdir
    try:
        files = os.listdir(directory)
    except OSError:
        # Missing or unreadable folder: there are no known siblings.
        return set()
    return set(os.path.splitext(name)[1].lower() for name in files)
|