sublime-rana/packages/wakatime/stats.py

241 lines
6.2 KiB
Python
Raw Normal View History

2013-09-22 20:51:23 +00:00
# -*- coding: utf-8 -*-
"""
wakatime.stats
~~~~~~~~~~~~~~
Stats about files
:copyright: (c) 2013 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
import logging
import os
import sys
2014-09-30 16:27:35 +00:00
from .compat import u, open
2015-09-29 10:11:25 +00:00
from .dependencies import DependencyParser
2014-09-30 16:27:35 +00:00
2016-04-18 22:26:28 +00:00
from .packages import (
get_lexer_by_name,
guess_lexer_for_filename,
get_filetype_from_buffer,
ClassNotFound,
)
2013-09-22 20:51:23 +00:00
2016-04-28 22:04:46 +00:00
try:
from .packages import simplejson as json # pragma: nocover
except (ImportError, SyntaxError): # pragma: nocover
import json
2013-09-22 20:51:23 +00:00
log = logging.getLogger('WakaTime')
2013-09-22 20:51:23 +00:00
def guess_language(file_name):
2015-06-21 17:35:14 +00:00
"""Guess lexer and language for a file.
Returns (language, lexer) tuple where language is a unicode string.
"""
2015-08-25 07:42:37 +00:00
language = get_language_from_extension(file_name)
lexer = smart_guess_lexer(file_name)
2015-09-29 10:11:25 +00:00
if language is None and lexer is not None:
2015-06-21 17:35:14 +00:00
language = u(lexer.name)
return language, lexer
def smart_guess_lexer(file_name):
"""Guess Pygments lexer for a file.
Looks for a vim modeline in file contents, then compares the accuracy
of that lexer with a second guess. The second guess looks up all lexers
matching the file name, then runs a text analysis for the best choice.
"""
lexer = None
2016-04-28 22:04:46 +00:00
text = get_file_head(file_name)
2015-06-21 17:35:14 +00:00
2015-08-25 07:42:37 +00:00
lexer1, accuracy1 = guess_lexer_using_filename(file_name, text)
lexer2, accuracy2 = guess_lexer_using_modeline(text)
2015-06-21 17:35:14 +00:00
2015-08-25 07:42:37 +00:00
if lexer1:
lexer = lexer1
if (lexer2 and accuracy2 and
(not accuracy1 or accuracy2 > accuracy1)):
2015-09-29 10:11:25 +00:00
lexer = lexer2 # pragma: nocover
2015-06-21 17:35:14 +00:00
return lexer
def guess_lexer_using_filename(file_name, text):
"""Guess lexer for given text, limited to lexers for this file's extension.
Returns a tuple of (lexer, accuracy).
"""
lexer, accuracy = None, None
2013-09-22 20:51:23 +00:00
try:
2015-06-21 17:35:14 +00:00
lexer = guess_lexer_for_filename(file_name, text)
2015-09-29 10:11:25 +00:00
except: # pragma: nocover
2013-09-22 20:51:23 +00:00
pass
2015-06-21 17:35:14 +00:00
if lexer is not None:
try:
accuracy = lexer.analyse_text(text)
2015-09-29 10:11:25 +00:00
except: # pragma: nocover
2015-06-21 17:35:14 +00:00
pass
return lexer, accuracy
def guess_lexer_using_modeline(text):
"""Guess lexer for given text using Vim modeline.
Returns a tuple of (lexer, accuracy).
"""
lexer, accuracy = None, None
file_type = None
try:
file_type = get_filetype_from_buffer(text)
2015-09-29 10:11:25 +00:00
except: # pragma: nocover
pass
2015-06-21 17:35:14 +00:00
if file_type is not None:
try:
lexer = get_lexer_by_name(file_type)
2015-09-29 10:11:25 +00:00
except ClassNotFound: # pragma: nocover
2015-06-21 17:35:14 +00:00
pass
if lexer is not None:
try:
accuracy = lexer.analyse_text(text)
2015-09-29 10:11:25 +00:00
except: # pragma: nocover
2015-06-21 17:35:14 +00:00
pass
return lexer, accuracy
2015-08-25 07:42:37 +00:00
def get_language_from_extension(file_name):
"""Returns a matching language for the given file extension.
2015-06-21 17:35:14 +00:00
"""
2015-09-29 10:11:25 +00:00
filepart, extension = os.path.splitext(file_name)
if os.path.exists(u('{0}{1}').format(u(filepart), u('.c'))) or os.path.exists(u('{0}{1}').format(u(filepart), u('.C'))):
return 'C'
extension = extension.lower()
2015-08-25 07:42:37 +00:00
if extension == '.h':
directory = os.path.dirname(file_name)
available_files = os.listdir(directory)
2015-09-29 10:11:25 +00:00
available_extensions = list(zip(*map(os.path.splitext, available_files)))[1]
2015-08-25 07:42:37 +00:00
available_extensions = [ext.lower() for ext in available_extensions]
if '.cpp' in available_extensions:
return 'C++'
if '.c' in available_extensions:
return 'C'
2015-06-21 17:35:14 +00:00
2013-10-26 04:33:31 +00:00
return None
2013-09-22 20:51:23 +00:00
def number_lines_in_file(file_name):
lines = 0
try:
2014-09-30 16:27:35 +00:00
with open(file_name, 'r', encoding='utf-8') as fh:
for line in fh:
2013-09-22 20:51:23 +00:00
lines += 1
2015-09-29 10:11:25 +00:00
except: # pragma: nocover
2015-08-25 07:42:37 +00:00
try:
with open(file_name, 'r', encoding=sys.getfilesystemencoding()) as fh:
for line in fh:
lines += 1
except:
return None
2013-09-22 20:51:23 +00:00
return lines
2016-04-28 22:04:46 +00:00
def get_file_stats(file_name, entity_type='file', lineno=None, cursorpos=None,
plugin=None, alternate_language=None):
2015-09-29 10:11:25 +00:00
if entity_type != 'file':
2015-03-09 22:23:29 +00:00
stats = {
'language': None,
'dependencies': [],
'lines': None,
2015-05-06 22:45:34 +00:00
'lineno': lineno,
2015-05-06 23:33:32 +00:00
'cursorpos': cursorpos,
2015-03-09 22:23:29 +00:00
}
else:
language, lexer = guess_language(file_name)
parser = DependencyParser(file_name, lexer)
dependencies = parser.parse()
2016-04-28 22:04:46 +00:00
if language is None and alternate_language:
language = standardize_language(alternate_language, plugin)
2015-03-09 22:23:29 +00:00
stats = {
'language': language,
'dependencies': dependencies,
'lines': number_lines_in_file(file_name),
2015-05-06 22:45:34 +00:00
'lineno': lineno,
2015-05-06 23:33:32 +00:00
'cursorpos': cursorpos,
2015-03-09 22:23:29 +00:00
}
2013-09-22 20:51:23 +00:00
return stats
2015-06-21 17:35:14 +00:00
2016-04-28 22:04:46 +00:00
def standardize_language(language, plugin):
"""Maps a string to the equivalent Pygments language."""
# standardize language for this plugin
if plugin:
plugin = plugin.split(' ')[-1].split('/')[0].split('-')[0]
standardized = get_language_from_json(language, plugin)
if standardized is not None:
return standardized
# standardize language against default languages
standardized = get_language_from_json(language, 'default')
if standardized is not None:
return standardized
return None
def get_language_from_json(language, key):
"""Finds the given language in a json file."""
file_name = os.path.join(
os.path.dirname(__file__),
'languages',
'{0}.json').format(key.lower())
try:
with open(file_name, 'r', encoding='utf-8') as fh:
languages = json.loads(fh.read())
if language in languages.values():
return language
if languages.get(language):
return languages[language]
except:
pass
return None
def get_file_head(file_name):
"""Returns the first 512000 bytes of the file's contents."""
2015-06-21 17:35:14 +00:00
text = None
try:
with open(file_name, 'r', encoding='utf-8') as fh:
text = fh.read(512000)
2015-09-29 10:11:25 +00:00
except: # pragma: nocover
2015-08-25 07:42:37 +00:00
try:
with open(file_name, 'r', encoding=sys.getfilesystemencoding()) as fh:
text = fh.read(512000)
except:
2016-03-06 20:48:42 +00:00
log.traceback('debug')
2015-06-21 17:35:14 +00:00
return text