diff --git a/packages/wakatime/__about__.py b/packages/wakatime/__about__.py index 8208813..51edf6e 100644 --- a/packages/wakatime/__about__.py +++ b/packages/wakatime/__about__.py @@ -1,7 +1,7 @@ __title__ = 'wakatime' __description__ = 'Common interface to the WakaTime api.' __url__ = 'https://github.com/wakatime/wakatime' -__version_info__ = ('4', '0', '13') +__version_info__ = ('4', '0', '15') __version__ = '.'.join(__version_info__) __author__ = 'Alan Hamlett' __author_email__ = 'alan@wakatime.com' diff --git a/packages/wakatime/base.py b/packages/wakatime/base.py index 22085de..c49b81a 100644 --- a/packages/wakatime/base.py +++ b/packages/wakatime/base.py @@ -314,14 +314,15 @@ def send_heartbeat(project=None, branch=None, stats={}, key=None, targetFile=Non log.debug('Sending heartbeat to api at %s' % api_url) data = { 'time': timestamp, - 'file': targetFile, + 'entity': targetFile, + 'type': 'file', } if hidefilenames and targetFile is not None and not notfile: - data['file'] = data['file'].rsplit('/', 1)[-1].rsplit('\\', 1)[-1] - if len(data['file'].strip('.').split('.', 1)) > 1: - data['file'] = u('HIDDEN.{ext}').format(ext=u(data['file'].strip('.').rsplit('.', 1)[-1])) + data['entity'] = data['entity'].rsplit('/', 1)[-1].rsplit('\\', 1)[-1] + if len(data['entity'].strip('.').split('.', 1)) > 1: + data['entity'] = u('HIDDEN.{ext}').format(ext=u(data['entity'].strip('.').rsplit('.', 1)[-1])) else: - data['file'] = u('HIDDEN') + data['entity'] = u('HIDDEN') if stats.get('lines'): data['lines'] = stats['lines'] if stats.get('language'): @@ -455,15 +456,11 @@ def main(argv=None): project_name = args.alternate_project kwargs = vars(args) - if 'project' in kwargs: - del kwargs['project'] + kwargs['project'] = project_name + kwargs['branch'] = branch + kwargs['stats'] = stats - if send_heartbeat( - project=project_name, - branch=branch, - stats=stats, - **kwargs - ): + if send_heartbeat(**kwargs): queue = Queue() while True: heartbeat = queue.pop() diff --git a/packages/wakatime/logger.py b/packages/wakatime/logger.py index 3c768cf..2dc453e 100644 --- a/packages/wakatime/logger.py +++ b/packages/wakatime/logger.py @@ -37,15 +37,17 @@ class CustomEncoder(json.JSONEncoder): class JsonFormatter(logging.Formatter): - def setup(self, timestamp, isWrite, targetFile, version, plugin, verbose): + def setup(self, timestamp, isWrite, targetFile, version, plugin, verbose, + warnings=False): self.timestamp = timestamp self.isWrite = isWrite self.targetFile = targetFile self.version = version self.plugin = plugin self.verbose = verbose + self.warnings = warnings - def format(self, record): + def format(self, record, *args): data = OrderedDict([ ('now', self.formatTime(record, self.datefmt)), ]) @@ -60,7 +62,7 @@ class JsonFormatter(logging.Formatter): if not self.isWrite: del data['isWrite'] data['level'] = record.levelname - data['message'] = record.msg + data['message'] = record.getMessage() if self.warnings else record.msg if not self.plugin: del data['plugin'] return CustomEncoder().encode(data) @@ -77,7 +79,6 @@ def set_log_level(logger, args): def setup_logging(args, version): - logging.captureWarnings(True) logger = logging.getLogger('WakaTime') set_log_level(logger, args) if len(logger.handlers) > 0: @@ -107,5 +108,23 @@ def setup_logging(args, version): ) handler.setFormatter(formatter) logger.addHandler(handler) - logging.getLogger('py.warnings').addHandler(handler) + + warnings_formatter = JsonFormatter(datefmt='%Y/%m/%d %H:%M:%S %z') + warnings_formatter.setup( + timestamp=args.timestamp, + isWrite=args.isWrite, + targetFile=args.targetFile, + version=version, + plugin=args.plugin, + verbose=args.verbose, + warnings=True, + ) + warnings_handler = logging.FileHandler(os.path.expanduser(logfile)) + warnings_handler.setFormatter(warnings_formatter) + logging.getLogger('py.warnings').addHandler(warnings_handler) + try: + logging.captureWarnings(True) + except AttributeError: + pass # Python >= 2.7 is needed to capture warnings + return logger diff --git a/packages/wakatime/stats.py b/packages/wakatime/stats.py index a245c1d..4e231dd 100644 --- a/packages/wakatime/stats.py +++ b/packages/wakatime/stats.py @@ -20,13 +20,15 @@ if sys.version_info[0] == 2: sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py2')) else: sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py3')) -from pygments.lexers import guess_lexer_for_filename +from pygments.lexers import get_lexer_by_name, guess_lexer_for_filename +from pygments.modeline import get_filetype_from_buffer +from pygments.util import ClassNotFound log = logging.getLogger('WakaTime') -# force file name extensions to be recognized as a certain language +# extensions taking priority over lexer EXTENSIONS = { 'j2': 'HTML', 'markdown': 'Markdown', @@ -34,6 +36,8 @@ EXTENSIONS = { 'mdown': 'Markdown', 'twig': 'Twig', } + +# lexers to human readable languages TRANSLATIONS = { 'CSS+Genshi Text': 'CSS', 'CSS+Lasso': 'CSS', @@ -45,31 +49,127 @@ TRANSLATIONS = { 'RHTML': 'HTML', } +# extensions for when no lexer is found +AUXILIARY_EXTENSIONS = { + 'vb': 'VB.net', +} + def guess_language(file_name): - language, lexer = None, None - try: - with open(file_name, 'r', encoding='utf-8') as fh: - lexer = guess_lexer_for_filename(file_name, fh.read(512000)) - except: - pass + """Guess lexer and language for a file. + + Returns (language, lexer) tuple where language is a unicode string. + """ + + lexer = smart_guess_lexer(file_name) + + language = None + + # guess language from file extension if file_name: - language = guess_language_from_extension(file_name.rsplit('.', 1)[-1]) - if lexer and language is None: - language = translate_language(u(lexer.name)) + language = get_language_from_extension(file_name, EXTENSIONS) + + # get language from lexer if we didn't have a hard-coded extension rule + if language is None and lexer: + language = u(lexer.name) + + if language is None: + language = get_language_from_extension(file_name, AUXILIARY_EXTENSIONS) + + if language is not None: + language = translate_language(language) + return language, lexer -def guess_language_from_extension(extension): +def smart_guess_lexer(file_name): + """Guess Pygments lexer for a file. + + Looks for a vim modeline in file contents, then compares the accuracy + of that lexer with a second guess. The second guess looks up all lexers + matching the file name, then runs a text analysis for the best choice. + """ + lexer = None + + text = get_file_contents(file_name) + + lexer_1, accuracy_1 = guess_lexer_using_filename(file_name, text) + lexer_2, accuracy_2 = guess_lexer_using_modeline(text) + + if lexer_1: + lexer = lexer_1 + if (lexer_2 and accuracy_2 and + (not accuracy_1 or accuracy_2 > accuracy_1)): + lexer = lexer_2 + + return lexer + + +def guess_lexer_using_filename(file_name, text): + """Guess lexer for given text, limited to lexers for this file's extension. + + Returns a tuple of (lexer, accuracy). + """ + + lexer, accuracy = None, None + + try: + lexer = guess_lexer_for_filename(file_name, text) + except: + pass + + if lexer is not None: + try: + accuracy = lexer.analyse_text(text) + except: + pass + + return lexer, accuracy + + +def guess_lexer_using_modeline(text): + """Guess lexer for given text using Vim modeline. + + Returns a tuple of (lexer, accuracy). + """ + + lexer, accuracy = None, None + + file_type = get_filetype_from_buffer(text) + if file_type is not None: + try: + lexer = get_lexer_by_name(file_type) + except ClassNotFound: + pass + + if lexer is not None: + try: + accuracy = lexer.analyse_text(text) + except: + pass + + return lexer, accuracy + + +def get_language_from_extension(file_name, extension_map): + """Returns a matching language for the given file_name using extension_map. + """ + + extension = file_name.rsplit('.', 1)[-1] if len(file_name.rsplit('.', 1)) > 1 else None + if extension: - if extension in EXTENSIONS: - return EXTENSIONS[extension] - if extension.lower() in EXTENSIONS: - return EXTENSIONS[extension.lower()] + if extension in extension_map: + return extension_map[extension] + if extension.lower() in extension_map: + return extension_map[extension.lower()] + return None def translate_language(language): + """Turns Pygments lexer class name string into human-readable language. + """ + if language in TRANSLATIONS: language = TRANSLATIONS[language] return language @@ -107,3 +207,16 @@ def get_file_stats(file_name, notfile=False, lineno=None, cursorpos=None): 'cursorpos': cursorpos, } return stats + + +def get_file_contents(file_name): + """Returns the first 512000 bytes of the file's contents. + """ + + text = None + try: + with open(file_name, 'r', encoding='utf-8') as fh: + text = fh.read(512000) + except: + pass + return text