dependency parser base models

This commit is contained in:
Alan Hamlett 2014-12-22 23:12:57 -06:00
parent f6f090ee1f
commit 67c5f02a80
2 changed files with 78 additions and 11 deletions

View File

@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
"""
wakatime.languages
~~~~~~~~~~~~~~~~~~
Parse dependencies from a source code file.
:copyright: (c) 2013 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from ..compat import open, import_module
class TokenParser(object):
    """Base class for language-specific dependency parsers.

    A subclass overrides :meth:`parse` to inspect the lexer's tokens and
    records every dependency it finds through :meth:`append`.
    """

    # Class-level defaults are kept only for backward compatibility.
    # Each instance receives its own fresh lists in ``__init__`` so that
    # dependencies found in one file never leak into another parser.
    source_file = None
    lexer = None
    dependencies = []
    tokens = []

    def __init__(self, source_file, lexer=None):
        """Remember the file to parse and the lexer used to tokenize it.

        :param source_file: path of the source code file.
        :param lexer: object providing ``get_tokens_unprocessed(text)``.
        """
        self.source_file = source_file
        self.lexer = lexer
        # Per-instance state; never share the class-level lists.
        self.dependencies = []
        self.tokens = []

    def parse(self, tokens=None):
        """ Should return a list of dependencies.

        The base implementation only makes sure ``self.tokens`` is
        populated and then raises, because the actual parsing is
        language specific.

        :param tokens: optional pre-extracted tokens; when empty and no
            tokens are cached yet, they are extracted from the file.
        :raises NotImplementedError: always, in this base class.
        """
        if not tokens and not self.tokens:
            self.tokens = self._extract_tokens()
        # NotImplementedError is an Exception subclass, so callers that
        # catch Exception keep working.
        raise NotImplementedError('Not yet implemented.')

    def append(self, dep):
        """Record ``dep`` as a dependency (see ``_save_dependency``)."""
        self._save_dependency(dep)

    def _extract_tokens(self):
        """Tokenize the start of the source file with the lexer.

        Reads at most 512000 characters and returns whatever
        ``lexer.get_tokens_unprocessed`` returns for that text.
        """
        with open(self.source_file, 'r', encoding='utf-8') as fh:
            return self.lexer.get_tokens_unprocessed(fh.read(512000))

    def _save_dependency(self, dep):
        """Store the top-level name of ``dep``.

        Only the part before the first ``.`` is kept (``os.path`` is
        saved as ``os``); blank names are ignored.
        """
        dep = dep.strip().split('.')[0].strip()
        if dep:
            self.dependencies.append(dep)
class DependencyParser(object):
    """Find and run the dependency parser that matches a lexer.

    The parser class is looked up by convention: a lexer class named
    ``FooLexer`` defined in module ``bar`` maps to class ``FooParser`` in
    the sibling module ``.bar`` of this package.
    """

    source_file = None
    lexer = None
    parser = None

    def __init__(self, source_file, lexer):
        """Resolve the parser class for ``lexer``, if one exists.

        :param source_file: path of the source code file.
        :param lexer: lexer instance guessed for the file, or ``None``
            when no lexer could be determined.
        """
        self.source_file = source_file
        self.lexer = lexer
        self.parser = None

        # Without a lexer there is nothing to derive a parser name from.
        if lexer is None:
            return

        module_name = lexer.__module__.split('.')[-1]
        class_name = lexer.__class__.__name__.replace('Lexer', 'Parser', 1)
        try:
            module = import_module('.%s' % module_name, package=__package__)
        except ImportError:
            # No parser module for this language; leave parser unset.
            return
        # The module may exist without defining a parser for this lexer.
        self.parser = getattr(module, class_name, None)

    def parse(self):
        """Return the unique dependencies of the source file.

        :returns: list of de-duplicated dependencies (order is not
            preserved), or an empty list when no parser is available.
        """
        if not self.parser:
            return []
        plugin = self.parser(self.source_file, lexer=self.lexer)
        return list(set(plugin.parse()))

View File

@ -14,6 +14,7 @@ import os
import sys
from .compat import u, open
from .languages import DependencyParser
if sys.version_info[0] == 2:
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'packages', 'pygments_py2'))
@ -46,20 +47,17 @@ TRANSLATIONS = {
def guess_language(file_name):
    """Guess the language of a file and the lexer that matches it.

    The language derived from the file extension takes precedence; the
    lexer's name is only used as the language when the extension gives
    no answer. The lexer is returned either way.

    :param file_name: path of the file to inspect; may be empty/None.
    :returns: always a ``(language, lexer)`` tuple, where either element
        is ``None`` when it could not be determined.
    """
    if not file_name:
        return None, None

    lexer = None
    try:
        with open(file_name, 'r', encoding='utf-8') as fh:
            lexer = guess_lexer_for_filename(file_name, fh.read(512000))
    except Exception:
        # Best effort: if the file cannot be read or no lexer is found,
        # continue without a lexer instead of failing.
        pass

    language = guess_language_from_extension(file_name.rsplit('.', 1)[-1])
    if lexer and language is None:
        language = translate_language(u(lexer.name))
    return language, lexer
def guess_language_from_extension(extension):
@ -89,9 +87,11 @@ def number_lines_in_file(file_name):
def get_file_stats(file_name):
dependencies = []
language, lexer = guess_language(file_name)
parser = DependencyParser(file_name, lexer)
dependencies = parser.parse()
stats = {
'language': guess_language(file_name),
'language': language,
'dependencies': dependencies,
'lines': number_lines_in_file(file_name),
}