don't truncate dependencies by default. improve python dependency parser.

This commit is contained in:
Alan Hamlett 2014-12-24 23:09:15 -06:00
parent 40c8067dbf
commit 24d066c572
6 changed files with 55 additions and 40 deletions

View File

@ -12,7 +12,7 @@
import logging
import traceback
from ..compat import open, import_module
from ..compat import u, open, import_module
log = logging.getLogger('WakaTime')
@ -39,8 +39,17 @@ class TokenParser(object):
self.tokens = self._extract_tokens()
raise Exception('Not yet implemented.')
def append(self, dep, truncate=True):
self._save_dependency(dep, truncate=truncate)
def append(self, dep, truncate=False, separator=None, truncate_to=None,
           strip_whitespace=True):
    """Record ``dep`` as a dependency of the file being parsed.

    This is a thin public wrapper: every argument is forwarded unchanged
    to ``_save_dependency``, which does the actual processing.

    Args:
        dep: Dependency text extracted from the source file.
        truncate: Forwarded as ``truncate``. Defaults to ``False`` so
            dependencies are kept whole unless a caller opts in.
        separator: Forwarded as ``separator``; ``None`` leaves the choice
            of separator to ``_save_dependency``.
        truncate_to: Forwarded as ``truncate_to``; ``None`` leaves the
            choice to ``_save_dependency``.
        strip_whitespace: Forwarded as ``strip_whitespace``.
    """
    # The leftover debug print for the 'as' token has been removed; a
    # parser must not write to stdout.
    self._save_dependency(
        dep,
        truncate=truncate,
        truncate_to=truncate_to,
        separator=separator,
        strip_whitespace=strip_whitespace,
    )
def _extract_tokens(self):
if self.lexer:
@ -48,8 +57,18 @@ class TokenParser(object):
return self.lexer.get_tokens_unprocessed(fh.read(512000))
return []
def _save_dependency(self, dep, truncate=True):
dep = dep.strip().split('.')[0].strip() if truncate else dep.strip()
def _save_dependency(self, dep, truncate=False, separator=None,
                     truncate_to=None, strip_whitespace=True):
    """Normalise ``dep`` and add it to ``self.dependencies``.

    Args:
        dep: Dependency text to store.
        truncate: When true, ``dep`` is split on ``separator`` and only
            its leading segments are kept.
        separator: Segment separator used when truncating; ``None``
            means ``'.'``.
        truncate_to: Number of leading segments to keep. ``None``, a
            negative value, or a value larger than ``segments - 1`` all
            mean "drop only the last segment". At least one segment is
            always kept.
        strip_whitespace: When true, surrounding whitespace is removed
            from the final value.

    The value is appended only when it is non-empty after processing.
    """
    if truncate:
        separator = u('.') if separator is None else u(separator)
        segments = dep.split(separator)
        drop_last = len(segments) - 1
        if truncate_to is None or truncate_to < 0 or truncate_to > drop_last:
            truncate_to = drop_last
        # Never keep fewer than one segment: slicing to zero segments
        # produced an empty string, so names such as ``os.path`` with
        # truncate_to=0 were silently discarded instead of saved as ``os``.
        dep = separator.join(segments[:max(truncate_to, 1)])
    if strip_whitespace:
        dep = dep.strip()
    if dep:
        self.dependencies.append(dep)

View File

@ -31,7 +31,7 @@ class CppParser(TokenParser):
def _process_preproc(self, token, content):
if content.strip().startswith('include ') or content.strip().startswith("include\t"):
content = content.replace('include', '', 1).strip()
self.append(content, truncate=False)
self.append(content)
# Does nothing with the given token and content: the body is a bare `pass`.
def _process_other(self, token, content):
pass

View File

@ -30,9 +30,7 @@ class CSharpParser(TokenParser):
def _process_namespace(self, token, content):
if content != 'import' and content != 'package' and content != 'namespace':
content = content.split('.')
content = content[0] if len(content) == 1 else '.'.join(content[0:len(content)-1])
self.append(content, truncate=False)
self.append(content, truncate=True)
# Does nothing with the given token and content: the body is a bare `pass`.
def _process_other(self, token, content):
pass

View File

@ -30,9 +30,7 @@ class JavaParser(TokenParser):
def _process_namespace(self, token, content):
if content != 'import' and content != 'package' and content != 'namespace':
content = content.split('.')
content = content[0] if len(content) == 1 else '.'.join(content[0:len(content)-1])
self.append(content, truncate=False)
self.append(content, truncate=True)
# Does nothing with the given token and content: the body is a bare `pass`.
def _process_other(self, token, content):
pass

View File

@ -42,15 +42,11 @@ class PhpParser(TokenParser):
def _process_name(self, token, content):
if self.state == 'use':
content = content.split("\\")
content = content[0] if len(content) == 1 else "\\".join(content[0:len(content)-1])
self.append(content, truncate=False)
self.append(content, truncate=True, separator=u("\\"))
def _process_function(self, token, content):
if self.state == 'use function':
content = content.split("\\")
content = content[0] if len(content) == 1 else "\\".join(content[0:len(content)-1])
self.append(content, truncate=False)
self.append(content, truncate=True, separator=u("\\"))
self.state = 'use'
def _process_keyword(self, token, content):
@ -71,7 +67,7 @@ class PhpParser(TokenParser):
content = content.strip()
if u(token) == 'Token.Literal.String.Double':
content = u('"{0}"').format(content)
self.append(content, truncate=False)
self.append(content)
self.state = None
def _process_punctuation(self, token, content):

View File

@ -45,7 +45,10 @@ class PythonParser(TokenParser):
if self.state is None:
self.state = content
else:
self._process_import(token, content)
if content == 'as':
self.nonpackage = True
else:
self._process_import(token, content)
def _process_name(self, token, content):
if self.state is not None:
@ -53,13 +56,13 @@ class PythonParser(TokenParser):
self.nonpackage = False
else:
if self.state == 'from':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
if self.state == 'from-2' and content != 'import':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
elif self.state == 'import':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
elif self.state == 'import-2':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
else:
self.state = None
@ -69,13 +72,13 @@ class PythonParser(TokenParser):
self.nonpackage = False
else:
if self.state == 'from':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
if self.state == 'from-2' and content != 'import':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
elif self.state == 'import':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
elif self.state == 'import-2':
self.append(content)
self.append(content, truncate=True, truncate_to=0)
else:
self.state = None
@ -101,16 +104,17 @@ class PythonParser(TokenParser):
pass
# Advance the import state machine for one token and record its content.
# While `self.nonpackage` is falsy, `content` is passed to `self.append`
# (truncate=True, truncate_to=0) in the 'from', 'from-2' (unless content is
# 'import'), 'import' and 'import-2' states. 'from' and 'import' then move
# to their '-2' state; any other state resets `self.state` to None.
# NOTE(review): indentation is lost in this view. The final
# `self.nonpackage = False` presumably sits outside the
# `if not self.nonpackage:` block, clearing the flag after every call --
# confirm against the real file.
def _process_import(self, token, content):
if not self.nonpackage:
if self.state == 'from':
self.append(content, truncate=True, truncate_to=0)
self.state = 'from-2'
elif self.state == 'from-2' and content != 'import':
self.append(content, truncate=True, truncate_to=0)
elif self.state == 'import':
self.append(content, truncate=True, truncate_to=0)
self.state = 'import-2'
elif self.state == 'import-2':
self.append(content, truncate=True, truncate_to=0)
else:
self.state = None
self.nonpackage = False
if self.state == 'from':
self.append(content)
self.state = 'from-2'
elif self.state == 'from-2' and content != 'import':
self.append(content)
elif self.state == 'import':
self.append(content)
self.state = 'import-2'
elif self.state == 'import-2':
self.append(content)
else:
self.state = None