improve java dependency detection

2015-09-16 14:59:30 -07:00 · 2015-09-16 14:59:30 -07:00 · 4d45305650
commit 4d45305650
parent 5265160aa8
8 changed files with 161 additions and 18 deletions
--- a/wakatime/languages/__init__.py
+++ b/wakatime/languages/__init__.py
@ -10,6 +10,7 @@
 """

 import logging
+import re
 import sys
 import traceback

@ -24,12 +25,14 @@ class TokenParser(object):
    language, inherit from this class and implement the :meth:`parse` method
    to return a list of dependency strings.
    """
+    exclude = []

    def __init__(self, source_file, lexer=None):
        self.tokens = []
        self.dependencies = []
        self.source_file = source_file
        self.lexer = lexer
+        self.exclude = [re.compile(x, re.IGNORECASE) for x in self.exclude]

    def parse(self, tokens=[]):
        """ Should return a list of dependencies.
@ -48,6 +51,9 @@ class TokenParser(object):
            strip_whitespace=strip_whitespace,
        )

+    def partial(self, token):
+        return u(token).split('.')[-1]
+
    def _extract_tokens(self):
        if self.lexer:
            try:
@ -77,7 +83,13 @@ class TokenParser(object):
        if strip_whitespace:
            dep = dep.strip()
        if dep and (not separator or not dep.startswith(separator)):
-            self.dependencies.append(dep)
+            should_exclude = False
+            for compiled in self.exclude:
+                if compiled.search(dep):
+                    should_exclude = True
+                    break
+            if not should_exclude:
+                self.dependencies.append(dep)


 class DependencyParser(object):
--- a/wakatime/languages/c_cpp.py
+++ b/wakatime/languages/c_cpp.py
@ -10,7 +10,6 @@
 """

 from . import TokenParser
-from ..compat import u


 class CppParser(TokenParser):
@ -23,7 +22,7 @@ class CppParser(TokenParser):
        return self.dependencies

    def _process_token(self, token, content):
-        if u(token).split('.')[-1] == 'Preproc':
+        if self.partial(token) == 'Preproc':
            self._process_preproc(token, content)
        else:
            self._process_other(token, content)
--- a/wakatime/languages/dotnet.py
+++ b/wakatime/languages/dotnet.py
@ -10,7 +10,6 @@
 """

 from . import TokenParser
-from ..compat import u


 class CSharpParser(TokenParser):
@ -23,7 +22,7 @@ class CSharpParser(TokenParser):
        return self.dependencies

    def _process_token(self, token, content):
-        if u(token).split('.')[-1] == 'Namespace':
+        if self.partial(token) == 'Namespace':
            self._process_namespace(token, content)
        else:
            self._process_other(token, content)
--- a/wakatime/languages/jvm.py
+++ b/wakatime/languages/jvm.py
@ -14,6 +14,16 @@ from ..compat import u


 class JavaParser(TokenParser):
+    exclude = [
+        r'^java\.',
+        r'^javax\.',
+        r'^import$',
+        r'^package$',
+        r'^namespace$',
+        r'^static$',
+    ]
+    state = None
+    buffer = u('')

    def parse(self, tokens=[]):
        if not tokens and not self.tokens:
@ -23,14 +33,66 @@ class JavaParser(TokenParser):
        return self.dependencies

    def _process_token(self, token, content):
-        if u(token).split('.')[-1] == 'Namespace':
+        if self.partial(token) == 'Namespace':
            self._process_namespace(token, content)
+        elif self.partial(token) == 'Name':
+            self._process_name(token, content)
+        elif self.partial(token) == 'Attribute':
+            self._process_attribute(token, content)
+        elif self.partial(token) == 'Operator':
+            self._process_operator(token, content)
        else:
            self._process_other(token, content)

    def _process_namespace(self, token, content):
-        if content != 'import' and content != 'package' and content != 'namespace':
-            self.append(content, truncate=True)
+        if u(content) == u('import'):
+            self.state = 'import'
+
+        elif self.state == 'import':
+            keywords = [
+                u('package'),
+                u('namespace'),
+                u('static'),
+            ]
+            if u(content) in keywords:
+                return
+            self.buffer = u('{0}{1}').format(self.buffer, u(content))
+
+        elif self.state == 'import-finished':
+            content = content.split(u('.'))
+
+            if len(content) == 1:
+                self.append(content[0])
+
+            elif len(content) > 1:
+                if len(content[0]) == 3:
+                    content = content[1:]
+                if content[-1] == u('*'):
+                    content = content[:len(content) - 1]
+
+                if len(content) == 1:
+                    self.append(content[0])
+                elif len(content) > 1:
+                    self.append(u('.').join(content[:2]))
+
+            self.state = None
+
+    def _process_name(self, token, content):
+        if self.state == 'import':
+            self.buffer = u('{0}{1}').format(self.buffer, u(content))
+
+    def _process_attribute(self, token, content):
+        if self.state == 'import':
+            self.buffer = u('{0}{1}').format(self.buffer, u(content))
+
+    def _process_operator(self, token, content):
+        if u(content) == u(';'):
+            self.state = 'import-finished'
+            self._process_namespace(token, self.buffer)
+            self.state = None
+            self.buffer = u('')
+        elif self.state == 'import':
+            self.buffer = u('{0}{1}').format(self.buffer, u(content))

    def _process_other(self, token, content):
        pass
--- a/wakatime/languages/php.py
+++ b/wakatime/languages/php.py
@ -25,7 +25,7 @@ class PhpParser(TokenParser):
        return self.dependencies

    def _process_token(self, token, content):
-        if u(token).split('.')[-1] == 'Keyword':
+        if self.partial(token) == 'Keyword':
            self._process_keyword(token, content)
        elif u(token) == 'Token.Literal.String.Single' or u(token) == 'Token.Literal.String.Double':
            self._process_literal_string(token, content)
@ -33,9 +33,9 @@ class PhpParser(TokenParser):
            self._process_name(token, content)
        elif u(token) == 'Token.Name.Function':
            self._process_function(token, content)
-        elif u(token).split('.')[-1] == 'Punctuation':
+        elif self.partial(token) == 'Punctuation':
            self._process_punctuation(token, content)
-        elif u(token).split('.')[-1] == 'Text':
+        elif self.partial(token) == 'Text':
            self._process_text(token, content)
        else:
            self._process_other(token, content)
--- a/wakatime/languages/python.py
+++ b/wakatime/languages/python.py
@ -10,7 +10,6 @@
 """

 from . import TokenParser
-from ..compat import u


 class PythonParser(TokenParser):
@ -26,17 +25,17 @@ class PythonParser(TokenParser):
        return self.dependencies

    def _process_token(self, token, content):
-        if u(token).split('.')[-1] == 'Namespace':
+        if self.partial(token) == 'Namespace':
            self._process_namespace(token, content)
-        elif u(token).split('.')[-1] == 'Name':
+        elif self.partial(token) == 'Name':
            self._process_name(token, content)
-        elif u(token).split('.')[-1] == 'Word':
+        elif self.partial(token) == 'Word':
            self._process_word(token, content)
-        elif u(token).split('.')[-1] == 'Operator':
+        elif self.partial(token) == 'Operator':
            self._process_operator(token, content)
-        elif u(token).split('.')[-1] == 'Punctuation':
+        elif self.partial(token) == 'Punctuation':
            self._process_punctuation(token, content)
-        elif u(token).split('.')[-1] == 'Text':
+        elif self.partial(token) == 'Text':
            self._process_text(token, content)
        else:
            self._process_other(token, content)