improve java dependency detection

This commit is contained in:
Alan Hamlett 2015-09-16 14:59:30 -07:00
parent 5265160aa8
commit 4d45305650
8 changed files with 161 additions and 18 deletions

View file

@ -0,0 +1,20 @@
// Hello.java -- minimal servlet; presumably the sample input for the Java dependency-detection test (confirm against the test's fixture path).
import java.io.*; // JDK wildcard import
import static java.lang.Math.*; // static wildcard import from the JDK
import static com.googlecode.javacv.jna.highgui.cvReleaseCapture; // static import of a single third-party member
import javax.servlet.*; // javax wildcard import
import com.colorfulwolf.webcamapplet.gui.ImagePanel; // third-party single-class import
import com.foobar.*; // third-party wildcard import
public class Hello extends GenericServlet {
public void service(final ServletRequest request, final ServletResponse response)
throws ServletException, IOException {
response.setContentType("text/html");
final PrintWriter pw = response.getWriter();
try {
pw.println("Hello, world!");
} finally {
pw.close(); // close the writer whether or not println succeeded
}
}
}

View file

@ -131,3 +131,55 @@ class LanguagesTestCase(utils.TestCase):
self.assertIn(dep, self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0]['dependencies']) self.assertIn(dep, self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0]['dependencies'])
self.assertEquals(stats, json.loads(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][1])) self.assertEquals(stats, json.loads(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][1]))
self.patched['wakatime.offlinequeue.Queue.pop'].assert_not_called() self.patched['wakatime.offlinequeue.Queue.pop'].assert_not_called()
def test_java_dependencies_detected(self):
    """Check the dependencies reported for the sample Java file.

    The patched HTTP adapter returns a response whose ``status_code`` is 0;
    the test then expects ``execute`` to return 102 and the heartbeat to be
    pushed to the offline queue, carrying exactly the expected dependency
    names (order-insensitive).
    """
    response = Response()
    response.status_code = 0
    self.patched['wakatime.packages.requests.adapters.HTTPAdapter.send'].return_value = response

    now = u(int(time.time()))
    entity = 'tests/samples/codefiles/java.java'
    config = 'tests/samples/configs/good_config.cfg'

    args = ['--file', entity, '--config', config, '--time', now]

    retval = execute(args)
    # assertEqual is used instead of the deprecated assertEquals alias,
    # which no longer exists on Python 3.12+.
    self.assertEqual(retval, 102)
    self.assertEqual(sys.stdout.getvalue(), '')
    self.assertEqual(sys.stderr.getvalue(), '')

    self.patched['wakatime.session_cache.SessionCache.get'].assert_called_once_with()
    self.patched['wakatime.session_cache.SessionCache.delete'].assert_called_once_with()
    self.patched['wakatime.session_cache.SessionCache.save'].assert_not_called()

    heartbeat = {
        'language': u('Java'),
        'lines': 20,
        'entity': os.path.realpath(entity),
        'project': u(os.path.basename(os.path.realpath('.'))),
        # Dependencies are compared separately below, ignoring order.
        'dependencies': ANY,
        'branch': os.environ.get('TRAVIS_COMMIT', ANY),
        'time': float(now),
        'type': 'file',
    }
    stats = {
        u('cursorpos'): None,
        u('dependencies'): ANY,
        u('language'): u('Java'),
        u('lineno'): None,
        u('lines'): 20,
    }
    expected_dependencies = [
        'googlecode.javacv',
        'colorfulwolf.webcamapplet',
        'foobar',
    ]

    def normalize(items):
        # Coerce every item through u() and sort so that the comparison
        # does not depend on ordering or string type.
        return sorted([u(x) for x in items])

    self.patched['wakatime.offlinequeue.Queue.push'].assert_called_once_with(heartbeat, ANY, None)
    dependencies = self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][0]['dependencies']
    self.assertEqual(normalize(dependencies), normalize(expected_dependencies))
    self.assertEqual(stats, json.loads(self.patched['wakatime.offlinequeue.Queue.push'].call_args[0][1]))
    self.patched['wakatime.offlinequeue.Queue.pop'].assert_not_called()

View file

@ -10,6 +10,7 @@
""" """
import logging import logging
import re
import sys import sys
import traceback import traceback
@ -24,12 +25,14 @@ class TokenParser(object):
language, inherit from this class and implement the :meth:`parse` method language, inherit from this class and implement the :meth:`parse` method
to return a list of dependency strings. to return a list of dependency strings.
""" """
exclude = []
def __init__(self, source_file, lexer=None): def __init__(self, source_file, lexer=None):
self.tokens = [] self.tokens = []
self.dependencies = [] self.dependencies = []
self.source_file = source_file self.source_file = source_file
self.lexer = lexer self.lexer = lexer
self.exclude = [re.compile(x, re.IGNORECASE) for x in self.exclude]
def parse(self, tokens=[]): def parse(self, tokens=[]):
""" Should return a list of dependencies. """ Should return a list of dependencies.
@ -48,6 +51,9 @@ class TokenParser(object):
strip_whitespace=strip_whitespace, strip_whitespace=strip_whitespace,
) )
def partial(self, token):
    """Return the last dot-separated component of ``u(token)``.

    For example, a token whose text form is ``Token.Name.Namespace``
    yields ``Namespace``.
    """
    components = u(token).split('.')
    return components[-1]
def _extract_tokens(self): def _extract_tokens(self):
if self.lexer: if self.lexer:
try: try:
@ -77,7 +83,13 @@ class TokenParser(object):
if strip_whitespace: if strip_whitespace:
dep = dep.strip() dep = dep.strip()
if dep and (not separator or not dep.startswith(separator)): if dep and (not separator or not dep.startswith(separator)):
self.dependencies.append(dep) should_exclude = False
for compiled in self.exclude:
if compiled.search(dep):
should_exclude = True
break
if not should_exclude:
self.dependencies.append(dep)
class DependencyParser(object): class DependencyParser(object):

View file

@ -10,7 +10,6 @@
""" """
from . import TokenParser from . import TokenParser
from ..compat import u
class CppParser(TokenParser): class CppParser(TokenParser):
@ -23,7 +22,7 @@ class CppParser(TokenParser):
return self.dependencies return self.dependencies
def _process_token(self, token, content):
    """Dispatch one lexer token to the matching handler.

    Tokens whose type ends in ``Preproc`` are passed to
    ``_process_preproc``; every other token goes to ``_process_other``.
    """
    # Use the shared TokenParser.partial helper, which returns the last
    # dotted component of the token type (no ``first`` helper is defined).
    if self.partial(token) == 'Preproc':
        self._process_preproc(token, content)
    else:
        self._process_other(token, content)

View file

@ -10,7 +10,6 @@
""" """
from . import TokenParser from . import TokenParser
from ..compat import u
class CSharpParser(TokenParser): class CSharpParser(TokenParser):
@ -23,7 +22,7 @@ class CSharpParser(TokenParser):
return self.dependencies return self.dependencies
def _process_token(self, token, content):
    """Dispatch one lexer token to the matching handler.

    Tokens whose type ends in ``Namespace`` are passed to
    ``_process_namespace``; every other token goes to ``_process_other``.
    """
    if self.partial(token) != 'Namespace':
        self._process_other(token, content)
        return
    self._process_namespace(token, content)

View file

@ -14,6 +14,16 @@ from ..compat import u
class JavaParser(TokenParser): class JavaParser(TokenParser):
# Regular-expression strings for dependency names that should not be
# reported: anything starting with ``java.`` or ``javax.`` and the bare
# words import/package/namespace/static.
exclude = [
r'^java\.',
r'^javax\.',
r'^import$',
r'^package$',
r'^namespace$',
r'^static$',
]
# Parser state: None when idle, 'import' while the tokens of an import
# statement are being collected, 'import-finished' while the collected
# text is being turned into a dependency.
state = None
# Text of the import statement collected so far.
buffer = u('')
def parse(self, tokens=[]): def parse(self, tokens=[]):
if not tokens and not self.tokens: if not tokens and not self.tokens:
@ -23,14 +33,66 @@ class JavaParser(TokenParser):
return self.dependencies return self.dependencies
def _process_token(self, token, content):
    """Dispatch one lexer token to the handler for its token type.

    The last dotted component of the token type selects the handler:
    ``Namespace``, ``Name``, ``Attribute`` and ``Operator`` each have a
    dedicated handler; anything else goes to ``_process_other``.
    """
    kind = self.partial(token)
    # A single if/elif chain: each token is handled by exactly one
    # handler, so Namespace tokens are no longer also sent to
    # _process_other.
    if kind == 'Namespace':
        self._process_namespace(token, content)
    elif kind == 'Name':
        self._process_name(token, content)
    elif kind == 'Attribute':
        self._process_attribute(token, content)
    elif kind == 'Operator':
        self._process_operator(token, content)
    else:
        self._process_other(token, content)
def _process_namespace(self, token, content):
    """Advance the import state machine with one namespace token.

    * ``import`` switches the parser into the ``'import'`` state.
    * While in ``'import'``, the content is added to ``self.buffer``
      unless it is one of ``package``/``namespace``/``static``.
    * In ``'import-finished'``, ``content`` holds the whole dotted import
      path: a three-character leading segment and a trailing ``*`` are
      dropped, at most the first two remaining segments are reported via
      ``self.append``, and the state is reset to ``None``.
    """
    if u(content) == u('import'):
        self.state = 'import'
        return

    if self.state == 'import':
        skipped = [
            u('package'),
            u('namespace'),
            u('static'),
        ]
        if u(content) not in skipped:
            self.buffer = u('{0}{1}').format(self.buffer, u(content))
        return

    if self.state != 'import-finished':
        return

    segments = content.split(u('.'))

    if len(segments) == 1:
        self.append(segments[0])
    elif len(segments) > 1:
        # A three-character first segment (e.g. "com") is dropped.
        if len(segments[0]) == 3:
            segments = segments[1:]
        # A trailing wildcard is not part of the package name.
        if segments[-1] == u('*'):
            segments = segments[:-1]

        if len(segments) == 1:
            self.append(segments[0])
        elif len(segments) > 1:
            self.append(u('.').join(segments[:2]))

    self.state = None
def _process_name(self, token, content):
    """Add a name token to ``self.buffer`` while collecting an import."""
    if self.state != 'import':
        return
    self.buffer = u('{0}{1}').format(self.buffer, u(content))
def _process_attribute(self, token, content):
    """Add an attribute token to ``self.buffer`` while collecting an import."""
    if self.state != 'import':
        return
    self.buffer = u('{0}{1}').format(self.buffer, u(content))
def _process_operator(self, token, content):
    """Handle an operator token.

    A ``;`` hands the collected buffer to ``_process_namespace`` in the
    ``'import-finished'`` state and then clears both state and buffer.
    Any other operator is added to the buffer while an import is being
    collected and is ignored otherwise.
    """
    if u(content) != u(';'):
        if self.state == 'import':
            self.buffer = u('{0}{1}').format(self.buffer, u(content))
        return

    self.state = 'import-finished'
    self._process_namespace(token, self.buffer)
    self.state = None
    self.buffer = u('')
def _process_other(self, token, content): def _process_other(self, token, content):
pass pass

View file

@ -25,7 +25,7 @@ class PhpParser(TokenParser):
return self.dependencies return self.dependencies
def _process_token(self, token, content): def _process_token(self, token, content):
if u(token).split('.')[-1] == 'Keyword': if self.partial(token) == 'Keyword':
self._process_keyword(token, content) self._process_keyword(token, content)
elif u(token) == 'Token.Literal.String.Single' or u(token) == 'Token.Literal.String.Double': elif u(token) == 'Token.Literal.String.Single' or u(token) == 'Token.Literal.String.Double':
self._process_literal_string(token, content) self._process_literal_string(token, content)
@ -33,9 +33,9 @@ class PhpParser(TokenParser):
self._process_name(token, content) self._process_name(token, content)
elif u(token) == 'Token.Name.Function': elif u(token) == 'Token.Name.Function':
self._process_function(token, content) self._process_function(token, content)
elif u(token).split('.')[-1] == 'Punctuation': elif self.partial(token) == 'Punctuation':
self._process_punctuation(token, content) self._process_punctuation(token, content)
elif u(token).split('.')[-1] == 'Text': elif self.partial(token) == 'Text':
self._process_text(token, content) self._process_text(token, content)
else: else:
self._process_other(token, content) self._process_other(token, content)

View file

@ -10,7 +10,6 @@
""" """
from . import TokenParser from . import TokenParser
from ..compat import u
class PythonParser(TokenParser): class PythonParser(TokenParser):
@ -26,17 +25,17 @@ class PythonParser(TokenParser):
return self.dependencies return self.dependencies
def _process_token(self, token, content):
    """Dispatch one lexer token to the handler for its token type.

    The last dotted component of the token type selects the handler:
    ``Namespace``, ``Name``, ``Word``, ``Operator``, ``Punctuation`` and
    ``Text`` each have a dedicated handler; anything else goes to
    ``_process_other``.
    """
    kind = self.partial(token)
    if kind == 'Namespace':
        self._process_namespace(token, content)
    # The component is 'Name' (singular); comparing against 'Names'
    # would never match and would leave _process_name unreachable.
    elif kind == 'Name':
        self._process_name(token, content)
    elif kind == 'Word':
        self._process_word(token, content)
    elif kind == 'Operator':
        self._process_operator(token, content)
    elif kind == 'Punctuation':
        self._process_punctuation(token, content)
    elif kind == 'Text':
        self._process_text(token, content)
    else:
        self._process_other(token, content)