improve java dependency detection

This commit is contained in:
Alan Hamlett 2015-09-16 14:59:30 -07:00
parent 5265160aa8
commit 4d45305650
8 changed files with 161 additions and 18 deletions

View file

@ -10,6 +10,7 @@
"""
import logging
import re
import sys
import traceback
@ -24,12 +25,14 @@ class TokenParser(object):
language, inherit from this class and implement the :meth:`parse` method
to return a list of dependency strings.
"""
exclude = []
def __init__(self, source_file, lexer=None):
self.tokens = []
self.dependencies = []
self.source_file = source_file
self.lexer = lexer
self.exclude = [re.compile(x, re.IGNORECASE) for x in self.exclude]
def parse(self, tokens=[]):
""" Should return a list of dependencies.
@ -48,6 +51,9 @@ class TokenParser(object):
strip_whitespace=strip_whitespace,
)
def partial(self, token):
return u(token).split('.')[-1]
def _extract_tokens(self):
if self.lexer:
try:
@ -77,7 +83,13 @@ class TokenParser(object):
if strip_whitespace:
dep = dep.strip()
if dep and (not separator or not dep.startswith(separator)):
self.dependencies.append(dep)
should_exclude = False
for compiled in self.exclude:
if compiled.search(dep):
should_exclude = True
break
if not should_exclude:
self.dependencies.append(dep)
class DependencyParser(object):

View file

@ -10,7 +10,6 @@
"""
from . import TokenParser
from ..compat import u
class CppParser(TokenParser):
@ -23,7 +22,7 @@ class CppParser(TokenParser):
return self.dependencies
def _process_token(self, token, content):
if u(token).split('.')[-1] == 'Preproc':
if self.partial(token) == 'Preproc':
self._process_preproc(token, content)
else:
self._process_other(token, content)

View file

@ -10,7 +10,6 @@
"""
from . import TokenParser
from ..compat import u
class CSharpParser(TokenParser):
@ -23,7 +22,7 @@ class CSharpParser(TokenParser):
return self.dependencies
def _process_token(self, token, content):
if u(token).split('.')[-1] == 'Namespace':
if self.partial(token) == 'Namespace':
self._process_namespace(token, content)
else:
self._process_other(token, content)

View file

@ -14,6 +14,16 @@ from ..compat import u
class JavaParser(TokenParser):
exclude = [
r'^java\.',
r'^javax\.',
r'^import$',
r'^package$',
r'^namespace$',
r'^static$',
]
state = None
buffer = u('')
def parse(self, tokens=[]):
if not tokens and not self.tokens:
@ -23,14 +33,66 @@ class JavaParser(TokenParser):
return self.dependencies
def _process_token(self, token, content):
if u(token).split('.')[-1] == 'Namespace':
if self.partial(token) == 'Namespace':
self._process_namespace(token, content)
if self.partial(token) == 'Name':
self._process_name(token, content)
elif self.partial(token) == 'Attribute':
self._process_attribute(token, content)
elif self.partial(token) == 'Operator':
self._process_operator(token, content)
else:
self._process_other(token, content)
def _process_namespace(self, token, content):
if content != 'import' and content != 'package' and content != 'namespace':
self.append(content, truncate=True)
if u(content) == u('import'):
self.state = 'import'
elif self.state == 'import':
keywords = [
u('package'),
u('namespace'),
u('static'),
]
if u(content) in keywords:
return
self.buffer = u('{0}{1}').format(self.buffer, u(content))
elif self.state == 'import-finished':
content = content.split(u('.'))
if len(content) == 1:
self.append(content[0])
elif len(content) > 1:
if len(content[0]) == 3:
content = content[1:]
if content[-1] == u('*'):
content = content[:len(content) - 1]
if len(content) == 1:
self.append(content[0])
elif len(content) > 1:
self.append(u('.').join(content[:2]))
self.state = None
def _process_name(self, token, content):
if self.state == 'import':
self.buffer = u('{0}{1}').format(self.buffer, u(content))
def _process_attribute(self, token, content):
if self.state == 'import':
self.buffer = u('{0}{1}').format(self.buffer, u(content))
def _process_operator(self, token, content):
if u(content) == u(';'):
self.state = 'import-finished'
self._process_namespace(token, self.buffer)
self.state = None
self.buffer = u('')
elif self.state == 'import':
self.buffer = u('{0}{1}').format(self.buffer, u(content))
def _process_other(self, token, content):
pass

View file

@ -25,7 +25,7 @@ class PhpParser(TokenParser):
return self.dependencies
def _process_token(self, token, content):
if u(token).split('.')[-1] == 'Keyword':
if self.partial(token) == 'Keyword':
self._process_keyword(token, content)
elif u(token) == 'Token.Literal.String.Single' or u(token) == 'Token.Literal.String.Double':
self._process_literal_string(token, content)
@ -33,9 +33,9 @@ class PhpParser(TokenParser):
self._process_name(token, content)
elif u(token) == 'Token.Name.Function':
self._process_function(token, content)
elif u(token).split('.')[-1] == 'Punctuation':
elif self.partial(token) == 'Punctuation':
self._process_punctuation(token, content)
elif u(token).split('.')[-1] == 'Text':
elif self.partial(token) == 'Text':
self._process_text(token, content)
else:
self._process_other(token, content)

View file

@ -10,7 +10,6 @@
"""
from . import TokenParser
from ..compat import u
class PythonParser(TokenParser):
@ -26,17 +25,17 @@ class PythonParser(TokenParser):
return self.dependencies
def _process_token(self, token, content):
if u(token).split('.')[-1] == 'Namespace':
if self.partial(token) == 'Namespace':
self._process_namespace(token, content)
elif u(token).split('.')[-1] == 'Name':
elif self.partial(token) == 'Name':
self._process_name(token, content)
elif u(token).split('.')[-1] == 'Word':
elif self.partial(token) == 'Word':
self._process_word(token, content)
elif u(token).split('.')[-1] == 'Operator':
elif self.partial(token) == 'Operator':
self._process_operator(token, content)
elif u(token).split('.')[-1] == 'Punctuation':
elif self.partial(token) == 'Punctuation':
self._process_punctuation(token, content)
elif u(token).split('.')[-1] == 'Text':
elif self.partial(token) == 'Text':
self._process_text(token, content)
else:
self._process_other(token, content)