improve java dependency detection
This commit is contained in:
parent
5265160aa8
commit
4d45305650
8 changed files with 161 additions and 18 deletions
|
@ -10,6 +10,7 @@
|
|||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
|
@ -24,12 +25,14 @@ class TokenParser(object):
|
|||
language, inherit from this class and implement the :meth:`parse` method
|
||||
to return a list of dependency strings.
|
||||
"""
|
||||
exclude = []
|
||||
|
||||
def __init__(self, source_file, lexer=None):
|
||||
self.tokens = []
|
||||
self.dependencies = []
|
||||
self.source_file = source_file
|
||||
self.lexer = lexer
|
||||
self.exclude = [re.compile(x, re.IGNORECASE) for x in self.exclude]
|
||||
|
||||
def parse(self, tokens=[]):
|
||||
""" Should return a list of dependencies.
|
||||
|
@ -48,6 +51,9 @@ class TokenParser(object):
|
|||
strip_whitespace=strip_whitespace,
|
||||
)
|
||||
|
||||
def partial(self, token):
|
||||
return u(token).split('.')[-1]
|
||||
|
||||
def _extract_tokens(self):
|
||||
if self.lexer:
|
||||
try:
|
||||
|
@ -77,7 +83,13 @@ class TokenParser(object):
|
|||
if strip_whitespace:
|
||||
dep = dep.strip()
|
||||
if dep and (not separator or not dep.startswith(separator)):
|
||||
self.dependencies.append(dep)
|
||||
should_exclude = False
|
||||
for compiled in self.exclude:
|
||||
if compiled.search(dep):
|
||||
should_exclude = True
|
||||
break
|
||||
if not should_exclude:
|
||||
self.dependencies.append(dep)
|
||||
|
||||
|
||||
class DependencyParser(object):
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
"""
|
||||
|
||||
from . import TokenParser
|
||||
from ..compat import u
|
||||
|
||||
|
||||
class CppParser(TokenParser):
|
||||
|
@ -23,7 +22,7 @@ class CppParser(TokenParser):
|
|||
return self.dependencies
|
||||
|
||||
def _process_token(self, token, content):
|
||||
if u(token).split('.')[-1] == 'Preproc':
|
||||
if self.first(token) == 'Preproc':
|
||||
self._process_preproc(token, content)
|
||||
else:
|
||||
self._process_other(token, content)
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
"""
|
||||
|
||||
from . import TokenParser
|
||||
from ..compat import u
|
||||
|
||||
|
||||
class CSharpParser(TokenParser):
|
||||
|
@ -23,7 +22,7 @@ class CSharpParser(TokenParser):
|
|||
return self.dependencies
|
||||
|
||||
def _process_token(self, token, content):
|
||||
if u(token).split('.')[-1] == 'Namespace':
|
||||
if self.partial(token) == 'Namespace':
|
||||
self._process_namespace(token, content)
|
||||
else:
|
||||
self._process_other(token, content)
|
||||
|
|
|
@ -14,6 +14,16 @@ from ..compat import u
|
|||
|
||||
|
||||
class JavaParser(TokenParser):
|
||||
exclude = [
|
||||
r'^java\.',
|
||||
r'^javax\.',
|
||||
r'^import$',
|
||||
r'^package$',
|
||||
r'^namespace$',
|
||||
r'^static$',
|
||||
]
|
||||
state = None
|
||||
buffer = u('')
|
||||
|
||||
def parse(self, tokens=[]):
|
||||
if not tokens and not self.tokens:
|
||||
|
@ -23,14 +33,66 @@ class JavaParser(TokenParser):
|
|||
return self.dependencies
|
||||
|
||||
def _process_token(self, token, content):
|
||||
if u(token).split('.')[-1] == 'Namespace':
|
||||
if self.partial(token) == 'Namespace':
|
||||
self._process_namespace(token, content)
|
||||
if self.partial(token) == 'Name':
|
||||
self._process_name(token, content)
|
||||
elif self.partial(token) == 'Attribute':
|
||||
self._process_attribute(token, content)
|
||||
elif self.partial(token) == 'Operator':
|
||||
self._process_operator(token, content)
|
||||
else:
|
||||
self._process_other(token, content)
|
||||
|
||||
def _process_namespace(self, token, content):
|
||||
if content != 'import' and content != 'package' and content != 'namespace':
|
||||
self.append(content, truncate=True)
|
||||
if u(content) == u('import'):
|
||||
self.state = 'import'
|
||||
|
||||
elif self.state == 'import':
|
||||
keywords = [
|
||||
u('package'),
|
||||
u('namespace'),
|
||||
u('static'),
|
||||
]
|
||||
if u(content) in keywords:
|
||||
return
|
||||
self.buffer = u('{0}{1}').format(self.buffer, u(content))
|
||||
|
||||
elif self.state == 'import-finished':
|
||||
content = content.split(u('.'))
|
||||
|
||||
if len(content) == 1:
|
||||
self.append(content[0])
|
||||
|
||||
elif len(content) > 1:
|
||||
if len(content[0]) == 3:
|
||||
content = content[1:]
|
||||
if content[-1] == u('*'):
|
||||
content = content[:len(content) - 1]
|
||||
|
||||
if len(content) == 1:
|
||||
self.append(content[0])
|
||||
elif len(content) > 1:
|
||||
self.append(u('.').join(content[:2]))
|
||||
|
||||
self.state = None
|
||||
|
||||
def _process_name(self, token, content):
|
||||
if self.state == 'import':
|
||||
self.buffer = u('{0}{1}').format(self.buffer, u(content))
|
||||
|
||||
def _process_attribute(self, token, content):
|
||||
if self.state == 'import':
|
||||
self.buffer = u('{0}{1}').format(self.buffer, u(content))
|
||||
|
||||
def _process_operator(self, token, content):
|
||||
if u(content) == u(';'):
|
||||
self.state = 'import-finished'
|
||||
self._process_namespace(token, self.buffer)
|
||||
self.state = None
|
||||
self.buffer = u('')
|
||||
elif self.state == 'import':
|
||||
self.buffer = u('{0}{1}').format(self.buffer, u(content))
|
||||
|
||||
def _process_other(self, token, content):
|
||||
pass
|
||||
|
|
|
@ -25,7 +25,7 @@ class PhpParser(TokenParser):
|
|||
return self.dependencies
|
||||
|
||||
def _process_token(self, token, content):
|
||||
if u(token).split('.')[-1] == 'Keyword':
|
||||
if self.partial(token) == 'Keyword':
|
||||
self._process_keyword(token, content)
|
||||
elif u(token) == 'Token.Literal.String.Single' or u(token) == 'Token.Literal.String.Double':
|
||||
self._process_literal_string(token, content)
|
||||
|
@ -33,9 +33,9 @@ class PhpParser(TokenParser):
|
|||
self._process_name(token, content)
|
||||
elif u(token) == 'Token.Name.Function':
|
||||
self._process_function(token, content)
|
||||
elif u(token).split('.')[-1] == 'Punctuation':
|
||||
elif self.partial(token) == 'Punctuation':
|
||||
self._process_punctuation(token, content)
|
||||
elif u(token).split('.')[-1] == 'Text':
|
||||
elif self.partial(token) == 'Text':
|
||||
self._process_text(token, content)
|
||||
else:
|
||||
self._process_other(token, content)
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
"""
|
||||
|
||||
from . import TokenParser
|
||||
from ..compat import u
|
||||
|
||||
|
||||
class PythonParser(TokenParser):
|
||||
|
@ -26,17 +25,17 @@ class PythonParser(TokenParser):
|
|||
return self.dependencies
|
||||
|
||||
def _process_token(self, token, content):
|
||||
if u(token).split('.')[-1] == 'Namespace':
|
||||
if self.partial(token) == 'Namespace':
|
||||
self._process_namespace(token, content)
|
||||
elif u(token).split('.')[-1] == 'Name':
|
||||
elif self.partial(token) == 'Names':
|
||||
self._process_name(token, content)
|
||||
elif u(token).split('.')[-1] == 'Word':
|
||||
elif self.partial(token) == 'Word':
|
||||
self._process_word(token, content)
|
||||
elif u(token).split('.')[-1] == 'Operator':
|
||||
elif self.partial(token) == 'Operator':
|
||||
self._process_operator(token, content)
|
||||
elif u(token).split('.')[-1] == 'Punctuation':
|
||||
elif self.partial(token) == 'Punctuation':
|
||||
self._process_punctuation(token, content)
|
||||
elif u(token).split('.')[-1] == 'Text':
|
||||
elif self.partial(token) == 'Text':
|
||||
self._process_text(token, content)
|
||||
else:
|
||||
self._process_other(token, content)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue