# rana-cli/wakatime/dependencies/html.py
# -*- coding: utf-8 -*-
"""
2018-03-11 20:29:30 +00:00
wakatime.dependencies.html
~~~~~~~~~~~~~~~~~~~~~~~~~~
2018-03-11 20:29:30 +00:00
Parse dependencies from HTML.
:copyright: (c) 2014 Alan Hamlett.
:license: BSD, see LICENSE for more details.
"""
from . import TokenParser
from ..compat import u
""" If these keywords are found in the source file, treat them as a dependency.
Must be lower-case strings.
"""
KEYWORDS = [
'_',
'$',
'angular',
2017-10-24 05:01:31 +00:00
'assert', # probably mocha
'backbone',
'batman',
'c3',
'can',
'casper',
'chai',
'chaplin',
'd3',
2017-10-24 05:01:31 +00:00
'define', # probably require
'describe', # mocha or jasmine
'eco',
'ember',
'espresso',
2017-10-24 05:01:31 +00:00
'expect', # probably jasmine
'exports', # probably npm
'express',
'gulp',
'handlebars',
'highcharts',
'jasmine',
'jquery',
'jstz',
2017-10-24 05:01:31 +00:00
'ko', # probably knockout
'm', # probably mithril
'marionette',
'meteor',
'moment',
'monitorio',
'mustache',
'phantom',
'pickadate',
'pikaday',
'qunit',
'react',
'reactive',
2017-10-24 05:01:31 +00:00
'require', # probably the commonjs spec
'ripple',
'rivets',
'socketio',
'spine',
'thorax',
'underscore',
'vue',
'way',
'zombie',
]
2018-03-11 20:29:30 +00:00
class HtmlParser(TokenParser):
    """Collect the ``src`` attribute values of ``<script>`` tags.

    The parser walks ``self.tokens`` (triples of ``index, token, content``)
    and keeps a small state machine:

    * ``tags`` -- stack of currently open tag names, innermost at index 0.
    * ``opening_tag`` -- True between a ``<`` punctuation token and the
      tag-name token that follows it.
    * ``getting_attrs`` -- True while inside an opening tag, i.e. after its
      name and before the closing ``>`` / ``/>``.
    * ``current_attr`` -- lower-cased name of the attribute whose value is
      being read, or None.
    * ``current_attr_value`` -- partially accumulated quoted value, or None.

    ``self.tokens``, ``self.dependencies`` and ``self.append`` are not defined
    here; presumably they come from ``TokenParser`` -- confirm there.
    """

    # Class-level defaults. ``tags`` stays declared here only so that
    # ``HtmlParser.tags`` keeps existing; every instance built through
    # ``__init__`` gets its own list (see below).
    tags = []
    opening_tag = False
    getting_attrs = False
    current_attr = None
    current_attr_value = None

    def __init__(self, *args, **kwargs):
        """Give the instance a private tag stack, then defer to the base class.

        Without this, ``self.tags.insert`` / ``self.tags.pop`` would mutate
        the single list stored on the class, so tags left open by one parsed
        file (e.g. elements that are never closed) would leak into every
        later parser instance in the same process.

        All arguments are passed through unchanged to the parent initializer.
        """
        # Assigned before the parent initializer runs so the stack exists
        # even if the parent triggers token processing.
        self.tags = []
        super(HtmlParser, self).__init__(*args, **kwargs)

    def parse(self):
        """Process every token and return ``self.dependencies``."""
        for _, token, content in self.tokens:
            self._process_token(token, content)
        return self.dependencies

    def _process_token(self, token, content):
        """Dispatch one token to the handler for its type.

        The type names resemble Pygments token types; tokens of any other
        type are ignored.
        """
        # Convert once instead of once per comparison.
        token_type = u(token)
        if token_type == 'Token.Punctuation':
            self._process_punctuation(token, content)
        elif token_type == 'Token.Name.Tag':
            self._process_tag(token, content)
        elif token_type == 'Token.Literal.String':
            self._process_string(token, content)
        elif token_type == 'Token.Name.Attribute':
            self._process_attribute(token, content)

    @property
    def current_tag(self):
        """Name of the innermost open tag, or None when no tag is open."""
        return self.tags[0] if self.tags else None

    def _process_punctuation(self, token, content):
        """Update the tag stack and flags for ``<``, ``>``, ``</`` and ``/``."""
        if content.startswith(('</', '/')):
            # A closing tag or a self-closing ``/>`` ends the innermost tag.
            try:
                self.tags.pop(0)
            except IndexError:
                # ignore errors from malformed markup
                pass
            self.opening_tag = False
            self.getting_attrs = False
        elif content.startswith('<'):
            self.opening_tag = True
        elif content.startswith('>'):
            self.opening_tag = False
            self.getting_attrs = False

    def _process_tag(self, token, content):
        """Push the tag name when it follows ``<`` and start reading attrs."""
        if self.opening_tag:
            tag_name = content.replace('<', '', 1).strip().lower()
            self.tags.insert(0, tag_name)
            self.getting_attrs = True
        self.current_attr = None

    def _process_attribute(self, token, content):
        """Remember the attribute name whose value is expected next."""
        if self.getting_attrs:
            self.current_attr = content.lower().strip('=')
        self.current_attr_value = None

    def _process_string(self, token, content):
        """Accumulate a quoted attribute value and emit script sources.

        A value may arrive as one token (``"a.js"``) or as several; a token
        ending in a quote completes the value, a token starting with a quote
        begins one.

        NOTE(review): a fragment that neither starts nor ends with a quote is
        not added to the pending value -- confirm this matches the token
        stream the lexer actually produces.
        """
        if not self.getting_attrs or self.current_attr is None:
            return

        quotes = ('"', "'")
        if content.endswith(quotes):
            if self.current_attr_value is not None:
                # Closing piece of a value that was already started.
                self._finish_attr_value(self.current_attr_value + content)
            elif len(content) == 1:
                # A lone quote character with nothing pending opens a value.
                self.current_attr_value = content
            else:
                # Whole value delivered in a single token.
                self._finish_attr_value(content)
        elif content.startswith(quotes):
            if self.current_attr_value is None:
                self.current_attr_value = content

    def _finish_attr_value(self, value):
        """Record ``value`` if it is a ``<script src=...>``, then reset state.

        The value is passed to ``self.append`` exactly as collected, including
        its surrounding quote characters.
        """
        if self.current_tag == 'script' and self.current_attr == 'src':
            self.append(value)
        self.current_attr = None
        self.current_attr_value = None