# -*- coding: utf-8 -*-
"""
    wakatime.dependencies.html
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    Parse dependencies from HTML.

    :copyright: (c) 2014 Alan Hamlett.
    :license: BSD, see LICENSE for more details.
"""

from . import TokenParser
from ..compat import u


# If these keywords are found in the source file, treat them as a dependency.
# Must be lower-case strings.
KEYWORDS = [
    '_',
    '$',
    'angular',
    'assert',  # probably mocha
    'backbone',
    'batman',
    'c3',
    'can',
    'casper',
    'chai',
    'chaplin',
    'd3',
    'define',  # probably require
    'describe',  # mocha or jasmine
    'eco',
    'ember',
    'espresso',
    'expect',  # probably jasmine
    'exports',  # probably npm
    'express',
    'gulp',
    'handlebars',
    'highcharts',
    'jasmine',
    'jquery',
    'jstz',
    'ko',  # probably knockout
    'm',  # probably mithril
    'marionette',
    'meteor',
    'moment',
    'monitorio',
    'mustache',
    'phantom',
    'pickadate',
    'pikaday',
    'qunit',
    'react',
    'reactive',
    'require',  # probably the commonjs spec
    'ripple',
    'rivets',
    'socketio',
    'spine',
    'thorax',
    'underscore',
    'vue',
    'way',
    'zombie',
]


class HtmlParser(TokenParser):
    """Collect the ``src`` attribute values of ``<script>`` tags.

    The parser walks ``self.tokens`` as ``(index, token, content)`` triples
    and dispatches on the string form of the token type
    (``Token.Punctuation``, ``Token.Name.Tag``, ``Token.Literal.String``,
    ``Token.Name.Attribute``). Values are recorded through ``self.append``
    and returned as ``self.dependencies``.

    NOTE(review): ``tokens``, ``append`` and ``dependencies`` are not defined
    in this class; presumably they come from ``TokenParser`` -- confirm.
    """

    # Innermost tag first. This class-level list is only a default: methods
    # rebind ``self.tags`` rather than mutating it, so one parser's tag state
    # never leaks into another instance.
    tags = []
    # True between a '<' punctuation token and the end of that tag.
    opening_tag = False
    # True once a tag name was seen and attribute tokens may follow.
    getting_attrs = False
    # Lower-cased name of the attribute whose value is being read, if any.
    current_attr = None
    # Fragments of the current attribute value collected so far, if any.
    current_attr_value = None

    def parse(self):
        """Process every token and return the collected dependencies."""
        for _index, token, content in self.tokens:
            self._process_token(token, content)
        return self.dependencies

    def _process_token(self, token, content):
        """Route one token to the handler for its type; ignore other types."""
        kind = u(token)
        if kind == 'Token.Punctuation':
            self._process_punctuation(token, content)
        elif kind == 'Token.Name.Tag':
            self._process_tag(token, content)
        elif kind == 'Token.Literal.String':
            self._process_string(token, content)
        elif kind == 'Token.Name.Attribute':
            self._process_attribute(token, content)

    @property
    def current_tag(self):
        """Name of the innermost tracked tag, or None when none is open."""
        return self.tags[0] if self.tags else None

    def _process_punctuation(self, token, content):
        """Track tag boundaries from punctuation tokens.

        ``</`` or ``/`` closes the innermost tag, ``<`` starts a tag and
        ``>`` ends the tag header. Any other punctuation is ignored.
        """
        if content.startswith(('</', '/')):
            # Closing or self-closing tag. Slicing an empty list is harmless,
            # so malformed markup with stray closers does not raise.
            self.tags = self.tags[1:]
            self.opening_tag = False
            self.getting_attrs = False
        elif content.startswith('<'):
            self.opening_tag = True
        elif content.startswith('>'):
            self.opening_tag = False
            self.getting_attrs = False

    def _process_tag(self, token, content):
        """Push a newly opened tag name and start reading its attributes."""
        if self.opening_tag:
            name = content.replace('<', '', 1).strip().lower()
            # Rebind instead of ``insert`` so the class-level default list
            # is never modified in place.
            self.tags = [name] + self.tags
            self.getting_attrs = True
            self.current_attr = None

    def _process_attribute(self, token, content):
        """Remember the attribute name whose value is expected next."""
        if self.getting_attrs:
            self.current_attr = content.lower().strip('=')
            self.current_attr_value = None

    def _process_string(self, token, content):
        """Assemble an attribute value from one or more string tokens.

        A value may arrive as a single quoted token or as several fragments
        (opening quote, body, closing quote). Once the closing fragment is
        seen the value is handed to ``_finish_attr_value``.
        """
        if not self.getting_attrs or self.current_attr is None:
            return

        quotes = ('"', "'")
        if content.endswith(quotes):
            if self.current_attr_value is not None:
                # Closing fragment of a value started by an earlier token.
                self._finish_attr_value(self.current_attr_value + content)
            elif len(content) == 1:
                # A lone quote with nothing pending is the opening quote.
                self.current_attr_value = content
            else:
                # Complete value delivered in a single token.
                self._finish_attr_value(content)
        elif content.startswith(quotes):
            if self.current_attr_value is None:
                self.current_attr_value = content
            else:
                self.current_attr_value += content
        elif self.current_attr_value is not None:
            # Middle fragment of a split value: keep accumulating.
            self.current_attr_value += content

    def _finish_attr_value(self, value):
        """Record ``value`` if it is a ``<script src>``, then reset state.

        The value is passed to ``self.append`` exactly as assembled, with its
        surrounding quotes still attached.
        """
        if self.current_tag == 'script' and self.current_attr == 'src':
            self.append(value)
        self.current_attr = None
        self.current_attr_value = None