rana-cli/wakatime/dependencies/html.py

# -*- coding: utf-8 -*-
"""
    wakatime.dependencies.html
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    Parse dependencies from HTML.

    :copyright: (c) 2014 Alan Hamlett.
    :license: BSD, see LICENSE for more details.
"""

from . import TokenParser
from ..compat import u


# Names that are treated as a dependency whenever they are found in the
# source file. Every entry must be a lower-case string.
KEYWORDS = [
    '_', '$',
    # assert: probably mocha
    'angular', 'assert',
    'backbone', 'batman',
    'c3', 'can', 'casper', 'chai', 'chaplin',
    # define: probably require; describe: mocha or jasmine
    'd3', 'define', 'describe',
    # expect: probably jasmine; exports: probably npm
    'eco', 'ember', 'espresso', 'expect', 'exports', 'express',
    'gulp',
    'handlebars', 'highcharts',
    'jasmine', 'jquery', 'jstz',
    # ko: probably knockout
    'ko',
    # m: probably mithril
    'm', 'marionette', 'meteor', 'moment', 'monitorio', 'mustache',
    'phantom', 'pickadate', 'pikaday',
    'qunit',
    # require: probably the commonjs spec
    'react', 'reactive', 'require', 'ripple', 'rivets',
    'socketio', 'spine',
    'thorax',
    'underscore',
    'vue',
    'way',
    'zombie',
]


class HtmlParser(TokenParser):
    """Record the ``src`` attribute values of ``<script>`` tags.

    The parser walks ``self.tokens`` and keeps a small state machine: a stack
    of open tag names, whether an opening tag's attribute list is currently
    being read, and the attribute whose value is expected next. Whenever the
    innermost tag is ``script`` and the attribute is ``src``, the raw string
    token (surrounding quotes included) is passed to ``self.append``.

    ``self.tokens``, ``self.append`` and ``self.dependencies`` are not defined
    in this class; presumably they are inherited from ``TokenParser``.
    """

    # Stack of open tag names, innermost tag first (index 0).
    # This class-level list is only the shared default and must stay empty:
    # the methods below rebind ``self.tags`` to a new list instead of mutating
    # it in place, so tag state cannot leak from one parser instance to the
    # next.
    tags = []
    # True after a '<' punctuation token until a '>' or closing punctuation.
    opening_tag = False
    # True while the attribute list of an opening tag is being read.
    getting_attrs = False
    # Lower-cased name of the attribute whose value is expected next, or None.
    current_attr = None
    # First piece of a quoted value that is split across several string
    # tokens, or None when no split value is being collected.
    current_attr_value = None

    def parse(self):
        """Feed every token to the state machine and return the dependencies.

        Returns:
            ``self.dependencies`` after all tokens have been processed.
        """
        for _index, token, content in self.tokens:
            self._process_token(token, content)
        return self.dependencies

    def _process_token(self, token, content):
        """Dispatch one token to the handler matching its type name.

        Token types other than the four handled below are ignored.
        """
        # Convert once instead of once per comparison.
        token_type = u(token)
        if token_type == 'Token.Punctuation':
            self._process_punctuation(token, content)
        elif token_type == 'Token.Name.Tag':
            self._process_tag(token, content)
        elif token_type == 'Token.Literal.String':
            self._process_string(token, content)
        elif token_type == 'Token.Name.Attribute':
            self._process_attribute(token, content)

    @property
    def current_tag(self):
        """Name of the innermost open tag, or None when no tag is open."""
        return self.tags[0] if self.tags else None

    def _process_punctuation(self, token, content):
        """Update tag-tracking state from a punctuation token.

        ``</`` or ``/`` closes the innermost tag, ``<`` starts an opening tag
        and ``>`` ends the attribute list.
        """
        if content.startswith(('</', '/')):
            # Slicing drops the innermost tag, tolerates an already empty
            # stack (malformed markup) and yields a new list, so the shared
            # class-level default is never mutated.
            self.tags = self.tags[1:]
            self.opening_tag = False
            self.getting_attrs = False
        elif content.startswith('<'):
            self.opening_tag = True
        elif content.startswith('>'):
            self.opening_tag = False
            self.getting_attrs = False

    def _process_tag(self, token, content):
        """Push a newly opened tag name and start reading its attributes."""
        if self.opening_tag:
            name = content.replace('<', '', 1).strip().lower()
            # Build a new list rather than insert in place; see ``tags``.
            self.tags = [name] + self.tags
            self.getting_attrs = True
        self.current_attr = None

    def _process_attribute(self, token, content):
        """Remember the attribute name whose value should follow."""
        if self.getting_attrs:
            self.current_attr = content.lower().strip('=')
        self.current_attr_value = None

    def _process_string(self, token, content):
        """Handle a string token that may be (part of) an attribute value.

        A token ending in a quote completes a value, unless it is a lone
        quote character with nothing collected yet, which opens a split value.
        A token that only starts with a quote opens a split value.
        """
        if not self.getting_attrs or self.current_attr is None:
            return

        quotes = ('"', "'")
        if content.endswith(quotes):
            if self.current_attr_value is not None:
                # Closing piece of a value that was split across tokens.
                self._finish_attr_value(self.current_attr_value + content)
            elif len(content) == 1:
                # A lone quote character: the value continues in later tokens.
                self.current_attr_value = content
            else:
                # The whole quoted value arrived in a single token.
                self._finish_attr_value(content)
        elif content.startswith(quotes):
            if self.current_attr_value is None:
                self.current_attr_value = content
        # NOTE(review): pieces that neither start nor end with a quote are not
        # accumulated, so a value split into three or more tokens loses its
        # middle part. Confirm whether that is intended.

    def _finish_attr_value(self, value):
        """Record a completed ``<script src=...>`` value and reset attr state."""
        if self.current_tag == 'script' and self.current_attr == 'src':
            self.append(value)
        self.current_attr = None
        self.current_attr_value = None
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00			`# -- coding: utf-8 --`
			`"""`
support for JavaScript imports 2018-03-11 20:29:30 +00:00			`wakatime.dependencies.html`
			`~~~~~~~~~~~~~~~~~~~~~~~~~~`
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00
support for JavaScript imports 2018-03-11 20:29:30 +00:00			`Parse dependencies from HTML.`
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00
			`:copyright: (c) 2014 Alan Hamlett.`
			`:license: BSD, see LICENSE for more details.`
			`"""`

			`from . import TokenParser`
			`from ..compat import u`


			`""" If these keywords are found in the source file, treat them as a dependency.`
			`Must be lower-case strings.`
			`"""`
			`KEYWORDS = [`
			`'_',`
			`'$',`
			`'angular',`
fix pep8 linting errors 2017-10-24 05:01:31 +00:00			`'assert', # probably mocha`
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00			`'backbone',`
			`'batman',`
			`'c3',`
			`'can',`
			`'casper',`
			`'chai',`
			`'chaplin',`
			`'d3',`
fix pep8 linting errors 2017-10-24 05:01:31 +00:00			`'define', # probably require`
			`'describe', # mocha or jasmine`
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00			`'eco',`
			`'ember',`
			`'espresso',`
fix pep8 linting errors 2017-10-24 05:01:31 +00:00			`'expect', # probably jasmine`
			`'exports', # probably npm`
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00			`'express',`
			`'gulp',`
			`'handlebars',`
			`'highcharts',`
			`'jasmine',`
			`'jquery',`
			`'jstz',`
fix pep8 linting errors 2017-10-24 05:01:31 +00:00			`'ko', # probably knockout`
			`'m', # probably mithril`
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00			`'marionette',`
			`'meteor',`
			`'moment',`
			`'monitorio',`
			`'mustache',`
			`'phantom',`
			`'pickadate',`
			`'pikaday',`
			`'qunit',`
			`'react',`
			`'reactive',`
fix pep8 linting errors 2017-10-24 05:01:31 +00:00			`'require', # probably the commonjs spec`
parse bower and npm dependencies. parse dependencies from javascript files. 2014-12-25 06:58:56 +00:00			`'ripple',`
			`'rivets',`
			`'socketio',`
			`'spine',`
			`'thorax',`
			`'underscore',`
			`'vue',`
			`'way',`
			`'zombie',`
			`]`


support for JavaScript imports 2018-03-11 20:29:30 +00:00			`class HtmlParser(TokenParser):`
parse js dependencies from script tags in html template files 2014-12-25 19:33:07 +00:00			`tags = []`
upgrade pygments to v2.1.3 2016-06-13 14:41:17 +00:00			`opening_tag = False`
parse js dependencies from script tags in html template files 2014-12-25 19:33:07 +00:00			`getting_attrs = False`
			`current_attr = None`
			`current_attr_value = None`

tests for c and cpp dependency detection. change api of TokenParser class. 2015-09-26 20:04:35 +00:00			`def parse(self):`
parse js dependencies from script tags in html template files 2014-12-25 19:33:07 +00:00			`for index, token, content in self.tokens:`
			`self._process_token(token, content)`
			`return self.dependencies`

			`def _process_token(self, token, content):`
upgrade pygments to v2.1.3 2016-06-13 14:41:17 +00:00			`if u(token) == 'Token.Punctuation':`
			`self._process_punctuation(token, content)`
			`elif u(token) == 'Token.Name.Tag':`
parse js dependencies from script tags in html template files 2014-12-25 19:33:07 +00:00			`self._process_tag(token, content)`
			`elif u(token) == 'Token.Literal.String':`
			`self._process_string(token, content)`
			`elif u(token) == 'Token.Name.Attribute':`
			`self._process_attribute(token, content)`

			`@property`
			`def current_tag(self):`
			`return None if len(self.tags) == 0 else self.tags[0]`

upgrade pygments to v2.1.3 2016-06-13 14:41:17 +00:00			`def _process_punctuation(self, token, content):`
parse js dependencies from script tags in html template files 2014-12-25 19:33:07 +00:00			`if content.startswith('</') or content.startswith('/'):`
ignore malformed markup errors 2015-01-13 21:58:55 +00:00			`try:`
			`self.tags.pop(0)`
			`except IndexError:`
			`# ignore errors from malformed markup`
			`pass`
upgrade pygments to v2.1.3 2016-06-13 14:41:17 +00:00			`self.opening_tag = False`
parse js dependencies from script tags in html template files 2014-12-25 19:33:07 +00:00			`self.getting_attrs = False`
			`elif content.startswith('<'):`
upgrade pygments to v2.1.3 2016-06-13 14:41:17 +00:00			`self.opening_tag = True`
			`elif content.startswith('>'):`
			`self.opening_tag = False`
			`self.getting_attrs = False`

			`def _process_tag(self, token, content):`
			`if self.opening_tag:`
parse js dependencies from script tags in html template files 2014-12-25 19:33:07 +00:00			`self.tags.insert(0, content.replace('<', '', 1).strip().lower())`
			`self.getting_attrs = True`
			`self.current_attr = None`

			`def _process_attribute(self, token, content):`
			`if self.getting_attrs:`
			`self.current_attr = content.lower().strip('=')`
			`self.current_attr_value = None`

			`def _process_string(self, token, content):`
			`if self.getting_attrs and self.current_attr is not None:`
			`if content.endswith('"') or content.endswith("'"):`
			`if self.current_attr_value is not None:`
			`self.current_attr_value += content`
			`if self.current_tag == 'script' and self.current_attr == 'src':`
			`self.append(self.current_attr_value)`
			`self.current_attr = None`
			`self.current_attr_value = None`
			`else:`
			`if len(content) == 1:`
			`self.current_attr_value = content`
			`else:`
			`if self.current_tag == 'script' and self.current_attr == 'src':`
			`self.append(content)`
			`self.current_attr = None`
			`self.current_attr_value = None`
			`elif content.startswith('"') or content.startswith("'"):`
			`if self.current_attr_value is None:`
			`self.current_attr_value = content`