# -*- coding: utf-8 -*- """ pygments.lexers.data ~~~~~~~~~~~~~~~~~~~~ Lexers for data file format. :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import re from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \ include, bygroups, inherit from pygments.token import Text, Comment, Keyword, Name, String, Number, \ Punctuation, Literal __all__ = ['YamlLexer', 'JsonLexer', 'JsonLdLexer'] class YamlLexerContext(LexerContext): """Indentation context for the YAML lexer.""" def __init__(self, *args, **kwds): super(YamlLexerContext, self).__init__(*args, **kwds) self.indent_stack = [] self.indent = -1 self.next_indent = 0 self.block_scalar_indent = None class YamlLexer(ExtendedRegexLexer): """ Lexer for `YAML `_, a human-friendly data serialization language. .. versionadded:: 0.11 """ name = 'YAML' aliases = ['yaml'] filenames = ['*.yaml', '*.yml'] mimetypes = ['text/x-yaml'] def something(token_class): """Do not produce empty tokens.""" def callback(lexer, match, context): text = match.group() if not text: return yield match.start(), token_class, text context.pos = match.end() return callback def reset_indent(token_class): """Reset the indentation levels.""" def callback(lexer, match, context): text = match.group() context.indent_stack = [] context.indent = -1 context.next_indent = 0 context.block_scalar_indent = None yield match.start(), token_class, text context.pos = match.end() return callback def save_indent(token_class, start=False): """Save a possible indentation level.""" def callback(lexer, match, context): text = match.group() extra = '' if start: context.next_indent = len(text) if context.next_indent < context.indent: while context.next_indent < context.indent: context.indent = context.indent_stack.pop() if context.next_indent > context.indent: extra = text[context.indent:] text = text[:context.indent] else: context.next_indent += len(text) if text: yield match.start(), token_class, text if extra: yield match.start()+len(text), token_class.Error, extra context.pos = match.end() return callback def set_indent(token_class, implicit=False): """Set the previously saved indentation level.""" def callback(lexer, match, context): text = match.group() if context.indent < context.next_indent: context.indent_stack.append(context.indent) context.indent = context.next_indent if not implicit: context.next_indent += len(text) yield match.start(), token_class, text context.pos = match.end() return callback def set_block_scalar_indent(token_class): """Set an explicit indentation level for a block scalar.""" def callback(lexer, match, context): text = match.group() context.block_scalar_indent = None if not text: return increment = match.group(1) if increment: current_indent = max(context.indent, 0) increment = int(increment) context.block_scalar_indent = current_indent + increment if text: yield match.start(), token_class, text context.pos = match.end() return callback def parse_block_scalar_empty_line(indent_token_class, content_token_class): """Process an empty line in a block scalar.""" def callback(lexer, match, context): text = match.group() if (context.block_scalar_indent is None or len(text) <= context.block_scalar_indent): if text: yield match.start(), indent_token_class, text else: indentation = text[:context.block_scalar_indent] content = text[context.block_scalar_indent:] yield match.start(), indent_token_class, indentation yield (match.start()+context.block_scalar_indent, content_token_class, content) context.pos = match.end() return callback def parse_block_scalar_indent(token_class): """Process indentation spaces in a block scalar.""" def callback(lexer, match, context): text = match.group() if context.block_scalar_indent is None: if len(text) <= max(context.indent, 0): context.stack.pop() context.stack.pop() return context.block_scalar_indent = len(text) else: if len(text) < context.block_scalar_indent: context.stack.pop() context.stack.pop() return if text: yield match.start(), token_class, text context.pos = match.end() return callback def parse_plain_scalar_indent(token_class): """Process indentation spaces in a plain scalar.""" def callback(lexer, match, context): text = match.group() if len(text) <= context.indent: context.stack.pop() context.stack.pop() return if text: yield match.start(), token_class, text context.pos = match.end() return callback tokens = { # the root rules 'root': [ # ignored whitespaces (r'[ ]+(?=#|$)', Text), # line breaks (r'\n+', Text), # a comment (r'#[^\n]*', Comment.Single), # the '%YAML' directive (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'), # the %TAG directive (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'), # document start and document end indicators (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace), 'block-line'), # indentation spaces (r'[ ]*(?!\s|$)', save_indent(Text, start=True), ('block-line', 'indentation')), ], # trailing whitespaces after directives or a block scalar indicator 'ignored-line': [ # ignored whitespaces (r'[ ]+(?=#|$)', Text), # a comment (r'#[^\n]*', Comment.Single), # line break (r'\n', Text, '#pop:2'), ], # the %YAML directive 'yaml-directive': [ # the version number (r'([ ]+)([0-9]+\.[0-9]+)', bygroups(Text, Number), 'ignored-line'), ], # the %YAG directive 'tag-directive': [ # a tag handle and the corresponding prefix (r'([ ]+)(!|![\w-]*!)' r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)', bygroups(Text, Keyword.Type, Text, Keyword.Type), 'ignored-line'), ], # block scalar indicators and indentation spaces 'indentation': [ # trailing whitespaces are ignored (r'[ ]*$', something(Text), '#pop:2'), # whitespaces preceeding block collection indicators (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)), # block collection indicators (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)), # the beginning a block line (r'[ ]*', save_indent(Text), '#pop'), ], # an indented line in the block context 'block-line': [ # the line end (r'[ ]*(?=#|$)', something(Text), '#pop'), # whitespaces separating tokens (r'[ ]+', Text), # tags, anchors and aliases, include('descriptors'), # block collections and scalars include('block-nodes'), # flow collections and quoted scalars include('flow-nodes'), # a plain scalar (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)', something(Name.Variable), 'plain-scalar-in-block-context'), ], # tags, anchors, aliases 'descriptors': [ # a full-form tag (r'!<[\w;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type), # a tag in the form '!', '!suffix' or '!handle!suffix' (r'!(?:[\w-]+)?' r'(?:![\w;/?:@&=+$,.!~*\'()\[\]%-]+)?', Keyword.Type), # an anchor (r'&[\w-]+', Name.Label), # an alias (r'\*[\w-]+', Name.Variable), ], # block collections and scalars 'block-nodes': [ # implicit key (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)), # literal and folded scalars (r'[|>]', Punctuation.Indicator, ('block-scalar-content', 'block-scalar-header')), ], # flow collections and quoted scalars 'flow-nodes': [ # a flow sequence (r'\[', Punctuation.Indicator, 'flow-sequence'), # a flow mapping (r'\{', Punctuation.Indicator, 'flow-mapping'), # a single-quoted scalar (r'\'', String, 'single-quoted-scalar'), # a double-quoted scalar (r'\"', String, 'double-quoted-scalar'), ], # the content of a flow collection 'flow-collection': [ # whitespaces (r'[ ]+', Text), # line breaks (r'\n+', Text), # a comment (r'#[^\n]*', Comment.Single), # simple indicators (r'[?:,]', Punctuation.Indicator), # tags, anchors and aliases include('descriptors'), # nested collections and quoted scalars include('flow-nodes'), # a plain scalar (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])', something(Name.Variable), 'plain-scalar-in-flow-context'), ], # a flow sequence indicated by '[' and ']' 'flow-sequence': [ # include flow collection rules include('flow-collection'), # the closing indicator (r'\]', Punctuation.Indicator, '#pop'), ], # a flow mapping indicated by '{' and '}' 'flow-mapping': [ # include flow collection rules include('flow-collection'), # the closing indicator (r'\}', Punctuation.Indicator, '#pop'), ], # block scalar lines 'block-scalar-content': [ # line break (r'\n', Text), # empty line (r'^[ ]+$', parse_block_scalar_empty_line(Text, Name.Constant)), # indentation spaces (we may leave the state here) (r'^[ ]*', parse_block_scalar_indent(Text)), # line content (r'[\S\t ]+', Name.Constant), ], # the content of a literal or folded scalar 'block-scalar-header': [ # indentation indicator followed by chomping flag (r'([1-9])?[+-]?(?=[ ]|$)', set_block_scalar_indent(Punctuation.Indicator), 'ignored-line'), # chomping flag followed by indentation indicator (r'[+-]?([1-9])?(?=[ ]|$)', set_block_scalar_indent(Punctuation.Indicator), 'ignored-line'), ], # ignored and regular whitespaces in quoted scalars 'quoted-scalar-whitespaces': [ # leading and trailing whitespaces are ignored (r'^[ ]+', Text), (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text), # other whitespaces are a part of the value (r'[ ]+', Name.Variable), ], # single-quoted scalars 'single-quoted-scalar': [ # include whitespace and line break rules include('quoted-scalar-whitespaces'), # escaping of the quote character (r'\'\'', String.Escape), # regular non-whitespace characters (r'[^\s\']+', String), # the closing quote (r'\'', String, '#pop'), ], # double-quoted scalars 'double-quoted-scalar': [ # include whitespace and line break rules include('quoted-scalar-whitespaces'), # escaping of special characters (r'\\[0abt\tn\nvfre "\\N_LP]', String), # escape codes (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})', String.Escape), # regular non-whitespace characters (r'[^\s"\\]+', String), # the closing quote (r'"', String, '#pop'), ], # the beginning of a new line while scanning a plain scalar 'plain-scalar-in-block-context-new-line': [ # empty lines (r'^[ ]+$', Text), # line breaks (r'\n+', Text), # document start and document end indicators (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'), # indentation spaces (we may leave the block line state here) (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'), ], # a plain scalar in the block context 'plain-scalar-in-block-context': [ # the scalar ends with the ':' indicator (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'), # the scalar ends with whitespaces followed by a comment (r'[ ]+(?=#)', Text, '#pop'), # trailing whitespaces are ignored (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text, 'plain-scalar-in-block-context-new-line'), # other whitespaces are a part of the value (r'[ ]+', Literal.Scalar.Plain), # regular non-whitespace characters (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain), ], # a plain scalar is the flow context 'plain-scalar-in-flow-context': [ # the scalar ends with an indicator character (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'), # the scalar ends with a comment (r'[ ]+(?=#)', Text, '#pop'), # leading and trailing whitespaces are ignored (r'^[ ]+', Text), (r'[ ]+$', Text), # line breaks are ignored (r'\n+', Text), # other whitespaces are a part of the value (r'[ ]+', Name.Variable), # regular non-whitespace characters (r'[^\s,:?\[\]{}]+', Name.Variable), ], } def get_tokens_unprocessed(self, text=None, context=None): if context is None: context = YamlLexerContext(text, 0) return super(YamlLexer, self).get_tokens_unprocessed(text, context) class JsonLexer(RegexLexer): """ For JSON data structures. .. versionadded:: 1.5 """ name = 'JSON' aliases = ['json'] filenames = ['*.json'] mimetypes = ['application/json'] flags = re.DOTALL # integer part of a number int_part = r'-?(0|[1-9]\d*)' # fractional part of a number frac_part = r'\.\d+' # exponential part of a number exp_part = r'[eE](\+|-)?\d+' tokens = { 'whitespace': [ (r'\s+', Text), ], # represents a simple terminal value 'simplevalue': [ (r'(true|false|null)\b', Keyword.Constant), (('%(int_part)s(%(frac_part)s%(exp_part)s|' '%(exp_part)s|%(frac_part)s)') % vars(), Number.Float), (int_part, Number.Integer), (r'"(\\\\|\\"|[^"])*"', String.Double), ], # the right hand side of an object, after the attribute name 'objectattribute': [ include('value'), (r':', Punctuation), # comma terminates the attribute but expects more (r',', Punctuation, '#pop'), # a closing bracket terminates the entire object, so pop twice (r'\}', Punctuation, ('#pop', '#pop')), ], # a json object - { attr, attr, ... } 'objectvalue': [ include('whitespace'), (r'"(\\\\|\\"|[^"])*"', Name.Tag, 'objectattribute'), (r'\}', Punctuation, '#pop'), ], # json array - [ value, value, ... } 'arrayvalue': [ include('whitespace'), include('value'), (r',', Punctuation), (r'\]', Punctuation, '#pop'), ], # a json value - either a simple value or a complex value (object or array) 'value': [ include('whitespace'), include('simplevalue'), (r'\{', Punctuation, 'objectvalue'), (r'\[', Punctuation, 'arrayvalue'), ], # the root of a json document whould be a value 'root': [ include('value'), ], } class JsonLdLexer(JsonLexer): """ For `JSON-LD `_ linked data. .. versionadded:: 2.0 """ name = 'JSON-LD' aliases = ['jsonld', 'json-ld'] filenames = ['*.jsonld'] mimetypes = ['application/ld+json'] tokens = { 'objectvalue': [ (r'"@(context|id|value|language|type|container|list|set|' r'reverse|index|base|vocab|graph)"', Name.Decorator, 'objectattribute'), inherit, ], }