531 lines
18 KiB
Python
531 lines
18 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
"""
|
||
|
pygments.lexers.data
|
||
|
~~~~~~~~~~~~~~~~~~~~
|
||
|
|
||
|
Lexers for data file formats.
|
||
|
|
||
|
:copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
|
||
|
:license: BSD, see LICENSE for details.
|
||
|
"""
|
||
|
|
||
|
import re
|
||
|
|
||
|
from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \
|
||
|
include, bygroups, inherit
|
||
|
from pygments.token import Text, Comment, Keyword, Name, String, Number, \
|
||
|
Punctuation, Literal
|
||
|
|
||
|
__all__ = ['YamlLexer', 'JsonLexer', 'JsonLdLexer']
|
||
|
|
||
|
|
||
|
class YamlLexerContext(LexerContext):
    """Lexer context extended with the indentation state of the YAML lexer."""

    def __init__(self, *args, **kwds):
        super(YamlLexerContext, self).__init__(*args, **kwds)
        # explicit or detected indentation of the current block scalar;
        # ``None`` while it is not known
        self.block_scalar_indent = None
        # candidate indentation level collected while scanning a line
        self.next_indent = 0
        # indentation level currently in effect; starts at -1
        self.indent = -1
        # previously active indentation levels, innermost last
        self.indent_stack = []
|
||
|
|
||
|
|
||
|
class YamlLexer(ExtendedRegexLexer):
    """
    Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization
    language.

    .. versionadded:: 0.11
    """

    # lexer metadata: display name, short aliases, filename patterns and
    # MIME types
    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']
|
||
|
|
||
|
def something(token_class):
    """Return a callback that emits a token only for a non-empty match.

    The callback yields ``(start, token_class, text)`` for the matched text
    and then moves ``context.pos`` to the end of the match.  For an empty
    match it yields nothing and leaves the context untouched.
    """
    def callback(lexer, match, context):
        matched = match.group()
        if matched:
            yield match.start(), token_class, matched
            context.pos = match.end()
    return callback
|
||
|
|
||
|
def reset_indent(token_class):
    """Return a callback that clears all indentation state.

    The callback restores the indentation attributes of the context to
    their initial values, emits the matched text as ``token_class`` and
    moves ``context.pos`` to the end of the match.
    """
    def callback(lexer, match, context):
        # back to the same values a fresh context starts with
        context.indent_stack = []
        context.indent, context.next_indent = -1, 0
        context.block_scalar_indent = None
        yield match.start(), token_class, match.group()
        context.pos = match.end()
    return callback
|
||
|
|
||
|
def save_indent(token_class, start=False):
    """Return a callback that records a candidate indentation level.

    With ``start=True`` the matched text is taken as the whole indentation
    of the line: ``context.next_indent`` becomes its length, and if that is
    smaller than the current level, levels are popped off
    ``context.indent_stack`` until the current level is no longer deeper.
    If the line then turns out to be deeper than the restored level, the
    surplus is emitted as ``token_class.Error``.

    With ``start=False`` the length of the matched text is simply added to
    ``context.next_indent``.
    """
    def callback(lexer, match, context):
        spaces = match.group()
        surplus = ''
        if not start:
            context.next_indent += len(spaces)
        else:
            width = context.next_indent = len(spaces)
            if width < context.indent:
                # dedent: unwind until the current level is not deeper
                while width < context.indent:
                    context.indent = context.indent_stack.pop()
                if width > context.indent:
                    # the line sits between two known levels
                    cut = context.indent
                    spaces, surplus = spaces[:cut], spaces[cut:]
        if spaces:
            yield match.start(), token_class, spaces
        if surplus:
            yield match.start() + len(spaces), token_class.Error, surplus
        context.pos = match.end()
    return callback
|
||
|
|
||
|
def set_indent(token_class, implicit=False):
    """Return a callback that makes the saved indentation level current.

    If ``context.next_indent`` is deeper than ``context.indent``, the old
    level is pushed onto ``context.indent_stack`` and replaced by the saved
    one.  Unless ``implicit`` is true, the length of the matched text is
    then added to ``context.next_indent``.  The matched text is emitted as
    ``token_class``.
    """
    def callback(lexer, match, context):
        indicator = match.group()
        pending = context.next_indent
        if pending > context.indent:
            context.indent_stack.append(context.indent)
            context.indent = pending
        if not implicit:
            context.next_indent += len(indicator)
        yield match.start(), token_class, indicator
        context.pos = match.end()
    return callback
|
||
|
|
||
|
def set_block_scalar_indent(token_class):
    """Return a callback that stores an explicit block scalar indentation.

    The callback always clears ``context.block_scalar_indent`` first.  For
    a non-empty match whose first group holds a digit, the new value is
    that digit added to the current indentation level (a negative level
    counts as 0).  A non-empty match is emitted as ``token_class``; an
    empty match emits nothing.
    """
    def callback(lexer, match, context):
        header = match.group()
        context.block_scalar_indent = None
        if not header:
            return
        digit = match.group(1)
        if digit:
            base = max(context.indent, 0)
            context.block_scalar_indent = base + int(digit)
        yield match.start(), token_class, header
        context.pos = match.end()
    return callback
|
||
|
|
||
|
def parse_block_scalar_empty_line(indent_token_class, content_token_class):
    """Return a callback for a whitespace-only line inside a block scalar.

    If the block scalar indentation is known and the line is longer than
    it, the line is split: the first part is emitted as
    ``indent_token_class`` and the remainder as ``content_token_class``.
    Otherwise the whole line, when non-empty, is emitted as
    ``indent_token_class``.
    """
    def callback(lexer, match, context):
        blanks = match.group()
        limit = context.block_scalar_indent
        if limit is not None and len(blanks) > limit:
            # spaces beyond the indentation belong to the scalar content
            yield match.start(), indent_token_class, blanks[:limit]
            yield match.start() + limit, content_token_class, blanks[limit:]
        elif blanks:
            yield match.start(), indent_token_class, blanks
        context.pos = match.end()
    return callback
|
||
|
|
||
|
def parse_block_scalar_indent(token_class):
    """Return a callback for the leading spaces of a block scalar line.

    While no block scalar indentation is known, a line that is not
    indented deeper than the current level (a negative level counts as 0)
    pops two states off ``context.stack``; otherwise its indentation
    becomes the block scalar indentation.  Once the indentation is known,
    a line indented less than it pops two states as well.  When states are
    popped nothing is emitted and ``context.pos`` is not changed.
    """
    def callback(lexer, match, context):
        spaces = match.group()
        width = len(spaces)
        expected = context.block_scalar_indent
        if expected is None:
            leaving = width <= max(context.indent, 0)
        else:
            leaving = width < expected
        if leaving:
            # drop two states from the state stack
            for _ in range(2):
                context.stack.pop()
            return
        if expected is None:
            # the first content line defines the indentation
            context.block_scalar_indent = width
        if spaces:
            yield match.start(), token_class, spaces
            context.pos = match.end()
    return callback
|
||
|
|
||
|
def parse_plain_scalar_indent(token_class):
    """Return a callback for the leading spaces of a plain scalar line.

    A line that is not indented deeper than ``context.indent`` pops two
    states off ``context.stack`` and emits nothing.  Otherwise the spaces,
    when there are any, are emitted as ``token_class`` and ``context.pos``
    moves to the end of the match.
    """
    def callback(lexer, match, context):
        spaces = match.group()
        if len(spaces) > context.indent:
            if spaces:
                yield match.start(), token_class, spaces
                context.pos = match.end()
            return
        # not indented deeply enough: drop two states from the state stack
        for _ in range(2):
            context.stack.pop()
    return callback
|
||
|
|
||
|
# State table of the YAML lexer.  Each key is a state name and each value
# the ordered list of rules tried in that state; the order of the rules
# matters because several patterns overlap or can match the empty string.
tokens = {
    # the root rules
    'root': [
        # ignored whitespaces
        (r'[ ]+(?=#|$)', Text),
        # line breaks
        (r'\n+', Text),
        # a comment
        (r'#[^\n]*', Comment.Single),
        # the '%YAML' directive
        (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'),
        # the %TAG directive
        (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'),
        # document start and document end indicators
        (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace),
         'block-line'),
        # indentation spaces
        (r'[ ]*(?!\s|$)', save_indent(Text, start=True),
         ('block-line', 'indentation')),
    ],

    # trailing whitespaces after directives or a block scalar indicator
    'ignored-line': [
        # ignored whitespaces
        (r'[ ]+(?=#|$)', Text),
        # a comment
        (r'#[^\n]*', Comment.Single),
        # line break
        (r'\n', Text, '#pop:2'),
    ],

    # the %YAML directive
    'yaml-directive': [
        # the version number
        (r'([ ]+)([0-9]+\.[0-9]+)',
         bygroups(Text, Number), 'ignored-line'),
    ],

    # the %TAG directive
    'tag-directive': [
        # a tag handle and the corresponding prefix
        (r'([ ]+)(!|![\w-]*!)'
         r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)',
         bygroups(Text, Keyword.Type, Text, Keyword.Type),
         'ignored-line'),
    ],

    # block scalar indicators and indentation spaces
    'indentation': [
        # trailing whitespaces are ignored
        (r'[ ]*$', something(Text), '#pop:2'),
        # whitespaces preceding block collection indicators
        (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)),
        # block collection indicators
        (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
        # the beginning of a block line
        (r'[ ]*', save_indent(Text), '#pop'),
    ],

    # an indented line in the block context
    'block-line': [
        # the line end
        (r'[ ]*(?=#|$)', something(Text), '#pop'),
        # whitespaces separating tokens
        (r'[ ]+', Text),
        # tags, anchors and aliases
        include('descriptors'),
        # block collections and scalars
        include('block-nodes'),
        # flow collections and quoted scalars
        include('flow-nodes'),
        # a plain scalar (zero-width lookahead; the text itself is consumed
        # in the state entered here)
        (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)',
         something(Name.Variable),
         'plain-scalar-in-block-context'),
    ],

    # tags, anchors, aliases
    'descriptors': [
        # a full-form tag
        (r'!<[\w;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type),
        # a tag in the form '!', '!suffix' or '!handle!suffix'
        (r'!(?:[\w-]+)?'
         r'(?:![\w;/?:@&=+$,.!~*\'()\[\]%-]+)?', Keyword.Type),
        # an anchor
        (r'&[\w-]+', Name.Label),
        # an alias
        (r'\*[\w-]+', Name.Variable),
    ],

    # block collections and scalars
    'block-nodes': [
        # implicit key
        (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
        # literal and folded scalars
        (r'[|>]', Punctuation.Indicator,
         ('block-scalar-content', 'block-scalar-header')),
    ],

    # flow collections and quoted scalars
    'flow-nodes': [
        # a flow sequence
        (r'\[', Punctuation.Indicator, 'flow-sequence'),
        # a flow mapping
        (r'\{', Punctuation.Indicator, 'flow-mapping'),
        # a single-quoted scalar
        (r'\'', String, 'single-quoted-scalar'),
        # a double-quoted scalar
        (r'\"', String, 'double-quoted-scalar'),
    ],

    # the content of a flow collection
    'flow-collection': [
        # whitespaces
        (r'[ ]+', Text),
        # line breaks
        (r'\n+', Text),
        # a comment
        (r'#[^\n]*', Comment.Single),
        # simple indicators
        (r'[?:,]', Punctuation.Indicator),
        # tags, anchors and aliases
        include('descriptors'),
        # nested collections and quoted scalars
        include('flow-nodes'),
        # a plain scalar (zero-width lookahead; the text itself is consumed
        # in the state entered here)
        (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])',
         something(Name.Variable),
         'plain-scalar-in-flow-context'),
    ],

    # a flow sequence indicated by '[' and ']'
    'flow-sequence': [
        # include flow collection rules
        include('flow-collection'),
        # the closing indicator
        (r'\]', Punctuation.Indicator, '#pop'),
    ],

    # a flow mapping indicated by '{' and '}'
    'flow-mapping': [
        # include flow collection rules
        include('flow-collection'),
        # the closing indicator
        (r'\}', Punctuation.Indicator, '#pop'),
    ],

    # block scalar lines
    'block-scalar-content': [
        # line break
        (r'\n', Text),
        # empty line
        (r'^[ ]+$',
         parse_block_scalar_empty_line(Text, Name.Constant)),
        # indentation spaces (we may leave the state here)
        (r'^[ ]*', parse_block_scalar_indent(Text)),
        # line content
        (r'[\S\t ]+', Name.Constant),
    ],

    # the header of a literal or folded scalar
    'block-scalar-header': [
        # indentation indicator followed by chomping flag
        (r'([1-9])?[+-]?(?=[ ]|$)',
         set_block_scalar_indent(Punctuation.Indicator),
         'ignored-line'),
        # chomping flag followed by indentation indicator
        (r'[+-]?([1-9])?(?=[ ]|$)',
         set_block_scalar_indent(Punctuation.Indicator),
         'ignored-line'),
    ],

    # ignored and regular whitespaces in quoted scalars
    'quoted-scalar-whitespaces': [
        # leading and trailing whitespaces are ignored
        (r'^[ ]+', Text),
        (r'[ ]+$', Text),
        # line breaks are ignored
        (r'\n+', Text),
        # other whitespaces are a part of the value
        (r'[ ]+', Name.Variable),
    ],

    # single-quoted scalars
    'single-quoted-scalar': [
        # include whitespace and line break rules
        include('quoted-scalar-whitespaces'),
        # escaping of the quote character
        (r'\'\'', String.Escape),
        # regular non-whitespace characters
        (r'[^\s\']+', String),
        # the closing quote
        (r'\'', String, '#pop'),
    ],

    # double-quoted scalars
    'double-quoted-scalar': [
        # include whitespace and line break rules
        include('quoted-scalar-whitespaces'),
        # escaping of special characters
        (r'\\[0abt\tn\nvfre "\\N_LP]', String),
        # escape codes
        (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
         String.Escape),
        # regular non-whitespace characters
        (r'[^\s"\\]+', String),
        # the closing quote
        (r'"', String, '#pop'),
    ],

    # the beginning of a new line while scanning a plain scalar
    'plain-scalar-in-block-context-new-line': [
        # empty lines
        (r'^[ ]+$', Text),
        # line breaks
        (r'\n+', Text),
        # document start and document end indicators
        (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'),
        # indentation spaces (we may leave the block line state here)
        (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'),
    ],

    # a plain scalar in the block context
    'plain-scalar-in-block-context': [
        # the scalar ends with the ':' indicator
        (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'),
        # the scalar ends with whitespaces followed by a comment
        (r'[ ]+(?=#)', Text, '#pop'),
        # trailing whitespaces are ignored
        (r'[ ]+$', Text),
        # line breaks are ignored
        (r'\n+', Text, 'plain-scalar-in-block-context-new-line'),
        # other whitespaces are a part of the value
        (r'[ ]+', Literal.Scalar.Plain),
        # regular non-whitespace characters
        (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain),
    ],

    # a plain scalar in the flow context
    'plain-scalar-in-flow-context': [
        # the scalar ends with an indicator character
        (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'),
        # the scalar ends with a comment
        (r'[ ]+(?=#)', Text, '#pop'),
        # leading and trailing whitespaces are ignored
        (r'^[ ]+', Text),
        (r'[ ]+$', Text),
        # line breaks are ignored
        (r'\n+', Text),
        # other whitespaces are a part of the value
        (r'[ ]+', Name.Variable),
        # regular non-whitespace characters
        (r'[^\s,:?\[\]{}]+', Name.Variable),
    ],

}
|
||
|
|
||
|
def get_tokens_unprocessed(self, text=None, context=None):
    """Tokenize ``text``, creating a YAML lexer context when none is given.

    A caller-supplied ``context`` is passed through unchanged; otherwise a
    new ``YamlLexerContext`` starting at position 0 is used.
    """
    ctx = YamlLexerContext(text, 0) if context is None else context
    return super(YamlLexer, self).get_tokens_unprocessed(text, ctx)
|
||
|
|
||
|
|
||
|
class JsonLexer(RegexLexer):
    """
    For JSON data structures.

    .. versionadded:: 1.5
    """

    name = 'JSON'
    aliases = ['json']
    filenames = ['*.json']
    mimetypes = ['application/json']

    flags = re.DOTALL

    # integer part of a number
    int_part = r'-?(0|[1-9]\d*)'

    # fractional part of a number
    frac_part = r'\.\d+'

    # exponential part of a number
    exp_part = r'[eE](\+|-)?\d+'

    # a double-quoted string.  The two alternatives never match the same
    # character (a backslash always starts an escape pair), so the pattern
    # cannot backtrack exponentially on an unterminated string and never
    # takes an escaped quote for the closing one.
    string_part = r'"(\\.|[^"\\])*"'

    tokens = {
        'whitespace': [
            (r'\s+', Text),
        ],

        # represents a simple terminal value
        'simplevalue': [
            (r'(true|false|null)\b', Keyword.Constant),
            # the float rule must come before the integer rule, which
            # would otherwise match the leading digits of a float
            (('%(int_part)s(%(frac_part)s%(exp_part)s|'
              '%(exp_part)s|%(frac_part)s)') % vars(),
             Number.Float),
            (int_part, Number.Integer),
            (string_part, String.Double),
        ],

        # the right hand side of an object, after the attribute name
        'objectattribute': [
            include('value'),
            (r':', Punctuation),
            # comma terminates the attribute but expects more
            (r',', Punctuation, '#pop'),
            # a closing bracket terminates the entire object, so pop twice
            (r'\}', Punctuation, ('#pop', '#pop')),
        ],

        # a json object - { attr, attr, ... }
        'objectvalue': [
            include('whitespace'),
            (string_part, Name.Tag, 'objectattribute'),
            (r'\}', Punctuation, '#pop'),
        ],

        # json array - [ value, value, ... ]
        'arrayvalue': [
            include('whitespace'),
            include('value'),
            (r',', Punctuation),
            (r'\]', Punctuation, '#pop'),
        ],

        # a json value - either a simple value or a complex value
        # (object or array)
        'value': [
            include('whitespace'),
            include('simplevalue'),
            (r'\{', Punctuation, 'objectvalue'),
            (r'\[', Punctuation, 'arrayvalue'),
        ],

        # the root of a json document should be a value
        'root': [
            include('value'),
        ],
    }
|
||
|
|
||
|
class JsonLdLexer(JsonLexer):
    """
    For `JSON-LD <http://json-ld.org/>`_ linked data.

    .. versionadded:: 2.0
    """

    name = 'JSON-LD'
    aliases = ['jsonld', 'json-ld']
    filenames = ['*.jsonld']
    mimetypes = ['application/ld+json']

    tokens = {
        'objectvalue': [
            # attribute names that are ``@``-prefixed keywords get their
            # own token type
            (r'"@(context|id|value|language|type|container|'
             r'list|set|reverse|index|base|vocab|graph)"',
             Name.Decorator, 'objectattribute'),
            # followed by the rules of the parent lexer for this state
            inherit,
        ],
    }
|