# -*- coding: utf-8 -*-
"""
    pygments.lexers.clean
    ~~~~~~~~~~~~~~~~~~~~~

    Lexer for the Clean language.

    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.lexer import ExtendedRegexLexer, LexerContext, \
    bygroups, words, include, default
from pygments.token import Comment, Keyword, Literal, Name, Number, Operator, \
    Punctuation, String, Text, Whitespace

__all__ = ['CleanLexer']


class CleanLexer(ExtendedRegexLexer):
    """
    Lexer for the general purpose, state-of-the-art, pure and lazy functional
    programming language Clean (http://clean.cs.ru.nl/Clean).

    .. versionadded:: 2.2
    """
    name = 'Clean'
    aliases = ['clean']
    filenames = ['*.icl', '*.dcl']

    def get_tokens_unprocessed(self, text=None, context=None):
        ctx = LexerContext(text, 0)
        ctx.indent = 0
        return ExtendedRegexLexer.get_tokens_unprocessed(self, text,
                                                         context=ctx)

    # In a `from ... import ...` line, anything that is not the word 'import'
    # is the module name; the 'import' keyword itself switches to the state
    # that lexes the imported symbols.
    def check_class_not_import(lexer, match, ctx):
        if match.group(0) == 'import':
            yield match.start(), Keyword.Namespace, match.group(0)
            ctx.stack = ctx.stack[:-1] + ['fromimportfunc']
        else:
            yield match.start(), Name.Class, match.group(0)
        ctx.pos = match.end()

    def check_instance_class(lexer, match, ctx):
        if match.group(0) == 'instance' or match.group(0) == 'class':
            yield match.start(), Keyword, match.group(0)
        else:
            yield match.start(), Name.Function, match.group(0)
            ctx.stack = ctx.stack + ['fromimportfunctype']
        ctx.pos = match.end()

    @staticmethod
    def indent_len(text):
        # Tabs are four spaces:
        # https://svn.cs.ru.nl/repos/clean-platform/trunk/doc/STANDARDS.txt
        text = text.replace('\n', '')
        return len(text.replace('\t', '    ')), len(text)

    def store_indent(lexer, match, ctx):
        ctx.indent, _ = CleanLexer.indent_len(match.group(0))
        ctx.pos = match.end()
        yield match.start(), Text, match.group(0)

    # The check_indent* callbacks compare the indentation of a new line with
    # the stored indentation and pop one, two or three states when the
    # current layout block has ended.
    def check_indent1(lexer, match, ctx):
        indent, reallen = CleanLexer.indent_len(match.group(0))
        if indent > ctx.indent:
            yield match.start(), Whitespace, match.group(0)
            ctx.pos = match.start() + reallen + 1
        else:
            ctx.indent = 0
            ctx.pos = match.start()
            ctx.stack = ctx.stack[:-1]
            yield match.start(), Whitespace, match.group(0)[1:]

    def check_indent2(lexer, match, ctx):
        indent, reallen = CleanLexer.indent_len(match.group(0))
        if indent > ctx.indent:
            yield match.start(), Whitespace, match.group(0)
            ctx.pos = match.start() + reallen + 1
        else:
            ctx.indent = 0
            ctx.pos = match.start()
            ctx.stack = ctx.stack[:-2]

    def check_indent3(lexer, match, ctx):
        indent, reallen = CleanLexer.indent_len(match.group(0))
        if indent > ctx.indent:
            yield match.start(), Whitespace, match.group(0)
            ctx.pos = match.start() + reallen + 1
        else:
            ctx.indent = 0
            ctx.pos = match.start()
            ctx.stack = ctx.stack[:-3]
            yield match.start(), Whitespace, match.group(0)[1:]
            if match.group(0) == '\n\n':
                ctx.pos = ctx.pos + 1

    def skip(lexer, match, ctx):
        ctx.stack = ctx.stack[:-1]
        ctx.pos = match.end()
        yield match.start(), Comment, match.group(0)

    keywords = ('class', 'instance', 'where', 'with', 'let', 'let!',
                'in', 'case', 'of', 'infix', 'infixr', 'infixl', 'generic',
                'derive', 'otherwise', 'code', 'inline')

    tokens = {
        'common': [
            (r';', Punctuation, '#pop'),
            (r'//', Comment, 'singlecomment'),
        ],
        'root': [
            # Comments
            (r'//.*\n', Comment.Single),
            (r'(?s)/\*\*.*?\*/', Comment.Special),
            (r'(?s)/\*.*?\*/', Comment.Multi),

            # Modules, imports, etc.
            (r'\b((?:implementation|definition|system)\s+)?(module)(\s+)([\w`.]+)',
             bygroups(Keyword.Namespace, Keyword.Namespace, Text, Name.Class)),
            (r'(?<=\n)import(?=\s)', Keyword.Namespace, 'import'),
            (r'(?<=\n)from(?=\s)', Keyword.Namespace, 'fromimport'),

            # Keywords
            # We cannot use (?s)^|(?<=\s) as prefix, so need to repeat this
            (words(keywords, prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword),
            (words(keywords, prefix=r'^', suffix=r'(?=\s)'), Keyword),

            # Function definitions
            (r'(?=\{\|)', Whitespace, 'genericfunction'),
            (r'(?<=\n)([ \t]*)([\w`$()=\-<>~*\^|+&%]+)((?:\s+\w)*)(\s*)(::)',
             bygroups(store_indent, Name.Function, Keyword.Type, Whitespace,
                      Punctuation),
             'functiondefargs'),

            # Type definitions
            (r'(?<=\n)([ \t]*)(::)', bygroups(store_indent, Punctuation),
             'typedef'),
            (r'^([ \t]*)(::)', bygroups(store_indent, Punctuation),
             'typedef'),

            # Literals
            (r'\'\\?.(?<!\\)\'', String.Char),
            (r'\'\\\d+\'', String.Char),
            (r'\'\\\\\'', String.Char),
            (r'[+\-~]?\s*\d+\.\d+(E[+\-~]?\d+)?\b', Number.Float),
            (r'[+\-~]?\s*0[0-7]\b', Number.Oct),
            (r'[+\-~]?\s*0x[0-9a-fA-F]\b', Number.Hex),
            (r'[+\-~]?\s*\d+\b', Number.Integer),
            (r'"', String.Double, 'doubleqstring'),
            (words(('True', 'False'), prefix=r'(?<=\s)', suffix=r'(?=\s)'),
             Literal),

            # Qualified names
            (r'([\w`]+)(\.)', bygroups(Name.Namespace, Punctuation)),

            # Everything else is some name
            (r'([\w`$%/?@]+\.?)*[\w`$%/?@]+', Name),

            # Punctuation
            (r'[{}()\[\],:;.#]', Punctuation),
            (r'[+\-=!<>|&~*\^/]', Operator),
            (r'\\\\', Operator),

            # Lambda expressions
            (r'\\.*?(->|\.|=)', Name.Function),

            # Whitespace
            (r'\s', Whitespace),

            include('common'),
        ],
        'fromimport': [
            include('common'),
            (r'([\w`.]+)', check_class_not_import),
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
        ],
        'fromimportfunc': [
            include('common'),
            (r'(::)(\s+)([^,\s]+)', bygroups(Punctuation, Text, Keyword.Type)),
            (r'([\w`$()=\-<>~*\^|+&%/]+)', check_instance_class),
            (r',', Punctuation),
            (r'\n', Whitespace, '#pop'),
            (r'\s', Whitespace),
        ],
        'fromimportfunctype': [
            include('common'),
            (r'[{(\[]', Punctuation, 'combtype'),
            (r',', Punctuation, '#pop'),
            (r'[:;.#]', Punctuation),
            (r'\n', Whitespace, '#pop:2'),
            (r'[^\S\n]+', Whitespace),
            (r'\S+', Keyword.Type),
        ],
        'combtype': [
            include('common'),
            (r'[})\]]', Punctuation, '#pop'),
            (r'[{(\[]', Punctuation, '#pop'),
            (r'[,:;.#]', Punctuation),
            (r'\s+', Whitespace),
            (r'\S+', Keyword.Type),
        ],
        'import': [
            include('common'),
            (words(('from', 'import', 'as', 'qualified'),
                   prefix=r'(?<=\s)', suffix=r'(?=\s)'), Keyword.Namespace),
            (r'[\w`.]+', Name.Class),
            (r'\n', Whitespace, '#pop'),
            (r',', Punctuation),
            (r'[^\S\n]+', Whitespace),
        ],
        'singlecomment': [
            (r'(.)(?=\n)', skip),
            (r'.+(?!\n)', Comment),
        ],
        'doubleqstring': [
            (r'[^\\"]+', String.Double),
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Double),
        ],
        'typedef': [
            include('common'),
            (r'[\w`]+', Keyword.Type),
            (r'[:=|(),\[\]{}!*]', Punctuation),
            (r'->', Punctuation),
            (r'\n(?=[^\s|])', Whitespace, '#pop'),
            (r'\s', Whitespace),
            (r'.', Keyword.Type),
        ],
        'genericfunction': [
            include('common'),
            (r'\{\|', Punctuation),
            (r'\|\}', Punctuation, '#pop'),
            (r',', Punctuation),
            (r'->', Punctuation),
            (r'(\s+of\s+)(\{)', bygroups(Keyword, Punctuation), 'genericftypes'),
            (r'\s', Whitespace),
            (r'[\w`\[\]{}!]+', Keyword.Type),
            (r'[*()]', Punctuation),
        ],
        'genericftypes': [
            include('common'),
            (r'[\w`]+', Keyword.Type),
            (r',', Punctuation),
            (r'\s', Whitespace),
            (r'\}', Punctuation, '#pop'),
        ],
        'functiondefargs': [
            include('common'),
            (r'\n(\s*)', check_indent1),
            (r'[!{}()\[\],:;.#]', Punctuation),
            (r'->', Punctuation, 'functiondefres'),
            (r'^(?=\S)', Whitespace, '#pop'),
            (r'\S', Keyword.Type),
            (r'\s', Whitespace),
        ],
        'functiondefres': [
            include('common'),
            (r'\n(\s*)', check_indent2),
            (r'^(?=\S)', Whitespace, '#pop:2'),
            (r'[!{}()\[\],:;.#]', Punctuation),
            (r'\|', Punctuation, 'functiondefclasses'),
            (r'\S', Keyword.Type),
            (r'\s', Whitespace),
        ],
        'functiondefclasses': [
            include('common'),
            (r'\n(\s*)', check_indent3),
            (r'^(?=\S)', Whitespace, '#pop:3'),
            (r'[,&]', Punctuation),
            (r'\[', Punctuation, 'functiondefuniquneq'),
            (r'[\w`$()=\-<>~*\^|+&%/{}\[\]@]', Name.Function, 'functionname'),
            (r'\s+', Whitespace),
        ],
        'functiondefuniquneq': [
            include('common'),
            (r'[a-z]+', Keyword.Type),
            (r'\s+', Whitespace),
            (r'<=|,', Punctuation),
            (r'\]', Punctuation, '#pop'),
        ],
        'functionname': [
            include('common'),
            (r'[\w`$()=\-<>~*\^|+&%/]+', Name.Function),
            (r'(?=\{\|)', Punctuation, 'genericfunction'),
            default('#pop'),
        ],
    }
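

# A minimal usage sketch, not part of the lexer itself: it assumes Pygments is
# installed and that this module is importable as pygments.lexers.clean (for
# example, inside a Pygments checkout). The Clean snippet is illustrative only.
if __name__ == '__main__':  # pragma: no cover
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    SAMPLE = (
        'module hello\n'
        '\n'
        'Start :: String\n'
        'Start = "Hello, world!"\n'
    )
    # Tokenise the sample with CleanLexer and print ANSI-highlighted output.
    print(highlight(SAMPLE, CleanLexer(), TerminalFormatter()))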