# -*- coding: utf-8 -*- """ pygments.lexers.pascal ~~~~~~~~~~~~~~~~~~~~~~ Lexers for Pascal family languages. :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import re from pygments.lexer import Lexer, RegexLexer, include, bygroups, words, \ using, this, default from pygments.util import get_bool_opt, get_list_opt from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ Number, Punctuation, Error from pygments.scanner import Scanner __all__ = ['DelphiLexer', 'Modula2Lexer', 'AdaLexer'] class DelphiLexer(Lexer): """ For `Delphi `_ (Borland Object Pascal), Turbo Pascal and Free Pascal source code. Additional options accepted: `turbopascal` Highlight Turbo Pascal specific keywords (default: ``True``). `delphi` Highlight Borland Delphi specific keywords (default: ``True``). `freepascal` Highlight Free Pascal specific keywords (default: ``True``). `units` A list of units that should be considered builtin, supported are ``System``, ``SysUtils``, ``Classes`` and ``Math``. Default is to consider all of them builtin. """ name = 'Delphi' aliases = ['delphi', 'pas', 'pascal', 'objectpascal'] filenames = ['*.pas'] mimetypes = ['text/x-pascal'] TURBO_PASCAL_KEYWORDS = ( 'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case', 'const', 'constructor', 'continue', 'destructor', 'div', 'do', 'downto', 'else', 'end', 'file', 'for', 'function', 'goto', 'if', 'implementation', 'in', 'inherited', 'inline', 'interface', 'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator', 'or', 'packed', 'procedure', 'program', 'record', 'reintroduce', 'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to', 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor' ) DELPHI_KEYWORDS = ( 'as', 'class', 'except', 'exports', 'finalization', 'finally', 'initialization', 'is', 'library', 'on', 'property', 'raise', 'threadvar', 'try' ) FREE_PASCAL_KEYWORDS = ( 'dispose', 'exit', 'false', 'new', 'true' ) BLOCK_KEYWORDS = set(( 'begin', 'class', 'const', 'constructor', 'destructor', 'end', 'finalization', 'function', 'implementation', 'initialization', 'label', 'library', 'operator', 'procedure', 'program', 'property', 'record', 'threadvar', 'type', 'unit', 'uses', 'var' )) FUNCTION_MODIFIERS = set(( 'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe', 'pascal', 'register', 'safecall', 'softfloat', 'stdcall', 'varargs', 'name', 'dynamic', 'near', 'virtual', 'external', 'override', 'assembler' )) # XXX: those aren't global. but currently we know no way for defining # them just for the type context. DIRECTIVES = set(( 'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far', 'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected', 'published', 'public' )) BUILTIN_TYPES = set(( 'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool', 'cardinal', 'char', 'comp', 'currency', 'double', 'dword', 'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint', 'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean', 'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency', 'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle', 'pint64', 'pinteger', 'plongint', 'plongword', 'pointer', 'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint', 'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword', 'pwordarray', 'pwordbool', 'real', 'real48', 'shortint', 'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate', 'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant', 'widechar', 'widestring', 'word', 'wordbool' )) BUILTIN_UNITS = { 'System': ( 'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8', 'append', 'arctan', 'assert', 'assigned', 'assignfile', 'beginthread', 'blockread', 'blockwrite', 'break', 'chdir', 'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble', 'concat', 'continue', 'copy', 'cos', 'dec', 'delete', 'dispose', 'doubletocomp', 'endthread', 'enummodules', 'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr', 'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize', 'fillchar', 'finalize', 'findclasshinstance', 'findhinstance', 'findresourcehinstance', 'flush', 'frac', 'freemem', 'get8087cw', 'getdir', 'getlasterror', 'getmem', 'getmemorymanager', 'getmodulefilename', 'getvariantmanager', 'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert', 'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset', 'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd', 'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount', 'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random', 'randomize', 'read', 'readln', 'reallocmem', 'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir', 'round', 'runerror', 'seek', 'seekeof', 'seekeoln', 'set8087cw', 'setlength', 'setlinebreakstyle', 'setmemorymanager', 'setstring', 'settextbuf', 'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt', 'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar', 'succ', 'swap', 'trunc', 'truncate', 'typeinfo', 'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring', 'upcase', 'utf8decode', 'utf8encode', 'utf8toansi', 'utf8tounicode', 'val', 'vararrayredim', 'varclear', 'widecharlentostring', 'widecharlentostrvar', 'widechartostring', 'widechartostrvar', 'widestringtoucs4string', 'write', 'writeln' ), 'SysUtils': ( 'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks', 'allocmem', 'ansicomparefilename', 'ansicomparestr', 'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr', 'ansilastchar', 'ansilowercase', 'ansilowercasefilename', 'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext', 'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp', 'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan', 'ansistrscan', 'ansistrupper', 'ansiuppercase', 'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep', 'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype', 'callterminateprocs', 'changefileext', 'charlength', 'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr', 'comparetext', 'createdir', 'createguid', 'currentyear', 'currtostr', 'currtostrf', 'date', 'datetimetofiledate', 'datetimetostr', 'datetimetostring', 'datetimetosystemtime', 'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate', 'decodedatefully', 'decodetime', 'deletefile', 'directoryexists', 'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime', 'exceptionerrormessage', 'excludetrailingbackslash', 'excludetrailingpathdelimiter', 'expandfilename', 'expandfilenamecase', 'expanduncfilename', 'extractfiledir', 'extractfiledrive', 'extractfileext', 'extractfilename', 'extractfilepath', 'extractrelativepath', 'extractshortpathname', 'fileage', 'fileclose', 'filecreate', 'filedatetodatetime', 'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly', 'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr', 'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage', 'findclose', 'findcmdlineswitch', 'findfirst', 'findnext', 'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr', 'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr', 'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr', 'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir', 'getenvironmentvariable', 'getfileversion', 'getformatsettings', 'getlocaleformatsettings', 'getmodulename', 'getpackagedescription', 'getpackageinfo', 'gettime', 'guidtostring', 'incamonth', 'includetrailingbackslash', 'includetrailingpathdelimiter', 'incmonth', 'initializepackage', 'interlockeddecrement', 'interlockedexchange', 'interlockedexchangeadd', 'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter', 'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident', 'languages', 'lastdelimiter', 'loadpackage', 'loadstr', 'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now', 'outofmemoryerror', 'quotedstr', 'raiselastoserror', 'raiselastwin32error', 'removedir', 'renamefile', 'replacedate', 'replacetime', 'safeloadlibrary', 'samefilename', 'sametext', 'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize', 'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy', 'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp', 'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy', 'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew', 'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos', 'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr', 'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime', 'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint', 'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime', 'strtotimedef', 'strupper', 'supports', 'syserrormessage', 'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime', 'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright', 'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime', 'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime', 'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime', 'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext', 'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase', 'widesamestr', 'widesametext', 'wideuppercase', 'win32check', 'wraptext' ), 'Classes': ( 'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize', 'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect', 'extractstrings', 'findclass', 'findglobalcomponent', 'getclass', 'groupdescendantswith', 'hextobin', 'identtoint', 'initinheritedcomponent', 'inttoident', 'invalidpoint', 'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext', 'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource', 'pointsequal', 'readcomponentres', 'readcomponentresex', 'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias', 'registerclasses', 'registercomponents', 'registerintegerconsts', 'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup', 'teststreamformat', 'unregisterclass', 'unregisterclasses', 'unregisterintegerconsts', 'unregistermoduleclasses', 'writecomponentresfile' ), 'Math': ( 'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil', 'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc', 'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle', 'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance', 'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask', 'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg', 'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate', 'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero', 'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue', 'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue', 'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods', 'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance', 'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd', 'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant', 'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode', 'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev', 'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation', 'tan', 'tanh', 'totalvariance', 'variance' ) } ASM_REGISTERS = set(( 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0', 'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0', 'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx', 'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp', 'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7' )) ASM_INSTRUCTIONS = set(( 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound', 'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw', 'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae', 'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg', 'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb', 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl', 'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo', 'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb', 'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid', 'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt', 'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd', 'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd', 'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe', 'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle', 'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge', 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe', 'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave', 'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw', 'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw', 'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr', 'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx', 'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd', 'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw', 'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw', 'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe', 'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror', 'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb', 'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe', 'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle', 'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng', 'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz', 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl', 'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold', 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str', 'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit', 'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait', 'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat', 'xlatb', 'xor' )) def __init__(self, **options): Lexer.__init__(self, **options) self.keywords = set() if get_bool_opt(options, 'turbopascal', True): self.keywords.update(self.TURBO_PASCAL_KEYWORDS) if get_bool_opt(options, 'delphi', True): self.keywords.update(self.DELPHI_KEYWORDS) if get_bool_opt(options, 'freepascal', True): self.keywords.update(self.FREE_PASCAL_KEYWORDS) self.builtins = set() for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)): self.builtins.update(self.BUILTIN_UNITS[unit]) def get_tokens_unprocessed(self, text): scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE) stack = ['initial'] in_function_block = False in_property_block = False was_dot = False next_token_is_function = False next_token_is_property = False collect_labels = False block_labels = set() brace_balance = [0, 0] while not scanner.eos: token = Error if stack[-1] == 'initial': if scanner.scan(r'\s+'): token = Text elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'): if scanner.match.startswith('$'): token = Comment.Preproc else: token = Comment.Multiline elif scanner.scan(r'//.*?$'): token = Comment.Single elif scanner.scan(r'[-+*\/=<>:;,.@\^]'): token = Operator # stop label highlighting on next ";" if collect_labels and scanner.match == ';': collect_labels = False elif scanner.scan(r'[\(\)\[\]]+'): token = Punctuation # abort function naming ``foo = Function(...)`` next_token_is_function = False # if we are in a function block we count the open # braces because ootherwise it's impossible to # determine the end of the modifier context if in_function_block or in_property_block: if scanner.match == '(': brace_balance[0] += 1 elif scanner.match == ')': brace_balance[0] -= 1 elif scanner.match == '[': brace_balance[1] += 1 elif scanner.match == ']': brace_balance[1] -= 1 elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'): lowercase_name = scanner.match.lower() if lowercase_name == 'result': token = Name.Builtin.Pseudo elif lowercase_name in self.keywords: token = Keyword # if we are in a special block and a # block ending keyword occours (and the parenthesis # is balanced) we end the current block context if (in_function_block or in_property_block) and \ lowercase_name in self.BLOCK_KEYWORDS and \ brace_balance[0] <= 0 and \ brace_balance[1] <= 0: in_function_block = False in_property_block = False brace_balance = [0, 0] block_labels = set() if lowercase_name in ('label', 'goto'): collect_labels = True elif lowercase_name == 'asm': stack.append('asm') elif lowercase_name == 'property': in_property_block = True next_token_is_property = True elif lowercase_name in ('procedure', 'operator', 'function', 'constructor', 'destructor'): in_function_block = True next_token_is_function = True # we are in a function block and the current name # is in the set of registered modifiers. highlight # it as pseudo keyword elif in_function_block and \ lowercase_name in self.FUNCTION_MODIFIERS: token = Keyword.Pseudo # if we are in a property highlight some more # modifiers elif in_property_block and \ lowercase_name in ('read', 'write'): token = Keyword.Pseudo next_token_is_function = True # if the last iteration set next_token_is_function # to true we now want this name highlighted as # function. so do that and reset the state elif next_token_is_function: # Look if the next token is a dot. If yes it's # not a function, but a class name and the # part after the dot a function name if scanner.test(r'\s*\.\s*'): token = Name.Class # it's not a dot, our job is done else: token = Name.Function next_token_is_function = False # same for properties elif next_token_is_property: token = Name.Property next_token_is_property = False # Highlight this token as label and add it # to the list of known labels elif collect_labels: token = Name.Label block_labels.add(scanner.match.lower()) # name is in list of known labels elif lowercase_name in block_labels: token = Name.Label elif lowercase_name in self.BUILTIN_TYPES: token = Keyword.Type elif lowercase_name in self.DIRECTIVES: token = Keyword.Pseudo # builtins are just builtins if the token # before isn't a dot elif not was_dot and lowercase_name in self.builtins: token = Name.Builtin else: token = Name elif scanner.scan(r"'"): token = String stack.append('string') elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'): token = String.Char elif scanner.scan(r'\$[0-9A-Fa-f]+'): token = Number.Hex elif scanner.scan(r'\d+(?![eE]|\.[^.])'): token = Number.Integer elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'): token = Number.Float else: # if the stack depth is deeper than once, pop if len(stack) > 1: stack.pop() scanner.get_char() elif stack[-1] == 'string': if scanner.scan(r"''"): token = String.Escape elif scanner.scan(r"'"): token = String stack.pop() elif scanner.scan(r"[^']*"): token = String else: scanner.get_char() stack.pop() elif stack[-1] == 'asm': if scanner.scan(r'\s+'): token = Text elif scanner.scan(r'end'): token = Keyword stack.pop() elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'): if scanner.match.startswith('$'): token = Comment.Preproc else: token = Comment.Multiline elif scanner.scan(r'//.*?$'): token = Comment.Single elif scanner.scan(r"'"): token = String stack.append('string') elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'): token = Name.Label elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'): lowercase_name = scanner.match.lower() if lowercase_name in self.ASM_INSTRUCTIONS: token = Keyword elif lowercase_name in self.ASM_REGISTERS: token = Name.Builtin else: token = Name elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'): token = Operator elif scanner.scan(r'[\(\)\[\]]+'): token = Punctuation elif scanner.scan(r'\$[0-9A-Fa-f]+'): token = Number.Hex elif scanner.scan(r'\d+(?![eE]|\.[^.])'): token = Number.Integer elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'): token = Number.Float else: scanner.get_char() stack.pop() # save the dot!!!11 if scanner.match.strip(): was_dot = scanner.match == '.' yield scanner.start_pos, token, scanner.match or '' class Modula2Lexer(RegexLexer): """ For `Modula-2 `_ source code. Additional options that determine which keywords are highlighted: `pim` Select PIM Modula-2 dialect (default: True). `iso` Select ISO Modula-2 dialect (default: False). `objm2` Select Objective Modula-2 dialect (default: False). `gm2ext` Also highlight GNU extensions (default: False). .. versionadded:: 1.3 """ name = 'Modula-2' aliases = ['modula2', 'm2'] filenames = ['*.def', '*.mod'] mimetypes = ['text/x-modula2'] flags = re.MULTILINE | re.DOTALL tokens = { 'whitespace': [ (r'\n+', Text), # blank lines (r'\s+', Text), # whitespace ], 'identifiers': [ (r'([a-zA-Z_$][\w$]*)', Name), ], 'numliterals': [ (r'[01]+B', Number.Bin), # binary number (ObjM2) (r'[0-7]+B', Number.Oct), # octal number (PIM + ISO) (r'[0-7]+C', Number.Oct), # char code (PIM + ISO) (r'[0-9A-F]+C', Number.Hex), # char code (ObjM2) (r'[0-9A-F]+H', Number.Hex), # hexadecimal number (r'[0-9]+\.[0-9]+E[+-][0-9]+', Number.Float), # real number (r'[0-9]+\.[0-9]+', Number.Float), # real number (r'[0-9]+', Number.Integer), # decimal whole number ], 'strings': [ (r"'(\\\\|\\'|[^'])*'", String), # single quoted string (r'"(\\\\|\\"|[^"])*"', String), # double quoted string ], 'operators': [ (r'[*/+=#~&<>\^-]', Operator), (r':=', Operator), # assignment (r'@', Operator), # pointer deref (ISO) (r'\.\.', Operator), # ellipsis or range (r'`', Operator), # Smalltalk message (ObjM2) (r'::', Operator), # type conversion (ObjM2) ], 'punctuation': [ (r'[()\[\]{},.:;|]', Punctuation), ], 'comments': [ (r'//.*?\n', Comment.Single), # ObjM2 (r'/\*(.*?)\*/', Comment.Multiline), # ObjM2 (r'\(\*([^$].*?)\*\)', Comment.Multiline), # TO DO: nesting of (* ... *) comments ], 'pragmas': [ (r'\(\*\$(.*?)\*\)', Comment.Preproc), # PIM (r'<\*(.*?)\*>', Comment.Preproc), # ISO + ObjM2 ], 'root': [ include('whitespace'), include('comments'), include('pragmas'), include('identifiers'), include('numliterals'), include('strings'), include('operators'), include('punctuation'), ] } pim_reserved_words = [ # 40 reserved words 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'EXPORT', 'FOR', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'POINTER', 'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', 'WITH', ] pim_pervasives = [ # 31 pervasives 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'INC', 'INCL', 'INTEGER', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD', 'ORD', 'PROC', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'VAL', ] iso_reserved_words = [ # 46 reserved words 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'EXCEPT', 'EXIT', 'EXPORT', 'FINALLY', 'FOR', 'FORWARD', 'FROM', 'IF', 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OR', 'PACKEDSET', 'POINTER', 'PROCEDURE', 'QUALIFIED', 'RECORD', 'REPEAT', 'REM', 'RETRY', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', 'WITH', ] iso_pervasives = [ # 42 pervasives 'ABS', 'BITSET', 'BOOLEAN', 'CAP', 'CARDINAL', 'CHAR', 'CHR', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FALSE', 'FLOAT', 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTEGER', 'INTERRUPTIBLE', 'LENGTH', 'LFLOAT', 'LONGCOMPLEX', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEW', 'NIL', 'ODD', 'ORD', 'PROC', 'PROTECTION', 'RE', 'REAL', 'SIZE', 'TRUE', 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', ] objm2_reserved_words = [ # base language, 42 reserved words 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', 'DO', 'ELSE', 'ELSIF', 'END', 'ENUM', 'EXIT', 'FOR', 'FROM', 'IF', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPORT', 'IN', 'IS', 'LOOP', 'MOD', 'MODULE', 'NOT', 'OF', 'OPAQUE', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN', 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'VARIADIC', 'WHILE', # OO extensions, 16 reserved words 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', 'SUPER', 'TRY', ] objm2_pervasives = [ # base language, 38 pervasives 'ABS', 'BITSET', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'DISPOSE', 'FALSE', 'HALT', 'HIGH', 'INTEGER', 'INRANGE', 'LENGTH', 'LONGCARD', 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NEG', 'NEW', 'NEXTV', 'NIL', 'OCTET', 'ODD', 'ORD', 'PRED', 'PROC', 'READ', 'REAL', 'SUCC', 'TMAX', 'TMIN', 'TRUE', 'TSIZE', 'UNICHAR', 'VAL', 'WRITE', 'WRITEF', # OO extensions, 3 pervasives 'OBJECT', 'NO', 'YES', ] gnu_reserved_words = [ # 10 additional reserved words 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__', '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE', ] gnu_pervasives = [ # 21 identifiers, actually from pseudo-module SYSTEM # but we will highlight them as if they were pervasives 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96', 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64', 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW', ] def __init__(self, **options): self.reserved_words = set() self.pervasives = set() # ISO Modula-2 if get_bool_opt(options, 'iso', False): self.reserved_words.update(self.iso_reserved_words) self.pervasives.update(self.iso_pervasives) # Objective Modula-2 elif get_bool_opt(options, 'objm2', False): self.reserved_words.update(self.objm2_reserved_words) self.pervasives.update(self.objm2_pervasives) # PIM Modula-2 (DEFAULT) else: self.reserved_words.update(self.pim_reserved_words) self.pervasives.update(self.pim_pervasives) # GNU extensions if get_bool_opt(options, 'gm2ext', False): self.reserved_words.update(self.gnu_reserved_words) self.pervasives.update(self.gnu_pervasives) # initialise RegexLexer.__init__(self, **options) def get_tokens_unprocessed(self, text): for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): # check for reserved words and pervasives if token is Name: if value in self.reserved_words: token = Keyword.Reserved elif value in self.pervasives: token = Keyword.Pervasive # return result yield index, token, value class AdaLexer(RegexLexer): """ For Ada source code. .. versionadded:: 1.3 """ name = 'Ada' aliases = ['ada', 'ada95', 'ada2005'] filenames = ['*.adb', '*.ads', '*.ada'] mimetypes = ['text/x-ada'] flags = re.MULTILINE | re.IGNORECASE tokens = { 'root': [ (r'[^\S\n]+', Text), (r'--.*?\n', Comment.Single), (r'[^\S\n]+', Text), (r'function|procedure|entry', Keyword.Declaration, 'subprogram'), (r'(subtype|type)(\s+)(\w+)', bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'), (r'task|protected', Keyword.Declaration), (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)), (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'), (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text, Comment.Preproc)), (r'(true|false|null)\b', Keyword.Constant), (words(( 'Address', 'Byte', 'Boolean', 'Character', 'Controlled', 'Count', 'Cursor', 'Duration', 'File_Mode', 'File_Type', 'Float', 'Generator', 'Integer', 'Long_Float', 'Long_Integer', 'Long_Long_Float', 'Long_Long_Integer', 'Natural', 'Positive', 'Reference_Type', 'Short_Float', 'Short_Integer', 'Short_Short_Float', 'Short_Short_Integer', 'String', 'Wide_Character', 'Wide_String'), suffix=r'\b'), Keyword.Type), (r'(and(\s+then)?|in|mod|not|or(\s+else)|rem)\b', Operator.Word), (r'generic|private', Keyword.Declaration), (r'package', Keyword.Declaration, 'package'), (r'array\b', Keyword.Reserved, 'array_def'), (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'), (r'(\w+)(\s*)(:)(\s*)(constant)', bygroups(Name.Constant, Text, Punctuation, Text, Keyword.Reserved)), (r'<<\w+>>', Name.Label), (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)', bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)), (words(( 'abort', 'abs', 'abstract', 'accept', 'access', 'aliased', 'all', 'array', 'at', 'begin', 'body', 'case', 'constant', 'declare', 'delay', 'delta', 'digits', 'do', 'else', 'elsif', 'end', 'entry', 'exception', 'exit', 'interface', 'for', 'goto', 'if', 'is', 'limited', 'loop', 'new', 'null', 'of', 'or', 'others', 'out', 'overriding', 'pragma', 'protected', 'raise', 'range', 'record', 'renames', 'requeue', 'return', 'reverse', 'select', 'separate', 'subtype', 'synchronized', 'task', 'tagged', 'terminate', 'then', 'type', 'until', 'when', 'while', 'xor'), prefix=r'\b', suffix=r'\b'), Keyword.Reserved), (r'"[^"]*"', String), include('attribute'), include('numbers'), (r"'[^']'", String.Character), (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))), (r"(<>|=>|:=|[()|:;,.'])", Punctuation), (r'[*<>+=/&-]', Operator), (r'\n+', Text), ], 'numbers': [ (r'[0-9_]+#[0-9a-f]+#', Number.Hex), (r'[0-9_]+\.[0-9_]*', Number.Float), (r'[0-9_]+', Number.Integer), ], 'attribute': [ (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)), ], 'subprogram': [ (r'\(', Punctuation, ('#pop', 'formal_part')), (r';', Punctuation, '#pop'), (r'is\b', Keyword.Reserved, '#pop'), (r'"[^"]+"|\w+', Name.Function), include('root'), ], 'end': [ ('(if|case|record|loop|select)', Keyword.Reserved), ('"[^"]+"|[\w.]+', Name.Function), ('\s+', Text), (';', Punctuation, '#pop'), ], 'type_def': [ (r';', Punctuation, '#pop'), (r'\(', Punctuation, 'formal_part'), (r'with|and|use', Keyword.Reserved), (r'array\b', Keyword.Reserved, ('#pop', 'array_def')), (r'record\b', Keyword.Reserved, ('record_def')), (r'(null record)(;)', bygroups(Keyword.Reserved, Punctuation), '#pop'), include('root'), ], 'array_def': [ (r';', Punctuation, '#pop'), (r'(\w+)(\s+)(range)', bygroups(Keyword.Type, Text, Keyword.Reserved)), include('root'), ], 'record_def': [ (r'end record', Keyword.Reserved, '#pop'), include('root'), ], 'import': [ (r'[\w.]+', Name.Namespace, '#pop'), default('#pop'), ], 'formal_part': [ (r'\)', Punctuation, '#pop'), (r'\w+', Name.Variable), (r',|:[^=]', Punctuation), (r'(in|not|null|out|access)\b', Keyword.Reserved), include('root'), ], 'package': [ ('body', Keyword.Declaration), ('is\s+new|renames', Keyword.Reserved), ('is', Keyword.Reserved, '#pop'), (';', Punctuation, '#pop'), ('\(', Punctuation, 'package_instantiation'), ('([\w.]+)', Name.Class), include('root'), ], 'package_instantiation': [ (r'("[^"]+"|\w+)(\s+)(=>)', bygroups(Name.Variable, Text, Punctuation)), (r'[\w.\'"]', Text), (r'\)', Punctuation, '#pop'), include('root'), ], }