Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/pascal.py: 48%
222 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:07 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:07 +0000
1"""
2 pygments.lexers.pascal
3 ~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for Pascal family languages.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import Lexer
14from pygments.util import get_bool_opt, get_list_opt
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Error, Whitespace
17from pygments.scanner import Scanner
19# compatibility import
20from pygments.lexers.modula2 import Modula2Lexer
22__all__ = ['DelphiLexer', 'PortugolLexer']
class PortugolLexer(Lexer):
    """For Portugol, a Pascal dialect with keywords in Portuguese.

    All tokenizing is delegated to a `DelphiLexer` instance that is created
    with its ``portugol`` option switched on.
    """
    name = 'Portugol'
    aliases = ['portugol']
    filenames = ['*.alg', '*.portugol']
    mimetypes = []
    url = "https://www.apoioinformatica.inf.br/produtos/visualg/linguagem"

    def __init__(self, **options):
        """Create the lexer and the Portugol-mode `DelphiLexer` it wraps.

        ``options`` are forwarded both to the `Lexer` base class and to the
        wrapped lexer.
        """
        Lexer.__init__(self, **options)
        # Merge instead of writing ``DelphiLexer(**options, portugol=True)``:
        # that spelling raises TypeError ("multiple values for keyword
        # argument") as soon as the caller's options contain ``portugol``.
        # The wrapped lexer must always run in Portugol mode, so the
        # caller-supplied value is overridden.
        delegate_options = dict(options, portugol=True)
        self.lexer = DelphiLexer(**delegate_options)

    def get_tokens_unprocessed(self, text):
        """Return the token stream the wrapped lexer produces for ``text``."""
        return self.lexer.get_tokens_unprocessed(text)
class DelphiLexer(Lexer):
    """
    For Delphi (Borland Object Pascal),
    Turbo Pascal and Free Pascal source code.

    Additional options accepted:

    `turbopascal`
        Highlight Turbo Pascal specific keywords (default: ``True``).
    `delphi`
        Highlight Borland Delphi specific keywords (default: ``True``).
    `freepascal`
        Highlight Free Pascal specific keywords (default: ``True``).
    `units`
        A list of units that should be considered builtin, supported are
        ``System``, ``SysUtils``, ``Classes`` and ``Math``.
        Default is to consider all of them builtin.
    `portugol`
        Lex Portugol instead of Pascal (default: ``False``). When enabled,
        the Pascal keyword and unit options above are not consulted.
    """
    name = 'Delphi'
    aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
    filenames = ['*.pas', '*.dpr']
    mimetypes = ['text/x-pascal']

    TURBO_PASCAL_KEYWORDS = (
        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
    )

    DELPHI_KEYWORDS = (
        'as', 'class', 'except', 'exports', 'finalization', 'finally',
        'initialization', 'is', 'library', 'on', 'property', 'raise',
        'threadvar', 'try'
    )

    FREE_PASCAL_KEYWORDS = (
        'dispose', 'exit', 'false', 'new', 'true'
    )

    # Keywords that close a pending function/property context.
    BLOCK_KEYWORDS = {
        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
        'finalization', 'function', 'implementation', 'initialization',
        'label', 'library', 'operator', 'procedure', 'program', 'property',
        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
    }

    FUNCTION_MODIFIERS = {
        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
        'override', 'assembler'
    }

    # XXX: those aren't global. but currently we know no way for defining
    #      them just for the type context.
    DIRECTIVES = {
        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
        'published', 'public'
    }

    BUILTIN_TYPES = {
        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
        'widechar', 'widestring', 'word', 'wordbool'
    }

    # Routines treated as builtins, grouped by the unit that provides them.
    BUILTIN_UNITS = {
        'System': (
            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
            'append', 'arctan', 'assert', 'assigned', 'assignfile',
            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
            'dispose', 'doubletocomp', 'endthread', 'enummodules',
            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
            'findresourcehinstance', 'flush', 'frac', 'freemem',
            'get8087cw', 'getdir', 'getlasterror', 'getmem',
            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
            'randomize', 'read', 'readln', 'reallocmem',
            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
            'set8087cw', 'setlength', 'setlinebreakstyle',
            'setmemorymanager', 'setstring', 'settextbuf',
            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
            'widecharlentostring', 'widecharlentostrvar',
            'widechartostring', 'widechartostrvar',
            'widestringtoucs4string', 'write', 'writeln'
        ),
        'SysUtils': (
            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
            'allocmem', 'ansicomparefilename', 'ansicomparestr',
            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
            'ansistrscan', 'ansistrupper', 'ansiuppercase',
            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
            'callterminateprocs', 'changefileext', 'charlength',
            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
            'comparetext', 'createdir', 'createguid', 'currentyear',
            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
            'exceptionerrormessage', 'excludetrailingbackslash',
            'excludetrailingpathdelimiter', 'expandfilename',
            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
            'extractfiledrive', 'extractfileext', 'extractfilename',
            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
            'includetrailingbackslash', 'includetrailingpathdelimiter',
            'incmonth', 'initializepackage', 'interlockeddecrement',
            'interlockedexchange', 'interlockedexchangeadd',
            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
            'wraptext'
        ),
        'Classes': (
            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
            'groupdescendantswith', 'hextobin', 'identtoint',
            'initinheritedcomponent', 'inttoident', 'invalidpoint',
            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
            'pointsequal', 'readcomponentres', 'readcomponentresex',
            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
            'registerclasses', 'registercomponents', 'registerintegerconsts',
            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
            'teststreamformat', 'unregisterclass', 'unregisterclasses',
            'unregisterintegerconsts', 'unregistermoduleclasses',
            'writecomponentresfile'
        ),
        'Math': (
            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
            'tan', 'tanh', 'totalvariance', 'variance'
        )
    }

    ASM_REGISTERS = {
        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
        'xmm6', 'xmm7'
    }

    ASM_INSTRUCTIONS = {
        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
        'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
        'xlatb', 'xor'
    }

    # NOTE: a few entries are capitalised ('Exp', 'Tan'); ``__init__``
    # lowercases every entry before use because names are looked up in
    # lowercase form.
    PORTUGOL_KEYWORDS = (
        'aleatorio', 'algoritmo', 'arquivo', 'ate', 'caso', 'cronometro',
        'debug', 'e', 'eco', 'enquanto', 'entao', 'escolha', 'escreva',
        'escreval', 'faca', 'falso', 'fimalgoritmo', 'fimenquanto',
        'fimescolha', 'fimfuncao', 'fimpara', 'fimprocedimento',
        'fimrepita', 'fimse', 'funcao', 'inicio', 'int', 'interrompa',
        'leia', 'limpatela', 'mod', 'nao', 'ou', 'outrocaso', 'para',
        'passo', 'pausa', 'procedimento', 'repita', 'retorne', 'se',
        'senao', 'timer', 'var', 'vetor', 'verdadeiro', 'xou', 'div',
        'mod', 'abs', 'arccos', 'arcsen', 'arctan', 'cos', 'cotan', 'Exp',
        'grauprad', 'int', 'log', 'logn', 'pi', 'quad', 'radpgrau',
        'raizq', 'rand', 'randi', 'sen', 'Tan', 'asc', 'carac',
        'caracpnum', 'compr', 'copia', 'maiusc', 'minusc', 'numpcarac',
        'pos',
    )

    PORTUGOL_BUILTIN_TYPES = {
        'inteiro', 'real', 'caractere', 'logico'
    }

    def __init__(self, **options):
        """Build the keyword and builtin sets selected by ``options``.

        With ``portugol`` enabled only the Portugol tables are loaded;
        otherwise the ``turbopascal``, ``delphi``, ``freepascal`` and
        ``units`` options select the Pascal tables. A unit name that is not
        a key of ``BUILTIN_UNITS`` raises ``KeyError``.
        """
        Lexer.__init__(self, **options)
        self.keywords = set()
        self.builtins = set()
        self.is_portugol = get_bool_opt(options, 'portugol', False)
        if self.is_portugol:
            # The tokenizer tests ``name.lower() in self.keywords``, so a
            # capitalised table entry ('Exp', 'Tan') could never match.
            # Normalise on load instead of relying on the table's spelling.
            self.keywords.update(
                keyword.lower() for keyword in self.PORTUGOL_KEYWORDS)
            self.builtins.update(self.PORTUGOL_BUILTIN_TYPES)
        else:
            if get_bool_opt(options, 'turbopascal', True):
                self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
            if get_bool_opt(options, 'delphi', True):
                self.keywords.update(self.DELPHI_KEYWORDS)
            if get_bool_opt(options, 'freepascal', True):
                self.keywords.update(self.FREE_PASCAL_KEYWORDS)
            for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
                self.builtins.update(self.BUILTIN_UNITS[unit])

    @staticmethod
    def _comment_token(comment):
        """Return the token type for a ``{...}`` or ``(*...*)`` comment.

        ``comment`` is the full matched text including its delimiters. A
        ``$`` directly after the opening delimiter marks a compiler
        directive, reported as ``Comment.Preproc``; anything else is
        ``Comment.Multiline``.
        """
        # The match always begins with the delimiter, so testing for a bare
        # leading '$' (as the code used to) could never succeed.
        if comment.startswith(('{$', '(*$')):
            return Comment.Preproc
        return Comment.Multiline

    def get_tokens_unprocessed(self, text):
        """Tokenize ``text`` with a hand-written scanner loop.

        Yields ``(start_position, token_type, matched_text)`` tuples. A
        small stack tracks the lexing state (``'initial'``, ``'string'``,
        ``'asm'``); a handful of flags carry context between tokens
        (function/property headers, label collection, preceding dot).
        Input that no rule matches is consumed one character at a time and
        reported as ``Error``.
        """
        scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
        stack = ['initial']
        in_function_block = False
        in_property_block = False
        was_dot = False
        next_token_is_function = False
        next_token_is_property = False
        collect_labels = False
        block_labels = set()
        # net count of open '(' and '[' inside a function/property header
        brace_balance = [0, 0]

        while not scanner.eos:
            token = Error

            if stack[-1] == 'initial':
                if scanner.scan(r'\s+'):
                    token = Whitespace
                elif not self.is_portugol and scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    token = self._comment_token(scanner.match)
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                # two-character operators must precede their one-character
                # prefixes; with '<' listed before '<>' the latter could
                # never match and was split into two tokens
                elif self.is_portugol and scanner.scan(r'(<\-)|(>=)|(<=)|(<>)|%|<|>|-|\+|\*|\=|\/|\.|:|,'):
                    token = Operator
                elif not self.is_portugol and scanner.scan(r'[-+*\/=<>:;,.@\^]'):
                    token = Operator
                    # stop label highlighting on next ";"
                    if collect_labels and scanner.match == ';':
                        collect_labels = False
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                    # abort function naming ``foo = Function(...)``
                    next_token_is_function = False
                    # if we are in a function block we count the open
                    # braces because otherwise it's impossible to
                    # determine the end of the modifier context
                    if in_function_block or in_property_block:
                        # the pattern matches whole runs such as "((" or
                        # ")]", so every character has to be counted, not
                        # only single-character matches
                        run = scanner.match
                        brace_balance[0] += run.count('(') - run.count(')')
                        brace_balance[1] += run.count('[') - run.count(']')
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name == 'result':
                        token = Name.Builtin.Pseudo
                    elif lowercase_name in self.keywords:
                        token = Keyword
                        # if we are in a special block and a
                        # block ending keyword occurs (and the parenthesis
                        # is balanced) we end the current block context
                        if self.is_portugol:
                            if lowercase_name in ('funcao', 'procedimento'):
                                in_function_block = True
                                next_token_is_function = True
                        else:
                            if (in_function_block or in_property_block) and \
                                    lowercase_name in self.BLOCK_KEYWORDS and \
                                    brace_balance[0] <= 0 and \
                                    brace_balance[1] <= 0:
                                in_function_block = False
                                in_property_block = False
                                brace_balance = [0, 0]
                                block_labels = set()
                            if lowercase_name in ('label', 'goto'):
                                collect_labels = True
                            elif lowercase_name == 'asm':
                                stack.append('asm')
                            elif lowercase_name == 'property':
                                in_property_block = True
                                next_token_is_property = True
                            elif lowercase_name in ('procedure', 'operator',
                                                    'function', 'constructor',
                                                    'destructor'):
                                in_function_block = True
                                next_token_is_function = True
                    # we are in a function block and the current name
                    # is in the set of registered modifiers. highlight
                    # it as pseudo keyword
                    elif not self.is_portugol and in_function_block and \
                            lowercase_name in self.FUNCTION_MODIFIERS:
                        token = Keyword.Pseudo
                    # if we are in a property highlight some more
                    # modifiers
                    elif not self.is_portugol and in_property_block and \
                            lowercase_name in ('read', 'write'):
                        token = Keyword.Pseudo
                        next_token_is_function = True
                    # if the last iteration set next_token_is_function
                    # to true we now want this name highlighted as
                    # function. so do that and reset the state
                    elif next_token_is_function:
                        # Look if the next token is a dot. If yes it's
                        # not a function, but a class name and the
                        # part after the dot a function name
                        if not self.is_portugol and scanner.test(r'\s*\.\s*'):
                            token = Name.Class
                        # it's not a dot, our job is done
                        else:
                            token = Name.Function
                            next_token_is_function = False

                            # Portugol: remember the routine name so later
                            # uses hit the ``block_labels`` lookup below
                            if self.is_portugol:
                                block_labels.add(scanner.match.lower())

                    # same for properties
                    elif not self.is_portugol and next_token_is_property:
                        token = Name.Property
                        next_token_is_property = False
                    # Highlight this token as label and add it
                    # to the list of known labels
                    elif not self.is_portugol and collect_labels:
                        token = Name.Label
                        block_labels.add(scanner.match.lower())
                    # name is in list of known labels
                    elif lowercase_name in block_labels:
                        token = Name.Label
                    elif self.is_portugol and lowercase_name in self.PORTUGOL_BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not self.is_portugol and lowercase_name in self.BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not self.is_portugol and lowercase_name in self.DIRECTIVES:
                        token = Keyword.Pseudo
                    # builtins are just builtins if the token
                    # before isn't a dot
                    elif not self.is_portugol and not was_dot and lowercase_name in self.builtins:
                        token = Name.Builtin
                    else:
                        token = Name
                elif self.is_portugol and scanner.scan(r"\""):
                    token = String
                    stack.append('string')
                elif not self.is_portugol and scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif not self.is_portugol and scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
                    token = String.Char
                elif not self.is_portugol and scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    # if the stack depth is deeper than once, pop
                    if len(stack) > 1:
                        stack.pop()
                    # nothing matched: consume one character as Error
                    scanner.get_char()

            elif stack[-1] == 'string':
                if self.is_portugol:
                    if scanner.scan(r"''"):
                        token = String.Escape
                    elif scanner.scan(r"\""):
                        token = String
                        stack.pop()
                    elif scanner.scan(r"[^\"]*"):
                        token = String
                    else:
                        scanner.get_char()
                        stack.pop()
                else:
                    if scanner.scan(r"''"):
                        token = String.Escape
                    elif scanner.scan(r"'"):
                        token = String
                        stack.pop()
                    elif scanner.scan(r"[^']*"):
                        token = String
                    else:
                        scanner.get_char()
                        stack.pop()
            elif not self.is_portugol and stack[-1] == 'asm':
                if scanner.scan(r'\s+'):
                    token = Whitespace
                # require a word boundary so identifiers that merely start
                # with "end" (e.g. ``EndLoop``) do not close the asm block
                elif scanner.scan(r'end\b'):
                    token = Keyword
                    stack.pop()
                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    token = self._comment_token(scanner.match)
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
                    token = Name.Label
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name in self.ASM_INSTRUCTIONS:
                        token = Keyword
                    elif lowercase_name in self.ASM_REGISTERS:
                        token = Name.Builtin
                    else:
                        token = Name
                elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
                    token = Operator
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                    token = Number.Integer
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                else:
                    scanner.get_char()
                    stack.pop()

            # remember whether the last non-blank token was a dot; a name
            # following a dot is never highlighted as a builtin
            if not self.is_portugol and scanner.match.strip():
                was_dot = scanner.match == '.'

            yield scanner.start_pos, token, scanner.match or ''