1"""
2 pygments.lexers.pascal
3 ~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for Pascal family languages.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexer import Lexer
14from pygments.util import get_bool_opt, get_list_opt
15from pygments.token import Comment, Operator, Keyword, Name, String, \
16 Number, Punctuation, Error, Whitespace
17from pygments.scanner import Scanner
18
19# compatibility import
20from pygments.lexers.modula2 import Modula2Lexer # noqa: F401
21
# Names exported by ``from pygments.lexers.pascal import *``.  Modula2Lexer
# is imported in this module as a compatibility import but is not listed here.
__all__ = ['DelphiLexer', 'PortugolLexer']
23
24
class PortugolLexer(Lexer):
    """For Portugol, a Pascal dialect with keywords in Portuguese.

    All tokenization is delegated to a `DelphiLexer` instance created with
    the ``portugol`` option switched on; every other option is forwarded
    unchanged.
    """
    name = 'Portugol'
    aliases = ['portugol']
    filenames = ['*.alg', '*.portugol']
    mimetypes = []
    url = "https://www.apoioinformatica.inf.br/produtos/visualg/linguagem"
    version_added = ''

    def __init__(self, **options):
        """Create the lexer and its Portugol-mode `DelphiLexer` delegate."""
        Lexer.__init__(self, **options)
        # Merge into a single dict instead of writing
        # ``DelphiLexer(**options, portugol=True)``: the latter raises
        # TypeError (duplicate keyword argument) when the caller's options
        # already contain a 'portugol' entry.
        delegate_options = {**options, 'portugol': True}
        self.lexer = DelphiLexer(**delegate_options)

    def get_tokens_unprocessed(self, text):
        """Return the delegate's ``(index, tokentype, value)`` iterator for *text*."""
        return self.lexer.get_tokens_unprocessed(text)
40
41
class DelphiLexer(Lexer):
    """
    For Delphi (Borland Object Pascal),
    Turbo Pascal and Free Pascal source code.

    Additional options accepted:

    `turbopascal`
        Highlight Turbo Pascal specific keywords (default: ``True``).
    `delphi`
        Highlight Borland Delphi specific keywords (default: ``True``).
    `freepascal`
        Highlight Free Pascal specific keywords (default: ``True``).
    `units`
        A list of units that should be considered builtin, supported are
        ``System``, ``SysUtils``, ``Classes`` and ``Math``.
        Default is to consider all of them builtin.
    `portugol`
        Lex Portugol instead of Pascal; this is what `PortugolLexer`
        passes to its delegate (default: ``False``).
    """
    name = 'Delphi'
    aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
    filenames = ['*.pas', '*.dpr']
    mimetypes = ['text/x-pascal']
    url = 'https://www.embarcadero.com/products/delphi'
    version_added = ''

    TURBO_PASCAL_KEYWORDS = (
        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
    )

    DELPHI_KEYWORDS = (
        'as', 'class', 'except', 'exports', 'finalization', 'finally',
        'initialization', 'is', 'library', 'on', 'property', 'raise',
        'threadvar', 'try'
    )

    FREE_PASCAL_KEYWORDS = (
        'dispose', 'exit', 'false', 'new', 'true'
    )

    # Keywords that terminate a function/property context (when brackets
    # are balanced).
    BLOCK_KEYWORDS = {
        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
        'finalization', 'function', 'implementation', 'initialization',
        'label', 'library', 'operator', 'procedure', 'program', 'property',
        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
    }

    # Names highlighted as pseudo keywords while inside a function context.
    FUNCTION_MODIFIERS = {
        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
        'override', 'assembler'
    }

    # XXX: those aren't global. but currently we know no way for defining
    #      them just for the type context.
    DIRECTIVES = {
        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
        'published', 'public'
    }

    BUILTIN_TYPES = {
        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
        'widechar', 'widestring', 'word', 'wordbool'
    }

    # Builtin routine names (lowercase), grouped by the unit providing them.
    BUILTIN_UNITS = {
        'System': (
            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
            'append', 'arctan', 'assert', 'assigned', 'assignfile',
            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
            'dispose', 'doubletocomp', 'endthread', 'enummodules',
            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
            'findresourcehinstance', 'flush', 'frac', 'freemem',
            'get8087cw', 'getdir', 'getlasterror', 'getmem',
            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
            'randomize', 'read', 'readln', 'reallocmem',
            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
            'set8087cw', 'setlength', 'setlinebreakstyle',
            'setmemorymanager', 'setstring', 'settextbuf',
            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
            'widecharlentostring', 'widecharlentostrvar',
            'widechartostring', 'widechartostrvar',
            'widestringtoucs4string', 'write', 'writeln'
        ),
        'SysUtils': (
            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
            'allocmem', 'ansicomparefilename', 'ansicomparestr',
            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
            'ansistrscan', 'ansistrupper', 'ansiuppercase',
            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
            'callterminateprocs', 'changefileext', 'charlength',
            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
            'comparetext', 'createdir', 'createguid', 'currentyear',
            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
            'exceptionerrormessage', 'excludetrailingbackslash',
            'excludetrailingpathdelimiter', 'expandfilename',
            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
            'extractfiledrive', 'extractfileext', 'extractfilename',
            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
            'includetrailingbackslash', 'includetrailingpathdelimiter',
            'incmonth', 'initializepackage', 'interlockeddecrement',
            'interlockedexchange', 'interlockedexchangeadd',
            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
            'wraptext'
        ),
        'Classes': (
            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
            'groupdescendantswith', 'hextobin', 'identtoint',
            'initinheritedcomponent', 'inttoident', 'invalidpoint',
            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
            'pointsequal', 'readcomponentres', 'readcomponentresex',
            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
            'registerclasses', 'registercomponents', 'registerintegerconsts',
            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
            'teststreamformat', 'unregisterclass', 'unregisterclasses',
            'unregisterintegerconsts', 'unregistermoduleclasses',
            'writecomponentresfile'
        ),
        'Math': (
            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
            'tan', 'tanh', 'totalvariance', 'variance'
        )
    }

    ASM_REGISTERS = {
        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
        'xmm6', 'xmm7'
    }

    ASM_INSTRUCTIONS = {
        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
        'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
        'xlatb', 'xor'
    }

    # NOTE: every entry must be lowercase, because identifiers are
    # lowercased before they are looked up in ``self.keywords``.
    PORTUGOL_KEYWORDS = (
        'aleatorio',
        'algoritmo',
        'arquivo',
        'ate',
        'caso',
        'cronometro',
        'debug',
        'e',
        'eco',
        'enquanto',
        'entao',
        'escolha',
        'escreva',
        'escreval',
        'faca',
        'falso',
        'fimalgoritmo',
        'fimenquanto',
        'fimescolha',
        'fimfuncao',
        'fimpara',
        'fimprocedimento',
        'fimrepita',
        'fimse',
        'funcao',
        'inicio',
        'int',
        'interrompa',
        'leia',
        'limpatela',
        'mod',
        'nao',
        'ou',
        'outrocaso',
        'para',
        'passo',
        'pausa',
        'procedimento',
        'repita',
        'retorne',
        'se',
        'senao',
        'timer',
        'var',
        'vetor',
        'verdadeiro',
        'xou',
        'div',
        'mod',
        'abs',
        'arccos',
        'arcsen',
        'arctan',
        'cos',
        'cotan',
        'exp',
        'grauprad',
        'int',
        'log',
        'logn',
        'pi',
        'quad',
        'radpgrau',
        'raizq',
        'rand',
        'randi',
        'sen',
        'tan',
        'asc',
        'carac',
        'caracpnum',
        'compr',
        'copia',
        'maiusc',
        'minusc',
        'numpcarac',
        'pos',
    )

    PORTUGOL_BUILTIN_TYPES = {
        'inteiro', 'real', 'caractere', 'logico'
    }

    # Scanner patterns shared by the Pascal and the inline-assembler states.
    _BLOCK_COMMENT_PATTERN = r'\{.*?\}|\(\*.*?\*\)'
    _IDENTIFIER_PATTERN = r'[A-Za-z_][A-Za-z_0-9]*'
    # The ``\d`` inside the negative lookahead stops the regex engine from
    # backtracking into the digit run; without it ``12.5`` was lexed as the
    # integer ``1`` followed by the float ``2.5``.  ``\.[^.]`` still lets
    # the range ``1..10`` start with an integer.
    _INTEGER_PATTERN = r'\d+(?![\deE]|\.[^.])'
    _FLOAT_PATTERN = r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'
    # ``<>`` has to be tried before ``<`` or it can never match as a whole.
    _PORTUGOL_OPERATOR_PATTERN = \
        r'(<\-)|(>=)|(<=)|(<>)|%|<|>|-|\+|\*|\=|\/|\.|:|,'

    def __init__(self, **options):
        """Build the keyword and builtin sets selected by *options*.

        Sets ``self.keywords``, ``self.builtins`` and ``self.is_portugol``.
        Raises ``KeyError`` if the ``units`` option names a unit that is
        not a key of ``BUILTIN_UNITS``.
        """
        Lexer.__init__(self, **options)
        self.keywords = set()
        self.builtins = set()
        self.is_portugol = get_bool_opt(options, 'portugol', False)
        if self.is_portugol:
            self.keywords.update(self.PORTUGOL_KEYWORDS)
            self.builtins.update(self.PORTUGOL_BUILTIN_TYPES)

        if get_bool_opt(options, 'turbopascal', True):
            self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
        if get_bool_opt(options, 'delphi', True):
            self.keywords.update(self.DELPHI_KEYWORDS)
        if get_bool_opt(options, 'freepascal', True):
            self.keywords.update(self.FREE_PASCAL_KEYWORDS)
        for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
            self.builtins.update(self.BUILTIN_UNITS[unit])

    @staticmethod
    def _comment_token(comment_text):
        """Classify a ``{...}`` / ``(*...*)`` comment.

        Comments whose body starts with ``$`` are compiler directives and
        become ``Comment.Preproc``; everything else is ``Comment.Multiline``.
        """
        # The matched text still includes the opening delimiter, so the
        # ``$`` has to be looked for right after it.
        if comment_text.startswith(('{$', '(*$')):
            return Comment.Preproc
        return Comment.Multiline

    def _scan_string(self, scanner, stack):
        """Consume one token inside a string literal and return its type.

        Pops the ``'string'`` state from *stack* when the closing quote (or
        an unexpected character) is consumed.
        """
        if self.is_portugol:
            closing_quote, body = r"\"", r"[^\"]*"
        else:
            closing_quote, body = r"'", r"[^']*"

        if scanner.scan(r"''"):
            return String.Escape
        if scanner.scan(closing_quote):
            stack.pop()
            return String
        if scanner.scan(body):
            return String
        scanner.get_char()
        stack.pop()
        return Error

    def _scan_asm(self, scanner, stack):
        """Consume one token inside an ``asm ... end`` block and return its type.

        Pops the ``'asm'`` state from *stack* on ``end`` or on a character
        that matches none of the patterns; pushes ``'string'`` on a quote.
        """
        if scanner.scan(r'\s+'):
            return Whitespace
        # ``\b`` keeps identifiers that merely start with "end" (such as
        # ``endloop``) from terminating the assembler block.
        if scanner.scan(r'end\b'):
            stack.pop()
            return Keyword
        if scanner.scan(self._BLOCK_COMMENT_PATTERN):
            return self._comment_token(scanner.match)
        if scanner.scan(r'//.*?$'):
            return Comment.Single
        if scanner.scan(r"'"):
            stack.append('string')
            return String
        if scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
            return Name.Label
        if scanner.scan(self._IDENTIFIER_PATTERN):
            lowercase_name = scanner.match.lower()
            if lowercase_name in self.ASM_INSTRUCTIONS:
                return Keyword
            if lowercase_name in self.ASM_REGISTERS:
                return Name.Builtin
            return Name
        if scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
            return Operator
        if scanner.scan(r'[\(\)\[\]]+'):
            return Punctuation
        if scanner.scan(r'\$[0-9A-Fa-f]+'):
            return Number.Hex
        if scanner.scan(self._INTEGER_PATTERN):
            return Number.Integer
        if scanner.scan(self._FLOAT_PATTERN):
            return Number.Float
        scanner.get_char()
        stack.pop()
        return Error

    def get_tokens_unprocessed(self, text):
        """Tokenize *text*, yielding ``(index, tokentype, value)`` tuples.

        The lexer is a hand-written state machine over a `Scanner` with the
        states ``'initial'`` (regular source), ``'string'`` and ``'asm'``
        (kept on a stack).  Anything that no pattern matches is emitted
        character by character as ``Error``.
        """
        scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
        portugol = self.is_portugol
        stack = ['initial']
        # inside a procedure/function resp. property declaration
        in_function_block = False
        in_property_block = False
        # the previous non-whitespace token was a single "."
        was_dot = False
        # the next plain identifier names a function resp. property
        next_token_is_function = False
        next_token_is_property = False
        # identifiers are currently being declared/used as labels
        collect_labels = False
        block_labels = set()
        # open "(" and "[" counts while inside a function/property block
        brace_balance = [0, 0]

        while not scanner.eos:
            token = Error
            state = stack[-1]

            if state == 'initial':
                if scanner.scan(r'\s+'):
                    token = Whitespace
                elif not portugol and scanner.scan(self._BLOCK_COMMENT_PATTERN):
                    token = self._comment_token(scanner.match)
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif portugol and scanner.scan(self._PORTUGOL_OPERATOR_PATTERN):
                    token = Operator
                elif not portugol and scanner.scan(r'[-+*\/=<>:;,.@\^]'):
                    token = Operator
                    # stop label highlighting on next ";"
                    if collect_labels and scanner.match == ';':
                        collect_labels = False
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                    # abort function naming ``foo = Function(...)``
                    next_token_is_function = False
                    # if we are in a function block we count the open
                    # braces because otherwise it's impossible to
                    # determine the end of the modifier context
                    if in_function_block or in_property_block:
                        # The pattern matches whole runs such as "((" or
                        # "))", so count every bracket in the match instead
                        # of comparing the match with a single character.
                        brackets = scanner.match
                        brace_balance[0] += \
                            brackets.count('(') - brackets.count(')')
                        brace_balance[1] += \
                            brackets.count('[') - brackets.count(']')
                elif scanner.scan(self._IDENTIFIER_PATTERN):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name == 'result':
                        token = Name.Builtin.Pseudo
                    elif lowercase_name in self.keywords:
                        token = Keyword
                        if portugol:
                            if lowercase_name in ('funcao', 'procedimento'):
                                in_function_block = True
                                next_token_is_function = True
                        else:
                            # if we are in a special block and a
                            # block ending keyword occurs (and the
                            # parenthesis is balanced) we end the current
                            # block context
                            if (in_function_block or in_property_block) and \
                               lowercase_name in self.BLOCK_KEYWORDS and \
                               brace_balance[0] <= 0 and \
                               brace_balance[1] <= 0:
                                in_function_block = False
                                in_property_block = False
                                brace_balance = [0, 0]
                                block_labels = set()
                            if lowercase_name in ('label', 'goto'):
                                collect_labels = True
                            elif lowercase_name == 'asm':
                                stack.append('asm')
                            elif lowercase_name == 'property':
                                in_property_block = True
                                next_token_is_property = True
                            elif lowercase_name in ('procedure', 'operator',
                                                    'function', 'constructor',
                                                    'destructor'):
                                in_function_block = True
                                next_token_is_function = True
                    # we are in a function block and the current name
                    # is in the set of registered modifiers. highlight
                    # it as pseudo keyword
                    elif not portugol and in_function_block and \
                            lowercase_name in self.FUNCTION_MODIFIERS:
                        token = Keyword.Pseudo
                    # if we are in a property highlight some more
                    # modifiers
                    elif not portugol and in_property_block and \
                            lowercase_name in ('read', 'write'):
                        token = Keyword.Pseudo
                        next_token_is_function = True
                    # if the last iteration set next_token_is_function
                    # to true we now want this name highlighted as
                    # function. so do that and reset the state
                    elif next_token_is_function:
                        # Look if the next token is a dot. If yes it's
                        # not a function, but a class name and the
                        # part after the dot a function name.
                        # ``scanner.test`` raises EndOfText at the end of
                        # the input, hence the explicit ``eos`` guard.
                        if not portugol and not scanner.eos and \
                                scanner.test(r'\s*\.\s*'):
                            token = Name.Class
                        # it's not a dot, our job is done
                        else:
                            token = Name.Function
                            next_token_is_function = False

                            if portugol:
                                block_labels.add(scanner.match.lower())

                    # same for properties
                    elif not portugol and next_token_is_property:
                        token = Name.Property
                        next_token_is_property = False
                    # Highlight this token as label and add it
                    # to the list of known labels
                    elif not portugol and collect_labels:
                        token = Name.Label
                        block_labels.add(scanner.match.lower())
                    # name is in list of known labels
                    elif lowercase_name in block_labels:
                        token = Name.Label
                    elif portugol and \
                            lowercase_name in self.PORTUGOL_BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not portugol and lowercase_name in self.BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not portugol and lowercase_name in self.DIRECTIVES:
                        token = Keyword.Pseudo
                    # builtins are just builtins if the token
                    # before isn't a dot
                    elif not portugol and not was_dot and \
                            lowercase_name in self.builtins:
                        token = Name.Builtin
                    else:
                        token = Name
                elif portugol and scanner.scan(r"\""):
                    token = String
                    stack.append('string')
                elif not portugol and scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif not portugol and scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
                    token = String.Char
                elif not portugol and scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                elif scanner.scan(self._INTEGER_PATTERN):
                    token = Number.Integer
                elif scanner.scan(self._FLOAT_PATTERN):
                    token = Number.Float
                else:
                    # if the stack depth is deeper than once, pop
                    if len(stack) > 1:
                        stack.pop()
                    # emit the unmatched character as Error
                    scanner.get_char()

            elif state == 'string':
                token = self._scan_string(scanner, stack)

            elif not portugol and state == 'asm':
                token = self._scan_asm(scanner, stack)

            # remember whether the last non-whitespace token was a dot
            if not portugol and scanner.match.strip():
                was_dot = scanner.match == '.'

            yield scanner.start_pos, token, scanner.match or ''