Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pygments/lexers/pascal.py: 55%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

226 statements  

1""" 

2 pygments.lexers.pascal 

3 ~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for Pascal family languages. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import Lexer 

14from pygments.util import get_bool_opt, get_list_opt 

15from pygments.token import Comment, Operator, Keyword, Name, String, \ 

16 Number, Punctuation, Error, Whitespace 

17from pygments.scanner import Scanner 

18 

19# compatibility import 

20from pygments.lexers.modula2 import Modula2Lexer # noqa: F401 

21 

22__all__ = ['DelphiLexer', 'PortugolLexer'] 

23 

24 

class PortugolLexer(Lexer):
    """For Portugol, a Pascal dialect with keywords in Portuguese.

    All tokenising is delegated to a :class:`DelphiLexer` instance that is
    created with its ``portugol`` option switched on.
    """
    name = 'Portugol'
    aliases = ['portugol']
    filenames = ['*.alg', '*.portugol']
    mimetypes = []
    url = "https://www.apoioinformatica.inf.br/produtos/visualg/linguagem"
    version_added = ''

    def __init__(self, **options):
        """Create the lexer and its Portugol-mode ``DelphiLexer`` delegate.

        ``options`` are forwarded unchanged to both ``Lexer.__init__`` and
        the delegate, except that ``portugol`` is always forced to ``True``
        for the delegate.
        """
        Lexer.__init__(self, **options)
        # Merge instead of passing ``portugol=True`` as a separate keyword:
        # if the caller's options already contain a ``portugol`` entry, a
        # second keyword of the same name would raise TypeError.
        self.lexer = DelphiLexer(**{**options, 'portugol': True})

    def get_tokens_unprocessed(self, text):
        """Return the delegate's ``(index, tokentype, value)`` iterator for *text*."""
        return self.lexer.get_tokens_unprocessed(text)

40 

41 

class DelphiLexer(Lexer):
    """
    For Delphi (Borland Object Pascal),
    Turbo Pascal and Free Pascal source code.

    Additional options accepted:

    `turbopascal`
        Highlight Turbo Pascal specific keywords (default: ``True``).
    `delphi`
        Highlight Borland Delphi specific keywords (default: ``True``).
    `freepascal`
        Highlight Free Pascal specific keywords (default: ``True``).
    `units`
        A list of units that should be considered builtin, supported are
        ``System``, ``SysUtils``, ``Classes`` and ``Math``.
        Default is to consider all of them builtin.
    `portugol`
        Lex Portugol instead of Pascal (default: ``False``). This is the
        switch `PortugolLexer` turns on for its delegate lexer.
    """
    name = 'Delphi'
    aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
    filenames = ['*.pas', '*.dpr']
    mimetypes = ['text/x-pascal']
    url = 'https://www.embarcadero.com/products/delphi'
    version_added = ''

    # NOTE: every word list below is compared against the *lowercased*
    # identifier, so all entries must be written in lower case.

    TURBO_PASCAL_KEYWORDS = (
        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
    )

    DELPHI_KEYWORDS = (
        'as', 'class', 'except', 'exports', 'finalization', 'finally',
        'initialization', 'is', 'library', 'on', 'property', 'raise',
        'threadvar', 'try'
    )

    FREE_PASCAL_KEYWORDS = (
        'dispose', 'exit', 'false', 'new', 'true'
    )

    # Keywords that terminate a function/property modifier context
    # (see ``get_tokens_unprocessed``).
    BLOCK_KEYWORDS = {
        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
        'finalization', 'function', 'implementation', 'initialization',
        'label', 'library', 'operator', 'procedure', 'program', 'property',
        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
    }

    # Highlighted as pseudo keywords, but only inside a function block.
    FUNCTION_MODIFIERS = {
        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
        'override', 'assembler'
    }

    # XXX: those aren't global. but currently we know no way for defining
    #      them just for the type context.
    DIRECTIVES = {
        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
        'published', 'public'
    }

    BUILTIN_TYPES = {
        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
        'widechar', 'widestring', 'word', 'wordbool'
    }

    # Maps a unit name (as accepted by the ``units`` option) to the
    # identifiers that are highlighted as builtins when it is enabled.
    BUILTIN_UNITS = {
        'System': (
            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
            'append', 'arctan', 'assert', 'assigned', 'assignfile',
            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
            'dispose', 'doubletocomp', 'endthread', 'enummodules',
            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
            'findresourcehinstance', 'flush', 'frac', 'freemem',
            'get8087cw', 'getdir', 'getlasterror', 'getmem',
            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
            'randomize', 'read', 'readln', 'reallocmem',
            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
            'set8087cw', 'setlength', 'setlinebreakstyle',
            'setmemorymanager', 'setstring', 'settextbuf',
            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
            'widecharlentostring', 'widecharlentostrvar',
            'widechartostring', 'widechartostrvar',
            'widestringtoucs4string', 'write', 'writeln'
        ),
        'SysUtils': (
            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
            'allocmem', 'ansicomparefilename', 'ansicomparestr',
            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
            'ansistrscan', 'ansistrupper', 'ansiuppercase',
            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
            'callterminateprocs', 'changefileext', 'charlength',
            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
            'comparetext', 'createdir', 'createguid', 'currentyear',
            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
            'exceptionerrormessage', 'excludetrailingbackslash',
            'excludetrailingpathdelimiter', 'expandfilename',
            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
            'extractfiledrive', 'extractfileext', 'extractfilename',
            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
            'includetrailingbackslash', 'includetrailingpathdelimiter',
            'incmonth', 'initializepackage', 'interlockeddecrement',
            'interlockedexchange', 'interlockedexchangeadd',
            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
            'wraptext'
        ),
        'Classes': (
            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
            'groupdescendantswith', 'hextobin', 'identtoint',
            'initinheritedcomponent', 'inttoident', 'invalidpoint',
            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
            'pointsequal', 'readcomponentres', 'readcomponentresex',
            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
            'registerclasses', 'registercomponents', 'registerintegerconsts',
            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
            'teststreamformat', 'unregisterclass', 'unregisterclasses',
            'unregisterintegerconsts', 'unregistermoduleclasses',
            'writecomponentresfile'
        ),
        'Math': (
            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
            'tan', 'tanh', 'totalvariance', 'variance'
        )
    }

    ASM_REGISTERS = {
        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
        'xmm6', 'xmm7'
    }

    ASM_INSTRUCTIONS = {
        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
        'jc', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
        'xlatb', 'xor'
    }

    # 'exp' and 'tan' used to be spelled 'Exp' / 'Tan' here, which could
    # never match because identifiers are lowercased before the lookup.
    # Repeated entries ('int', 'mod') were dropped; they had no effect.
    PORTUGOL_KEYWORDS = (
        'aleatorio', 'algoritmo', 'arquivo', 'ate', 'caso', 'cronometro',
        'debug', 'e', 'eco', 'enquanto', 'entao', 'escolha', 'escreva',
        'escreval', 'faca', 'falso', 'fimalgoritmo', 'fimenquanto',
        'fimescolha', 'fimfuncao', 'fimpara', 'fimprocedimento',
        'fimrepita', 'fimse', 'funcao', 'inicio', 'int', 'interrompa',
        'leia', 'limpatela', 'mod', 'nao', 'ou', 'outrocaso', 'para',
        'passo', 'pausa', 'procedimento', 'repita', 'retorne', 'se',
        'senao', 'timer', 'var', 'vetor', 'verdadeiro', 'xou', 'div',
        'abs', 'arccos', 'arcsen', 'arctan', 'cos', 'cotan', 'exp',
        'grauprad', 'log', 'logn', 'pi', 'quad', 'radpgrau', 'raizq',
        'rand', 'randi', 'sen', 'tan', 'asc', 'carac', 'caracpnum',
        'compr', 'copia', 'maiusc', 'minusc', 'numpcarac', 'pos',
    )

    PORTUGOL_BUILTIN_TYPES = {
        'inteiro', 'real', 'caractere', 'logico'
    }

    def __init__(self, **options):
        """Build the keyword and builtin sets from the lexer options.

        Reads the boolean options ``portugol``, ``turbopascal``, ``delphi``
        and ``freepascal`` and the list option ``units`` (see the class
        docstring). Each unit name must be a key of ``BUILTIN_UNITS``; an
        unknown name raises ``KeyError``.
        """
        Lexer.__init__(self, **options)
        self.keywords = set()
        self.builtins = set()

        self.is_portugol = get_bool_opt(options, 'portugol', False)
        if self.is_portugol:
            self.keywords.update(self.PORTUGOL_KEYWORDS)
            self.builtins.update(self.PORTUGOL_BUILTIN_TYPES)

        # The Pascal keyword families are added in Portugol mode as well;
        # each one can be switched off through its own option.
        if get_bool_opt(options, 'turbopascal', True):
            self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
        if get_bool_opt(options, 'delphi', True):
            self.keywords.update(self.DELPHI_KEYWORDS)
        if get_bool_opt(options, 'freepascal', True):
            self.keywords.update(self.FREE_PASCAL_KEYWORDS)
        for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
            self.builtins.update(self.BUILTIN_UNITS[unit])

    def get_tokens_unprocessed(self, text):
        """Tokenise *text* with a hand-written scanner.

        Yields ``(start_index, tokentype, value)`` tuples. The scanner is
        case-insensitive and keeps a small state stack with the states
        ``'initial'``, ``'string'`` and ``'asm'`` (the latter only outside
        Portugol mode). A character that no rule of the current state
        matches is consumed on its own and yielded as ``Error``.
        """
        scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
        stack = ['initial']
        in_function_block = False
        in_property_block = False
        was_dot = False
        next_token_is_function = False
        next_token_is_property = False
        collect_labels = False
        block_labels = set()
        # net count of open "(" and "[" seen inside a function/property block
        brace_balance = [0, 0]

        while not scanner.eos:
            token = Error

            if stack[-1] == 'initial':
                if scanner.scan(r'\s+'):
                    token = Whitespace
                elif not self.is_portugol and scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    # The match still contains the comment delimiters, so a
                    # compiler directive starts with "{$" or "(*$", never
                    # with a bare "$".
                    if scanner.match.startswith(('{$', '(*$')):
                        token = Comment.Preproc
                    else:
                        token = Comment.Multiline
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                # "<>" has to be tried before "<", otherwise it can never
                # match as one operator.
                elif self.is_portugol and scanner.scan(r'(<\-)|(>=)|(<=)|(<>)|%|<|>|-|\+|\*|\=|\/|\.|:|,'):
                    token = Operator
                elif not self.is_portugol and scanner.scan(r'[-+*\/=<>:;,.@\^]'):
                    token = Operator
                    # stop label highlighting on next ";"
                    if collect_labels and scanner.match == ';':
                        collect_labels = False
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                    # abort function naming ``foo = Function(...)``
                    next_token_is_function = False
                    # if we are in a function block we count the open
                    # braces because otherwise it's impossible to
                    # determine the end of the modifier context
                    if in_function_block or in_property_block:
                        # The pattern matches a whole *run* of brackets
                        # (e.g. "()" or "))"), so every character in the
                        # run has to be counted, not just a one-char match.
                        run = scanner.match
                        brace_balance[0] += run.count('(') - run.count(')')
                        brace_balance[1] += run.count('[') - run.count(']')
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name == 'result':
                        token = Name.Builtin.Pseudo
                    elif lowercase_name in self.keywords:
                        token = Keyword
                        # if we are in a special block and a
                        # block ending keyword occurs (and the parenthesis
                        # is balanced) we end the current block context
                        if self.is_portugol:
                            if lowercase_name in ('funcao', 'procedimento'):
                                in_function_block = True
                                next_token_is_function = True
                        else:
                            if (in_function_block or in_property_block) and \
                                    lowercase_name in self.BLOCK_KEYWORDS and \
                                    brace_balance[0] <= 0 and \
                                    brace_balance[1] <= 0:
                                in_function_block = False
                                in_property_block = False
                                brace_balance = [0, 0]
                                block_labels = set()
                            if lowercase_name in ('label', 'goto'):
                                collect_labels = True
                            elif lowercase_name == 'asm':
                                stack.append('asm')
                            elif lowercase_name == 'property':
                                in_property_block = True
                                next_token_is_property = True
                            elif lowercase_name in ('procedure', 'operator',
                                                    'function', 'constructor',
                                                    'destructor'):
                                in_function_block = True
                                next_token_is_function = True
                    # we are in a function block and the current name
                    # is in the set of registered modifiers. highlight
                    # it as pseudo keyword
                    elif not self.is_portugol and in_function_block and \
                            lowercase_name in self.FUNCTION_MODIFIERS:
                        token = Keyword.Pseudo
                    # if we are in a property highlight some more
                    # modifiers
                    elif not self.is_portugol and in_property_block and \
                            lowercase_name in ('read', 'write'):
                        token = Keyword.Pseudo
                        next_token_is_function = True
                    # if the last iteration set next_token_is_function
                    # to true we now want this name highlighted as
                    # function. so do that and reset the state
                    elif next_token_is_function:
                        # Look if the next token is a dot. If yes it's
                        # not a function, but a class name and the
                        # part after the dot a function name
                        if not self.is_portugol and scanner.test(r'\s*\.\s*'):
                            token = Name.Class
                        # it's not a dot, our job is done
                        else:
                            token = Name.Function
                            next_token_is_function = False

                            # in Portugol mode the routine name is also
                            # remembered in block_labels, so later uses of
                            # it are emitted as Name.Label
                            if self.is_portugol:
                                block_labels.add(scanner.match.lower())

                    # same for properties
                    elif not self.is_portugol and next_token_is_property:
                        token = Name.Property
                        next_token_is_property = False
                    # Highlight this token as label and add it
                    # to the list of known labels
                    elif not self.is_portugol and collect_labels:
                        token = Name.Label
                        block_labels.add(scanner.match.lower())
                    # name is in list of known labels
                    elif lowercase_name in block_labels:
                        token = Name.Label
                    elif self.is_portugol and lowercase_name in self.PORTUGOL_BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not self.is_portugol and lowercase_name in self.BUILTIN_TYPES:
                        token = Keyword.Type
                    elif not self.is_portugol and lowercase_name in self.DIRECTIVES:
                        token = Keyword.Pseudo
                    # builtins are just builtins if the token
                    # before isn't a dot
                    elif not self.is_portugol and not was_dot and lowercase_name in self.builtins:
                        token = Name.Builtin
                    else:
                        token = Name
                elif self.is_portugol and scanner.scan(r"\""):
                    token = String
                    stack.append('string')
                elif not self.is_portugol and scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif not self.is_portugol and scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
                    token = String.Char
                elif not self.is_portugol and scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                # Floats are tried before integers. The previous integer
                # rule used a negative lookahead that regex backtracking
                # could satisfy by giving back digits, so "123.45" came out
                # as Integer "12" + Float "3.45". The float rule needs a
                # digit after the dot, so a range such as "1..10" still
                # yields Integer, ".", ".", Integer.
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                elif scanner.scan(r'\d+'):
                    token = Number.Integer
                else:
                    # if the stack depth is deeper than once, pop
                    if len(stack) > 1:
                        stack.pop()
                    # consume one character; it is yielded as Error
                    scanner.get_char()

            elif stack[-1] == 'string':
                if self.is_portugol:
                    # Portugol strings are delimited by double quotes
                    if scanner.scan(r"''"):
                        token = String.Escape
                    elif scanner.scan(r"\""):
                        token = String
                        stack.pop()
                    elif scanner.scan(r"[^\"]*"):
                        token = String
                    else:
                        scanner.get_char()
                        stack.pop()
                else:
                    # a doubled quote is an escaped quote inside the string
                    if scanner.scan(r"''"):
                        token = String.Escape
                    elif scanner.scan(r"'"):
                        token = String
                        stack.pop()
                    elif scanner.scan(r"[^']*"):
                        token = String
                    else:
                        scanner.get_char()
                        stack.pop()
            elif not self.is_portugol and stack[-1] == 'asm':
                if scanner.scan(r'\s+'):
                    token = Whitespace
                # \b keeps identifiers that merely start with "end"
                # (e.g. "endloop") from terminating the asm block
                elif scanner.scan(r'end\b'):
                    token = Keyword
                    stack.pop()
                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                    # same directive check as in the 'initial' state
                    if scanner.match.startswith(('{$', '(*$')):
                        token = Comment.Preproc
                    else:
                        token = Comment.Multiline
                elif scanner.scan(r'//.*?$'):
                    token = Comment.Single
                elif scanner.scan(r"'"):
                    token = String
                    stack.append('string')
                elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
                    token = Name.Label
                elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
                    lowercase_name = scanner.match.lower()
                    if lowercase_name in self.ASM_INSTRUCTIONS:
                        token = Keyword
                    elif lowercase_name in self.ASM_REGISTERS:
                        token = Name.Builtin
                    else:
                        token = Name
                elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
                    token = Operator
                elif scanner.scan(r'[\(\)\[\]]+'):
                    token = Punctuation
                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
                    token = Number.Hex
                # float before integer, for the same reason as in the
                # 'initial' state
                elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
                    token = Number.Float
                elif scanner.scan(r'\d+'):
                    token = Number.Integer
                else:
                    scanner.get_char()
                    stack.pop()

            # remember whether the last non-whitespace token was a dot;
            # names after a dot are not highlighted as builtins
            if not self.is_portugol and scanner.match.strip():
                was_dot = scanner.match == '.'

            yield scanner.start_pos, token, scanner.match or ''