Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/pascal.py: 48%

222 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:07 +0000

1""" 

2 pygments.lexers.pascal 

3 ~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for Pascal family languages. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexer import Lexer 

14from pygments.util import get_bool_opt, get_list_opt 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Punctuation, Error, Whitespace 

17from pygments.scanner import Scanner 

18 

19# compatibility import 

20from pygments.lexers.modula2 import Modula2Lexer 

21 

22__all__ = ['DelphiLexer', 'PortugolLexer'] 

23 

24 

class PortugolLexer(Lexer):
    """For Portugol, a Pascal dialect with keywords in Portuguese.

    All tokenizing is delegated to a :class:`DelphiLexer` instance that is
    created with its ``portugol`` option forced to ``True``.
    """
    name = 'Portugol'
    aliases = ['portugol']
    filenames = ['*.alg', '*.portugol']
    mimetypes = []
    url = "https://www.apoioinformatica.inf.br/produtos/visualg/linguagem"

    def __init__(self, **options):
        """Create the lexer and the wrapped Portugol-mode ``DelphiLexer``.

        *options* are forwarded unchanged to both ``Lexer.__init__`` and the
        wrapped lexer, except that ``portugol`` is always set to ``True``
        for the latter.
        """
        Lexer.__init__(self, **options)
        # Merge the dicts rather than writing
        # ``DelphiLexer(**options, portugol=True)``: that spelling raises
        # TypeError ("multiple values for keyword argument") as soon as the
        # caller passes a ``portugol`` option of their own.
        self.lexer = DelphiLexer(**{**options, 'portugol': True})

    def get_tokens_unprocessed(self, text):
        """Return the wrapped lexer's ``get_tokens_unprocessed(text)``."""
        return self.lexer.get_tokens_unprocessed(text)

39 

40 

class DelphiLexer(Lexer):
    """
    For Delphi (Borland Object Pascal),
    Turbo Pascal and Free Pascal source code.

    Additional options accepted:

    `turbopascal`
        Highlight Turbo Pascal specific keywords (default: ``True``).
    `delphi`
        Highlight Borland Delphi specific keywords (default: ``True``).
    `freepascal`
        Highlight Free Pascal specific keywords (default: ``True``).
    `units`
        A list of units that should be considered builtin, supported are
        ``System``, ``SysUtils``, ``Classes`` and ``Math``.
        Default is to consider all of them builtin.
    `portugol`
        Tokenize Portugol instead of Pascal: the Portugol keywords and
        builtin types are added and the scanner uses the Portugol operator
        and double-quoted string rules (default: ``False``).
    """
    name = 'Delphi'
    aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
    filenames = ['*.pas', '*.dpr']
    mimetypes = ['text/x-pascal']

    # NOTE: the tokenizer looks identifiers up by their lower-cased
    # spelling, so every entry in the tables below must be lowercase.

    TURBO_PASCAL_KEYWORDS = (
        'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
        'const', 'constructor', 'continue', 'destructor', 'div', 'do',
        'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
        'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
        'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
        'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
        'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
        'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
    )

    DELPHI_KEYWORDS = (
        'as', 'class', 'except', 'exports', 'finalization', 'finally',
        'initialization', 'is', 'library', 'on', 'property', 'raise',
        'threadvar', 'try'
    )

    FREE_PASCAL_KEYWORDS = (
        'dispose', 'exit', 'false', 'new', 'true'
    )

    # Keywords that close an open function/property context.
    BLOCK_KEYWORDS = {
        'begin', 'class', 'const', 'constructor', 'destructor', 'end',
        'finalization', 'function', 'implementation', 'initialization',
        'label', 'library', 'operator', 'procedure', 'program', 'property',
        'record', 'threadvar', 'type', 'unit', 'uses', 'var'
    }

    # Highlighted as pseudo keywords while inside a function context.
    FUNCTION_MODIFIERS = {
        'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
        'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
        'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
        'override', 'assembler'
    }

    # XXX: those aren't global. but currently we know no way for defining
    #      them just for the type context.
    DIRECTIVES = {
        'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
        'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
        'published', 'public'
    }

    BUILTIN_TYPES = {
        'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
        'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
        'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
        'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
        'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
        'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
        'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
        'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
        'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
        'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
        'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
        'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
        'widechar', 'widestring', 'word', 'wordbool'
    }

    # Routines treated as builtins, keyed by the unit name accepted in the
    # ``units`` option.
    BUILTIN_UNITS = {
        'System': (
            'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
            'append', 'arctan', 'assert', 'assigned', 'assignfile',
            'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
            'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
            'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
            'dispose', 'doubletocomp', 'endthread', 'enummodules',
            'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
            'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
            'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
            'findresourcehinstance', 'flush', 'frac', 'freemem',
            'get8087cw', 'getdir', 'getlasterror', 'getmem',
            'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
            'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
            'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
            'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
            'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
            'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
            'randomize', 'read', 'readln', 'reallocmem',
            'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
            'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
            'set8087cw', 'setlength', 'setlinebreakstyle',
            'setmemorymanager', 'setstring', 'settextbuf',
            'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
            'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
            'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
            'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
            'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
            'utf8tounicode', 'val', 'vararrayredim', 'varclear',
            'widecharlentostring', 'widecharlentostrvar',
            'widechartostring', 'widechartostrvar',
            'widestringtoucs4string', 'write', 'writeln'
        ),
        'SysUtils': (
            'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
            'allocmem', 'ansicomparefilename', 'ansicomparestr',
            'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
            'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
            'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
            'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
            'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
            'ansistrscan', 'ansistrupper', 'ansiuppercase',
            'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
            'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
            'callterminateprocs', 'changefileext', 'charlength',
            'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
            'comparetext', 'createdir', 'createguid', 'currentyear',
            'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
            'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
            'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
            'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
            'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
            'exceptionerrormessage', 'excludetrailingbackslash',
            'excludetrailingpathdelimiter', 'expandfilename',
            'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
            'extractfiledrive', 'extractfileext', 'extractfilename',
            'extractfilepath', 'extractrelativepath', 'extractshortpathname',
            'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
            'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
            'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
            'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
            'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
            'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
            'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
            'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
            'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
            'getenvironmentvariable', 'getfileversion', 'getformatsettings',
            'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
            'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
            'includetrailingbackslash', 'includetrailingpathdelimiter',
            'incmonth', 'initializepackage', 'interlockeddecrement',
            'interlockedexchange', 'interlockedexchangeadd',
            'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
            'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
            'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
            'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
            'outofmemoryerror', 'quotedstr', 'raiselastoserror',
            'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
            'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
            'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
            'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
            'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
            'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
            'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
            'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
            'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
            'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
            'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
            'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
            'strtotimedef', 'strupper', 'supports', 'syserrormessage',
            'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
            'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
            'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
            'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
            'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
            'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
            'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
            'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
            'wraptext'
        ),
        'Classes': (
            'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
            'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
            'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
            'groupdescendantswith', 'hextobin', 'identtoint',
            'initinheritedcomponent', 'inttoident', 'invalidpoint',
            'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
            'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
            'pointsequal', 'readcomponentres', 'readcomponentresex',
            'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
            'registerclasses', 'registercomponents', 'registerintegerconsts',
            'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
            'teststreamformat', 'unregisterclass', 'unregisterclasses',
            'unregisterintegerconsts', 'unregistermoduleclasses',
            'writecomponentresfile'
        ),
        'Math': (
            'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
            'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
            'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
            'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
            'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
            'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
            'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
            'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
            'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
            'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
            'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
            'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
            'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
            'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
            'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
            'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
            'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
            'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
            'tan', 'tanh', 'totalvariance', 'variance'
        )
    }

    # Highlighted as Name.Builtin inside ``asm`` blocks.
    ASM_REGISTERS = {
        'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
        'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
        'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
        'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
        'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
        'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
        'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
        'xmm6', 'xmm7'
    }

    # Highlighted as Keyword inside ``asm`` blocks.
    ASM_INSTRUCTIONS = {
        'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
        'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
        'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
        'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
        'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
        'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
        'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
        'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
        'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
        'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
        'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
        'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
        'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
        'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
        'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
        'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
        'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
        'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
        'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
        'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
        'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
        'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
        'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
        'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
        'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
        'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
        'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
        'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
        'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
        'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
        'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
        'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
        'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
        'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
        'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
        'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
        'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
        'xlatb', 'xor'
    }

    PORTUGOL_KEYWORDS = (
        'aleatorio',
        'algoritmo',
        'arquivo',
        'ate',
        'caso',
        'cronometro',
        'debug',
        'e',
        'eco',
        'enquanto',
        'entao',
        'escolha',
        'escreva',
        'escreval',
        'faca',
        'falso',
        'fimalgoritmo',
        'fimenquanto',
        'fimescolha',
        'fimfuncao',
        'fimpara',
        'fimprocedimento',
        'fimrepita',
        'fimse',
        'funcao',
        'inicio',
        'int',
        'interrompa',
        'leia',
        'limpatela',
        'mod',
        'nao',
        'ou',
        'outrocaso',
        'para',
        'passo',
        'pausa',
        'procedimento',
        'repita',
        'retorne',
        'se',
        'senao',
        'timer',
        'var',
        'vetor',
        'verdadeiro',
        'xou',
        'div',
        'mod',
        'abs',
        'arccos',
        'arcsen',
        'arctan',
        'cos',
        'cotan',
        # was 'Exp': a mixed-case entry can never equal the lower-cased
        # identifier the tokenizer looks up
        'exp',
        'grauprad',
        'int',
        'log',
        'logn',
        'pi',
        'quad',
        'radpgrau',
        'raizq',
        'rand',
        'randi',
        'sen',
        # was 'Tan' (same lower-case lookup issue as 'exp' above)
        'tan',
        'asc',
        'carac',
        'caracpnum',
        'compr',
        'copia',
        'maiusc',
        'minusc',
        'numpcarac',
        'pos',
    )

    PORTUGOL_BUILTIN_TYPES = {
        'inteiro', 'real', 'caractere', 'logico'
    }

398 

399 def __init__(self, **options): 

400 Lexer.__init__(self, **options) 

401 self.keywords = set() 

402 self.builtins = set() 

403 if get_bool_opt(options, 'portugol', False): 

404 self.keywords.update(self.PORTUGOL_KEYWORDS) 

405 self.builtins.update(self.PORTUGOL_BUILTIN_TYPES) 

406 self.is_portugol = True 

407 else: 

408 self.is_portugol = False 

409 

410 if get_bool_opt(options, 'turbopascal', True): 

411 self.keywords.update(self.TURBO_PASCAL_KEYWORDS) 

412 if get_bool_opt(options, 'delphi', True): 

413 self.keywords.update(self.DELPHI_KEYWORDS) 

414 if get_bool_opt(options, 'freepascal', True): 

415 self.keywords.update(self.FREE_PASCAL_KEYWORDS) 

416 for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)): 

417 self.builtins.update(self.BUILTIN_UNITS[unit]) 

418 

419 def get_tokens_unprocessed(self, text): 

420 scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE) 

421 stack = ['initial'] 

422 in_function_block = False 

423 in_property_block = False 

424 was_dot = False 

425 next_token_is_function = False 

426 next_token_is_property = False 

427 collect_labels = False 

428 block_labels = set() 

429 brace_balance = [0, 0] 

430 

431 while not scanner.eos: 

432 token = Error 

433 

434 if stack[-1] == 'initial': 

435 if scanner.scan(r'\s+'): 

436 token = Whitespace 

437 elif not self.is_portugol and scanner.scan(r'\{.*?\}|\(\*.*?\*\)'): 

438 if scanner.match.startswith('$'): 

439 token = Comment.Preproc 

440 else: 

441 token = Comment.Multiline 

442 elif scanner.scan(r'//.*?$'): 

443 token = Comment.Single 

444 elif self.is_portugol and scanner.scan(r'(<\-)|(>=)|(<=)|%|<|>|-|\+|\*|\=|(<>)|\/|\.|:|,'): 

445 token = Operator 

446 elif not self.is_portugol and scanner.scan(r'[-+*\/=<>:;,.@\^]'): 

447 token = Operator 

448 # stop label highlighting on next ";" 

449 if collect_labels and scanner.match == ';': 

450 collect_labels = False 

451 elif scanner.scan(r'[\(\)\[\]]+'): 

452 token = Punctuation 

453 # abort function naming ``foo = Function(...)`` 

454 next_token_is_function = False 

455 # if we are in a function block we count the open 

456 # braces because ootherwise it's impossible to 

457 # determine the end of the modifier context 

458 if in_function_block or in_property_block: 

459 if scanner.match == '(': 

460 brace_balance[0] += 1 

461 elif scanner.match == ')': 

462 brace_balance[0] -= 1 

463 elif scanner.match == '[': 

464 brace_balance[1] += 1 

465 elif scanner.match == ']': 

466 brace_balance[1] -= 1 

467 elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'): 

468 lowercase_name = scanner.match.lower() 

469 if lowercase_name == 'result': 

470 token = Name.Builtin.Pseudo 

471 elif lowercase_name in self.keywords: 

472 token = Keyword 

473 # if we are in a special block and a 

474 # block ending keyword occurs (and the parenthesis 

475 # is balanced) we end the current block context 

476 if self.is_portugol: 

477 if lowercase_name in ('funcao', 'procedimento'): 

478 in_function_block = True 

479 next_token_is_function = True 

480 else: 

481 if (in_function_block or in_property_block) and \ 

482 lowercase_name in self.BLOCK_KEYWORDS and \ 

483 brace_balance[0] <= 0 and \ 

484 brace_balance[1] <= 0: 

485 in_function_block = False 

486 in_property_block = False 

487 brace_balance = [0, 0] 

488 block_labels = set() 

489 if lowercase_name in ('label', 'goto'): 

490 collect_labels = True 

491 elif lowercase_name == 'asm': 

492 stack.append('asm') 

493 elif lowercase_name == 'property': 

494 in_property_block = True 

495 next_token_is_property = True 

496 elif lowercase_name in ('procedure', 'operator', 

497 'function', 'constructor', 

498 'destructor'): 

499 in_function_block = True 

500 next_token_is_function = True 

501 # we are in a function block and the current name 

502 # is in the set of registered modifiers. highlight 

503 # it as pseudo keyword 

504 elif not self.is_portugol and in_function_block and \ 

505 lowercase_name in self.FUNCTION_MODIFIERS: 

506 token = Keyword.Pseudo 

507 # if we are in a property highlight some more 

508 # modifiers 

509 elif not self.is_portugol and in_property_block and \ 

510 lowercase_name in ('read', 'write'): 

511 token = Keyword.Pseudo 

512 next_token_is_function = True 

513 # if the last iteration set next_token_is_function 

514 # to true we now want this name highlighted as 

515 # function. so do that and reset the state 

516 elif next_token_is_function: 

517 # Look if the next token is a dot. If yes it's 

518 # not a function, but a class name and the 

519 # part after the dot a function name 

520 if not self.is_portugol and scanner.test(r'\s*\.\s*'): 

521 token = Name.Class 

522 # it's not a dot, our job is done 

523 else: 

524 token = Name.Function 

525 next_token_is_function = False 

526 

527 if self.is_portugol: 

528 block_labels.add(scanner.match.lower()) 

529 

530 # same for properties 

531 elif not self.is_portugol and next_token_is_property: 

532 token = Name.Property 

533 next_token_is_property = False 

534 # Highlight this token as label and add it 

535 # to the list of known labels 

536 elif not self.is_portugol and collect_labels: 

537 token = Name.Label 

538 block_labels.add(scanner.match.lower()) 

539 # name is in list of known labels 

540 elif lowercase_name in block_labels: 

541 token = Name.Label 

542 elif self.is_portugol and lowercase_name in self.PORTUGOL_BUILTIN_TYPES: 

543 token = Keyword.Type 

544 elif not self.is_portugol and lowercase_name in self.BUILTIN_TYPES: 

545 token = Keyword.Type 

546 elif not self.is_portugol and lowercase_name in self.DIRECTIVES: 

547 token = Keyword.Pseudo 

548 # builtins are just builtins if the token 

549 # before isn't a dot 

550 elif not self.is_portugol and not was_dot and lowercase_name in self.builtins: 

551 token = Name.Builtin 

552 else: 

553 token = Name 

554 elif self.is_portugol and scanner.scan(r"\""): 

555 token = String 

556 stack.append('string') 

557 elif not self.is_portugol and scanner.scan(r"'"): 

558 token = String 

559 stack.append('string') 

560 elif not self.is_portugol and scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'): 

561 token = String.Char 

562 elif not self.is_portugol and scanner.scan(r'\$[0-9A-Fa-f]+'): 

563 token = Number.Hex 

564 elif scanner.scan(r'\d+(?![eE]|\.[^.])'): 

565 token = Number.Integer 

566 elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'): 

567 token = Number.Float 

568 else: 

569 # if the stack depth is deeper than once, pop 

570 if len(stack) > 1: 

571 stack.pop() 

572 scanner.get_char() 

573 

574 elif stack[-1] == 'string': 

575 if self.is_portugol: 

576 if scanner.scan(r"''"): 

577 token = String.Escape 

578 elif scanner.scan(r"\""): 

579 token = String 

580 stack.pop() 

581 elif scanner.scan(r"[^\"]*"): 

582 token = String 

583 else: 

584 scanner.get_char() 

585 stack.pop() 

586 else: 

587 if scanner.scan(r"''"): 

588 token = String.Escape 

589 elif scanner.scan(r"'"): 

590 token = String 

591 stack.pop() 

592 elif scanner.scan(r"[^']*"): 

593 token = String 

594 else: 

595 scanner.get_char() 

596 stack.pop() 

597 elif not self.is_portugol and stack[-1] == 'asm': 

598 if scanner.scan(r'\s+'): 

599 token = Whitespace 

600 elif scanner.scan(r'end'): 

601 token = Keyword 

602 stack.pop() 

603 elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'): 

604 if scanner.match.startswith('$'): 

605 token = Comment.Preproc 

606 else: 

607 token = Comment.Multiline 

608 elif scanner.scan(r'//.*?$'): 

609 token = Comment.Single 

610 elif scanner.scan(r"'"): 

611 token = String 

612 stack.append('string') 

613 elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'): 

614 token = Name.Label 

615 elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'): 

616 lowercase_name = scanner.match.lower() 

617 if lowercase_name in self.ASM_INSTRUCTIONS: 

618 token = Keyword 

619 elif lowercase_name in self.ASM_REGISTERS: 

620 token = Name.Builtin 

621 else: 

622 token = Name 

623 elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'): 

624 token = Operator 

625 elif scanner.scan(r'[\(\)\[\]]+'): 

626 token = Punctuation 

627 elif scanner.scan(r'\$[0-9A-Fa-f]+'): 

628 token = Number.Hex 

629 elif scanner.scan(r'\d+(?![eE]|\.[^.])'): 

630 token = Number.Integer 

631 elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'): 

632 token = Number.Float 

633 else: 

634 scanner.get_char() 

635 stack.pop() 

636 

637 # save the dot!!!11 

638 if not self.is_portugol and scanner.match.strip(): 

639 was_dot = scanner.match == '.' 

640 

641 yield scanner.start_pos, token, scanner.match or ''