Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/modula2.py: 59%
224 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1"""
2 pygments.lexers.modula2
3 ~~~~~~~~~~~~~~~~~~~~~~~
5 Multi-Dialect Lexer for Modula-2.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexer import RegexLexer, include
14from pygments.util import get_bool_opt, get_list_opt
15from pygments.token import Text, Comment, Operator, Keyword, Name, \
16 String, Number, Punctuation, Error
18__all__ = ['Modula2Lexer']
21# Multi-Dialect Modula-2 Lexer
22class Modula2Lexer(RegexLexer):
23 """
24 For Modula-2 source code.
26 The Modula-2 lexer supports several dialects. By default, it operates in
27 fallback mode, recognising the *combined* literals, punctuation symbols
28 and operators of all supported dialects, and the *combined* reserved words
29 and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
30 differentiating between library defined identifiers.
32 To select a specific dialect, a dialect option may be passed
33 or a dialect tag may be embedded into a source file.
35 Dialect Options:
37 `m2pim`
38 Select PIM Modula-2 dialect.
39 `m2iso`
40 Select ISO Modula-2 dialect.
41 `m2r10`
42 Select Modula-2 R10 dialect.
43 `objm2`
44 Select Objective Modula-2 dialect.
46 The PIM and ISO dialect options may be qualified with a language extension.
48 Language Extensions:
50 `+aglet`
51 Select Aglet Modula-2 extensions, available with m2iso.
52 `+gm2`
53 Select GNU Modula-2 extensions, available with m2pim.
54 `+p1`
55 Select p1 Modula-2 extensions, available with m2iso.
56 `+xds`
57 Select XDS Modula-2 extensions, available with m2iso.
60 Passing a Dialect Option via Unix Commandline Interface
62 Dialect options may be passed to the lexer using the `dialect` key.
63 Only one such option should be passed. If multiple dialect options are
64 passed, the first valid option is used, any subsequent options are ignored.
66 Examples:
68 `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
69 Use ISO dialect to render input to HTML output
70 `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
71 Use ISO dialect with p1 extensions to render input to RTF output
74 Embedding a Dialect Option within a source file
76 A dialect option may be embedded in a source file in form of a dialect
77 tag, a specially formatted comment that specifies a dialect option.
79 Dialect Tag EBNF::
81 dialectTag :
82 OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
84 dialectOption :
85 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
86 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
88 Prefix : '!' ;
90 OpeningCommentDelim : '(*' ;
92 ClosingCommentDelim : '*)' ;
94 No whitespace is permitted between the tokens of a dialect tag.
96 In the event that a source file contains multiple dialect tags, the first
97 tag that contains a valid dialect option will be used and any subsequent
98 dialect tags will be ignored. Ideally, a dialect tag should be placed
99 at the beginning of a source file.
101 An embedded dialect tag overrides a dialect option set via command line.
103 Examples:
105 ``(*!m2r10*) DEFINITION MODULE Foobar; ...``
106 Use Modula2 R10 dialect to render this source file.
107 ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
108 Use PIM dialect with GNU extensions to render this source file.
111 Algol Publication Mode:
113 In Algol publication mode, source text is rendered for publication of
114 algorithms in scientific papers and academic texts, following the format
115 of the Revised Algol-60 Language Report. It is activated by passing
116 one of two corresponding styles as an option:
118 `algol`
119 render reserved words lowercase underline boldface
120 and builtins lowercase boldface italic
121 `algol_nu`
122 render reserved words lowercase boldface (no underlining)
123 and builtins lowercase boldface italic
125 The lexer automatically performs the required lowercase conversion when
126 this mode is activated.
128 Example:
130 ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
131 Render input file in Algol publication mode to LaTeX output.
134 Rendering Mode of First Class ADT Identifiers:
136 The rendering of standard library first class ADT identifiers is controlled
137 by option flag "treat_stdlib_adts_as_builtins".
139 When this option is turned on, standard library ADT identifiers are rendered
140 as builtins. When it is turned off, they are rendered as ordinary library
141 identifiers.
143 `treat_stdlib_adts_as_builtins` (default: On)
145 The option is useful for dialects that support ADTs as first class objects
146 and provide ADTs in the standard library that would otherwise be built-in.
148 At present, only Modula-2 R10 supports library ADTs as first class objects
149 and therefore, no ADT identifiers are defined for any other dialects.
151 Example:
153 ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
154 Render standard library ADTs as ordinary library types.
156 .. versionadded:: 1.3
158 .. versionchanged:: 2.1
159 Added multi-dialect support.
160 """
161 name = 'Modula-2'
162 url = 'http://www.modula2.org/'
163 aliases = ['modula2', 'm2']
164 filenames = ['*.def', '*.mod']
165 mimetypes = ['text/x-modula2']
167 flags = re.MULTILINE | re.DOTALL
169 tokens = {
170 'whitespace': [
171 (r'\n+', Text), # blank lines
172 (r'\s+', Text), # whitespace
173 ],
174 'dialecttags': [
175 # PIM Dialect Tag
176 (r'\(\*!m2pim\*\)', Comment.Special),
177 # ISO Dialect Tag
178 (r'\(\*!m2iso\*\)', Comment.Special),
179 # M2R10 Dialect Tag
180 (r'\(\*!m2r10\*\)', Comment.Special),
181 # ObjM2 Dialect Tag
182 (r'\(\*!objm2\*\)', Comment.Special),
183 # Aglet Extensions Dialect Tag
184 (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
185 # GNU Extensions Dialect Tag
186 (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
187 # p1 Extensions Dialect Tag
188 (r'\(\*!m2iso\+p1\*\)', Comment.Special),
189 # XDS Extensions Dialect Tag
190 (r'\(\*!m2iso\+xds\*\)', Comment.Special),
191 ],
192 'identifiers': [
193 (r'([a-zA-Z_$][\w$]*)', Name),
194 ],
195 'prefixed_number_literals': [
196 #
197 # Base-2, whole number
198 (r'0b[01]+(\'[01]+)*', Number.Bin),
199 #
200 # Base-16, whole number
201 (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
202 ],
203 'plain_number_literals': [
204 #
205 # Base-10, real number with exponent
206 (r'[0-9]+(\'[0-9]+)*' # integral part
207 r'\.[0-9]+(\'[0-9]+)*' # fractional part
208 r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent
209 Number.Float),
210 #
211 # Base-10, real number without exponent
212 (r'[0-9]+(\'[0-9]+)*' # integral part
213 r'\.[0-9]+(\'[0-9]+)*', # fractional part
214 Number.Float),
215 #
216 # Base-10, whole number
217 (r'[0-9]+(\'[0-9]+)*', Number.Integer),
218 ],
219 'suffixed_number_literals': [
220 #
221 # Base-8, whole number
222 (r'[0-7]+B', Number.Oct),
223 #
224 # Base-8, character code
225 (r'[0-7]+C', Number.Oct),
226 #
227 # Base-16, number
228 (r'[0-9A-F]+H', Number.Hex),
229 ],
230 'string_literals': [
231 (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
232 (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
233 ],
234 'digraph_operators': [
235 # Dot Product Operator
236 (r'\*\.', Operator),
237 # Array Concatenation Operator
238 (r'\+>', Operator), # M2R10 + ObjM2
239 # Inequality Operator
240 (r'<>', Operator), # ISO + PIM
241 # Less-Or-Equal, Subset
242 (r'<=', Operator),
243 # Greater-Or-Equal, Superset
244 (r'>=', Operator),
245 # Identity Operator
246 (r'==', Operator), # M2R10 + ObjM2
247 # Type Conversion Operator
248 (r'::', Operator), # M2R10 + ObjM2
249 # Assignment Symbol
250 (r':=', Operator),
251 # Postfix Increment Mutator
252 (r'\+\+', Operator), # M2R10 + ObjM2
253 # Postfix Decrement Mutator
254 (r'--', Operator), # M2R10 + ObjM2
255 ],
256 'unigraph_operators': [
257 # Arithmetic Operators
258 (r'[+-]', Operator),
259 (r'[*/]', Operator),
260 # ISO 80000-2 compliant Set Difference Operator
261 (r'\\', Operator), # M2R10 + ObjM2
262 # Relational Operators
263 (r'[=#<>]', Operator),
264 # Dereferencing Operator
265 (r'\^', Operator),
266 # Dereferencing Operator Synonym
267 (r'@', Operator), # ISO
268 # Logical AND Operator Synonym
269 (r'&', Operator), # PIM + ISO
270 # Logical NOT Operator Synonym
271 (r'~', Operator), # PIM + ISO
272 # Smalltalk Message Prefix
273 (r'`', Operator), # ObjM2
274 ],
275 'digraph_punctuation': [
276 # Range Constructor
277 (r'\.\.', Punctuation),
278 # Opening Chevron Bracket
279 (r'<<', Punctuation), # M2R10 + ISO
280 # Closing Chevron Bracket
281 (r'>>', Punctuation), # M2R10 + ISO
282 # Blueprint Punctuation
283 (r'->', Punctuation), # M2R10 + ISO
284 # Distinguish |# and # in M2 R10
285 (r'\|#', Punctuation),
286 # Distinguish ## and # in M2 R10
287 (r'##', Punctuation),
288 # Distinguish |* and * in M2 R10
289 (r'\|\*', Punctuation),
290 ],
291 'unigraph_punctuation': [
292 # Common Punctuation
293 (r'[()\[\]{},.:;|]', Punctuation),
294 # Case Label Separator Synonym
295 (r'!', Punctuation), # ISO
296 # Blueprint Punctuation
297 (r'\?', Punctuation), # M2R10 + ObjM2
298 ],
299 'comments': [
300 # Single Line Comment
301 (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2
302 # Block Comment
303 (r'\(\*([^$].*?)\*\)', Comment.Multiline),
304 # Template Block Comment
305 (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2
306 ],
307 'pragmas': [
308 # ISO Style Pragmas
309 (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2
310 # Pascal Style Pragmas
311 (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM
312 ],
313 'root': [
314 include('whitespace'),
315 include('dialecttags'),
316 include('pragmas'),
317 include('comments'),
318 include('identifiers'),
319 include('suffixed_number_literals'), # PIM + ISO
320 include('prefixed_number_literals'), # M2R10 + ObjM2
321 include('plain_number_literals'),
322 include('string_literals'),
323 include('digraph_punctuation'),
324 include('digraph_operators'),
325 include('unigraph_punctuation'),
326 include('unigraph_operators'),
327 ]
328 }
330# C o m m o n D a t a s e t s
332 # Common Reserved Words Dataset
333 common_reserved_words = (
334 # 37 common reserved words
335 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
336 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
337 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
338 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
339 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
340 )
342 # Common Builtins Dataset
343 common_builtins = (
344 # 16 common builtins
345 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
346 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
347 'TRUE',
348 )
350 # Common Pseudo-Module Builtins Dataset
351 common_pseudo_builtins = (
352 # 4 common pseudo builtins
353 'ADDRESS', 'BYTE', 'WORD', 'ADR'
354 )
356# P I M M o d u l a - 2 D a t a s e t s
358 # Lexemes to Mark as Error Tokens for PIM Modula-2
359 pim_lexemes_to_reject = (
360 '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
361 '+>', '->', '<<', '>>', '|#', '##',
362 )
364 # PIM Modula-2 Additional Reserved Words Dataset
365 pim_additional_reserved_words = (
366 # 3 additional reserved words
367 'EXPORT', 'QUALIFIED', 'WITH',
368 )
370 # PIM Modula-2 Additional Builtins Dataset
371 pim_additional_builtins = (
372 # 16 additional builtins
373 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
374 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
375 )
377 # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
378 pim_additional_pseudo_builtins = (
379 # 5 additional pseudo builtins
380 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
381 )
383# I S O M o d u l a - 2 D a t a s e t s
385 # Lexemes to Mark as Error Tokens for ISO Modula-2
386 iso_lexemes_to_reject = (
387 '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
388 '<<', '>>', '|#', '##',
389 )
391 # ISO Modula-2 Additional Reserved Words Dataset
392 iso_additional_reserved_words = (
393 # 9 additional reserved words (ISO 10514-1)
394 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
395 'REM', 'RETRY', 'WITH',
396 # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
397 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
398 'REVEAL', 'TRACED', 'UNSAFEGUARDED',
399 )
401 # ISO Modula-2 Additional Builtins Dataset
iso_additional_builtins = (
    # ISO Modula-2 builtins (pervasive identifiers) in addition to the
    # common set.
    #
    # 26 additional builtins (ISO 10514-1)
    'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
    'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
    'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
    # 'UNINTERRUPTIBLE' was previously misspelled 'UNINTERRUBTIBLE', so the
    # correctly spelled identifier was never matched as a builtin.
    'TRUNC', 'UNINTERRUPTIBLE', 'VAL',
    # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
    'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
)
412 # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
413 iso_additional_pseudo_builtins = (
414 # 14 additional builtins (SYSTEM)
415 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
416 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
417 'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
418 # 13 additional builtins (COROUTINES)
419 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
420 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
421 'NEWCOROUTINE', 'PROT', 'TRANSFER',
422 # 9 additional builtins (EXCEPTIONS)
423 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
424 'ExceptionSource', 'GetMessage', 'IsCurrentSource',
425 'IsExceptionalExecution', 'RAISE',
426 # 3 additional builtins (TERMINATION)
427 'TERMINATION', 'IsTerminating', 'HasHalted',
428 # 19 additional builtins (M2EXCEPTION)
429 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
430 'indexException', 'rangeException', 'caseSelectException',
431 'invalidLocation', 'functionException', 'wholeValueException',
432 'wholeDivException', 'realValueException', 'realDivException',
433 'complexValueException', 'complexDivException', 'protException',
434 'sysException', 'coException', 'exException',
435 )
437# M o d u l a - 2 R 1 0 D a t a s e t s
439 # Lexemes to Mark as Error Tokens for Modula-2 R10
440 m2r10_lexemes_to_reject = (
441 '!', '`', '@', '$', '%', '&', '<>',
442 )
444 # Modula-2 R10 reserved words in addition to the common set
445 m2r10_additional_reserved_words = (
446 # 12 additional reserved words
447 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
448 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
449 # 2 additional reserved words with symbolic assembly option
450 'ASM', 'REG',
451 )
453 # Modula-2 R10 builtins in addition to the common set
454 m2r10_additional_builtins = (
455 # 26 additional builtins
456 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
457 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
458 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
459 'UNICHAR', 'WRITE', 'WRITEF',
460 )
462 # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
463 m2r10_additional_pseudo_builtins = (
464 # 13 additional builtins (TPROPERTIES)
465 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
466 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
467 'TMAXEXP', 'TMINEXP',
468 # 4 additional builtins (CONVERSION)
469 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
470 # 35 additional builtins (UNSAFE)
471 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
472 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
473 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
474 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
475 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
476 # 11 additional builtins (ATOMIC)
477 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
478 'BWNAND', 'BWOR', 'BWXOR',
479 # 7 additional builtins (COMPILER)
480 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
481 'HASH',
482 # 5 additional builtins (ASSEMBLER)
483 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
484 )
486# O b j e c t i v e M o d u l a - 2 D a t a s e t s
488 # Lexemes to Mark as Error Tokens for Objective Modula-2
489 objm2_lexemes_to_reject = (
490 '!', '$', '%', '&', '<>',
491 )
493 # Objective Modula-2 Extensions
494 # reserved words in addition to Modula-2 R10
495 objm2_additional_reserved_words = (
496 # 16 additional reserved words
497 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
498 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
499 'SUPER', 'TRY',
500 )
502 # Objective Modula-2 Extensions
503 # builtins in addition to Modula-2 R10
504 objm2_additional_builtins = (
505 # 3 additional builtins
506 'OBJECT', 'NO', 'YES',
507 )
509 # Objective Modula-2 Extensions
510 # pseudo-module builtins in addition to Modula-2 R10
511 objm2_additional_pseudo_builtins = (
512 # None
513 )
515# A g l e t M o d u l a - 2 D a t a s e t s
517 # Aglet Extensions
518 # reserved words in addition to ISO Modula-2
519 aglet_additional_reserved_words = (
520 # None
521 )
523 # Aglet Extensions
524 # builtins in addition to ISO Modula-2
525 aglet_additional_builtins = (
526 # 9 additional builtins
527 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
528 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
529 )
531 # Aglet Modula-2 Extensions
532 # pseudo-module builtins in addition to ISO Modula-2
533 aglet_additional_pseudo_builtins = (
534 # None
535 )
537# G N U M o d u l a - 2 D a t a s e t s
539 # GNU Extensions
540 # reserved words in addition to PIM Modula-2
541 gm2_additional_reserved_words = (
542 # 10 additional reserved words
543 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
544 '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
545 )
547 # GNU Extensions
548 # builtins in addition to PIM Modula-2
549 gm2_additional_builtins = (
550 # 21 additional builtins
551 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
552 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
553 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
554 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
555 )
557 # GNU Extensions
558 # pseudo-module builtins in addition to PIM Modula-2
559 gm2_additional_pseudo_builtins = (
560 # None
561 )
563# p 1 M o d u l a - 2 D a t a s e t s
565 # p1 Extensions
566 # reserved words in addition to ISO Modula-2
567 p1_additional_reserved_words = (
568 # None
569 )
571 # p1 Extensions
572 # builtins in addition to ISO Modula-2
573 p1_additional_builtins = (
574 # None
575 )
577 # p1 Modula-2 Extensions
578 # pseudo-module builtins in addition to ISO Modula-2
579 p1_additional_pseudo_builtins = (
580 # 1 additional builtin
581 'BCD',
582 )
584# X D S M o d u l a - 2 D a t a s e t s
586 # XDS Extensions
587 # reserved words in addition to ISO Modula-2
588 xds_additional_reserved_words = (
589 # 1 additional reserved word
590 'SEQ',
591 )
593 # XDS Extensions
594 # builtins in addition to ISO Modula-2
595 xds_additional_builtins = (
596 # 9 additional builtins
597 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
598 'LONGCARD', 'SHORTCARD', 'SHORTINT',
599 )
601 # XDS Modula-2 Extensions
602 # pseudo-module builtins in addition to ISO Modula-2
xds_additional_pseudo_builtins = (
    # XDS pseudo-module builtins in addition to ISO Modula-2.
    #
    # 21 additional builtins (SYSTEM)
    'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
    'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
    # The comma after 'void' is required: without it Python concatenates
    # the adjacent literals into the single bogus entry 'voidCOMPILER'.
    'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void',
    # 3 additional builtins (COMPILER)
    'COMPILER', 'OPTION', 'EQUATION',
)
612# P I M S t a n d a r d L i b r a r y D a t a s e t s
614 # PIM Modula-2 Standard Library Modules Dataset
615 pim_stdlib_module_identifiers = (
616 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
617 )
619 # PIM Modula-2 Standard Library Types Dataset
620 pim_stdlib_type_identifiers = (
621 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
622 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
623 'DirectoryCommand',
624 )
626 # PIM Modula-2 Standard Library Procedures Dataset
627 pim_stdlib_proc_identifiers = (
628 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
629 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
630 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
631 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
632 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
633 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
634 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
635 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
636 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
637 'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
638 )
640 # PIM Modula-2 Standard Library Variables Dataset
641 pim_stdlib_var_identifiers = (
642 'Done', 'termCH', 'in', 'out'
643 )
645 # PIM Modula-2 Standard Library Constants Dataset
646 pim_stdlib_const_identifiers = (
647 'EOL',
648 )
650# I S O S t a n d a r d L i b r a r y D a t a s e t s
652 # ISO Modula-2 Standard Library Modules Dataset
653 iso_stdlib_module_identifiers = (
654 # TO DO
655 )
657 # ISO Modula-2 Standard Library Types Dataset
658 iso_stdlib_type_identifiers = (
659 # TO DO
660 )
662 # ISO Modula-2 Standard Library Procedures Dataset
663 iso_stdlib_proc_identifiers = (
664 # TO DO
665 )
667 # ISO Modula-2 Standard Library Variables Dataset
668 iso_stdlib_var_identifiers = (
669 # TO DO
670 )
672 # ISO Modula-2 Standard Library Constants Dataset
673 iso_stdlib_const_identifiers = (
674 # TO DO
675 )
677# M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s
679 # Modula-2 R10 Standard Library ADTs Dataset
680 m2r10_stdlib_adt_identifiers = (
681 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
682 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
683 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
684 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
685 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
686 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
687 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
688 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
689 'INT64', 'INT128', 'STRING', 'UNISTRING',
690 )
692 # Modula-2 R10 Standard Library Blueprints Dataset
693 m2r10_stdlib_blueprint_identifiers = (
694 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
695 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
696 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
697 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
698 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
699 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
700 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
701 )
703 # Modula-2 R10 Standard Library Modules Dataset
704 m2r10_stdlib_module_identifiers = (
705 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
706 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
707 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
708 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
709 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
710 )
712 # Modula-2 R10 Standard Library Types Dataset
713 m2r10_stdlib_type_identifiers = (
714 'File', 'Status',
715 # TO BE COMPLETED
716 )
718 # Modula-2 R10 Standard Library Procedures Dataset
719 m2r10_stdlib_proc_identifiers = (
720 'ALLOCATE', 'DEALLOCATE', 'SIZE',
721 # TO BE COMPLETED
722 )
724 # Modula-2 R10 Standard Library Variables Dataset
725 m2r10_stdlib_var_identifiers = (
726 'stdIn', 'stdOut', 'stdErr',
727 )
729 # Modula-2 R10 Standard Library Constants Dataset
730 m2r10_stdlib_const_identifiers = (
731 'pi', 'tau',
732 )
734# D i a l e c t s
736 # Dialect modes
737 dialects = (
738 'unknown',
739 'm2pim', 'm2iso', 'm2r10', 'objm2',
740 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
741 )
743# D a t a b a s e s
745 # Lexemes to Mark as Errors Database
746 lexemes_to_reject_db = {
747 # Lexemes to reject for unknown dialect
748 'unknown': (
749 # LEAVE THIS EMPTY
750 ),
751 # Lexemes to reject for PIM Modula-2
752 'm2pim': (
753 pim_lexemes_to_reject,
754 ),
755 # Lexemes to reject for ISO Modula-2
756 'm2iso': (
757 iso_lexemes_to_reject,
758 ),
759 # Lexemes to reject for Modula-2 R10
760 'm2r10': (
761 m2r10_lexemes_to_reject,
762 ),
763 # Lexemes to reject for Objective Modula-2
764 'objm2': (
765 objm2_lexemes_to_reject,
766 ),
767 # Lexemes to reject for Aglet Modula-2
768 'm2iso+aglet': (
769 iso_lexemes_to_reject,
770 ),
771 # Lexemes to reject for GNU Modula-2
772 'm2pim+gm2': (
773 pim_lexemes_to_reject,
774 ),
775 # Lexemes to reject for p1 Modula-2
776 'm2iso+p1': (
777 iso_lexemes_to_reject,
778 ),
779 # Lexemes to reject for XDS Modula-2
780 'm2iso+xds': (
781 iso_lexemes_to_reject,
782 ),
783 }
785 # Reserved Words Database
786 reserved_words_db = {
787 # Reserved words for unknown dialect
788 'unknown': (
789 common_reserved_words,
790 pim_additional_reserved_words,
791 iso_additional_reserved_words,
792 m2r10_additional_reserved_words,
793 ),
795 # Reserved words for PIM Modula-2
796 'm2pim': (
797 common_reserved_words,
798 pim_additional_reserved_words,
799 ),
801 # Reserved words for ISO Modula-2
802 'm2iso': (
803 common_reserved_words,
804 iso_additional_reserved_words,
805 ),
807 # Reserved words for Modula-2 R10
808 'm2r10': (
809 common_reserved_words,
810 m2r10_additional_reserved_words,
811 ),
813 # Reserved words for Objective Modula-2
814 'objm2': (
815 common_reserved_words,
816 m2r10_additional_reserved_words,
817 objm2_additional_reserved_words,
818 ),
820 # Reserved words for Aglet Modula-2 Extensions
821 'm2iso+aglet': (
822 common_reserved_words,
823 iso_additional_reserved_words,
824 aglet_additional_reserved_words,
825 ),
827 # Reserved words for GNU Modula-2 Extensions
828 'm2pim+gm2': (
829 common_reserved_words,
830 pim_additional_reserved_words,
831 gm2_additional_reserved_words,
832 ),
834 # Reserved words for p1 Modula-2 Extensions
835 'm2iso+p1': (
836 common_reserved_words,
837 iso_additional_reserved_words,
838 p1_additional_reserved_words,
839 ),
841 # Reserved words for XDS Modula-2 Extensions
842 'm2iso+xds': (
843 common_reserved_words,
844 iso_additional_reserved_words,
845 xds_additional_reserved_words,
846 ),
847 }
849 # Builtins Database
850 builtins_db = {
851 # Builtins for unknown dialect
852 'unknown': (
853 common_builtins,
854 pim_additional_builtins,
855 iso_additional_builtins,
856 m2r10_additional_builtins,
857 ),
859 # Builtins for PIM Modula-2
860 'm2pim': (
861 common_builtins,
862 pim_additional_builtins,
863 ),
865 # Builtins for ISO Modula-2
866 'm2iso': (
867 common_builtins,
868 iso_additional_builtins,
869 ),
871 # Builtins for Modula-2 R10
872 'm2r10': (
873 common_builtins,
874 m2r10_additional_builtins,
875 ),
877 # Builtins for Objective Modula-2
878 'objm2': (
879 common_builtins,
880 m2r10_additional_builtins,
881 objm2_additional_builtins,
882 ),
884 # Builtins for Aglet Modula-2 Extensions
885 'm2iso+aglet': (
886 common_builtins,
887 iso_additional_builtins,
888 aglet_additional_builtins,
889 ),
891 # Builtins for GNU Modula-2 Extensions
892 'm2pim+gm2': (
893 common_builtins,
894 pim_additional_builtins,
895 gm2_additional_builtins,
896 ),
898 # Builtins for p1 Modula-2 Extensions
899 'm2iso+p1': (
900 common_builtins,
901 iso_additional_builtins,
902 p1_additional_builtins,
903 ),
905 # Builtins for XDS Modula-2 Extensions
906 'm2iso+xds': (
907 common_builtins,
908 iso_additional_builtins,
909 xds_additional_builtins,
910 ),
911 }
913 # Pseudo-Module Builtins Database
914 pseudo_builtins_db = {
915 # Builtins for unknown dialect
916 'unknown': (
917 common_pseudo_builtins,
918 pim_additional_pseudo_builtins,
919 iso_additional_pseudo_builtins,
920 m2r10_additional_pseudo_builtins,
921 ),
923 # Builtins for PIM Modula-2
924 'm2pim': (
925 common_pseudo_builtins,
926 pim_additional_pseudo_builtins,
927 ),
929 # Builtins for ISO Modula-2
930 'm2iso': (
931 common_pseudo_builtins,
932 iso_additional_pseudo_builtins,
933 ),
935 # Pseudo-module builtins for Modula-2 R10
936 'm2r10': (
937 common_pseudo_builtins,
938 m2r10_additional_pseudo_builtins,
939 ),
941 # Builtins for Objective Modula-2
942 'objm2': (
943 common_pseudo_builtins,
944 m2r10_additional_pseudo_builtins,
945 objm2_additional_pseudo_builtins,
946 ),
948 # Builtins for Aglet Modula-2 Extensions
949 'm2iso+aglet': (
950 common_pseudo_builtins,
951 iso_additional_pseudo_builtins,
952 aglet_additional_pseudo_builtins,
953 ),
955 # Builtins for GNU Modula-2 Extensions
956 'm2pim+gm2': (
957 common_pseudo_builtins,
958 pim_additional_pseudo_builtins,
959 gm2_additional_pseudo_builtins,
960 ),
962 # Builtins for p1 Modula-2 Extensions
963 'm2iso+p1': (
964 common_pseudo_builtins,
965 iso_additional_pseudo_builtins,
966 p1_additional_pseudo_builtins,
967 ),
969 # Builtins for XDS Modula-2 Extensions
970 'm2iso+xds': (
971 common_pseudo_builtins,
972 iso_additional_pseudo_builtins,
973 xds_additional_pseudo_builtins,
974 ),
975 }
977 # Standard Library ADTs Database
978 stdlib_adts_db = {
979 # Empty entry for unknown dialect
980 'unknown': (
981 # LEAVE THIS EMPTY
982 ),
983 # Standard Library ADTs for PIM Modula-2
984 'm2pim': (
985 # No first class library types
986 ),
988 # Standard Library ADTs for ISO Modula-2
989 'm2iso': (
990 # No first class library types
991 ),
993 # Standard Library ADTs for Modula-2 R10
994 'm2r10': (
995 m2r10_stdlib_adt_identifiers,
996 ),
998 # Standard Library ADTs for Objective Modula-2
999 'objm2': (
1000 m2r10_stdlib_adt_identifiers,
1001 ),
1003 # Standard Library ADTs for Aglet Modula-2
1004 'm2iso+aglet': (
1005 # No first class library types
1006 ),
1008 # Standard Library ADTs for GNU Modula-2
1009 'm2pim+gm2': (
1010 # No first class library types
1011 ),
1013 # Standard Library ADTs for p1 Modula-2
1014 'm2iso+p1': (
1015 # No first class library types
1016 ),
1018 # Standard Library ADTs for XDS Modula-2
1019 'm2iso+xds': (
1020 # No first class library types
1021 ),
1022 }
1024 # Standard Library Modules Database
1025 stdlib_modules_db = {
1026 # Empty entry for unknown dialect
1027 'unknown': (
1028 # LEAVE THIS EMPTY
1029 ),
1030 # Standard Library Modules for PIM Modula-2
1031 'm2pim': (
1032 pim_stdlib_module_identifiers,
1033 ),
1035 # Standard Library Modules for ISO Modula-2
1036 'm2iso': (
1037 iso_stdlib_module_identifiers,
1038 ),
1040 # Standard Library Modules for Modula-2 R10
1041 'm2r10': (
1042 m2r10_stdlib_blueprint_identifiers,
1043 m2r10_stdlib_module_identifiers,
1044 m2r10_stdlib_adt_identifiers,
1045 ),
1047 # Standard Library Modules for Objective Modula-2
1048 'objm2': (
1049 m2r10_stdlib_blueprint_identifiers,
1050 m2r10_stdlib_module_identifiers,
1051 ),
1053 # Standard Library Modules for Aglet Modula-2
1054 'm2iso+aglet': (
1055 iso_stdlib_module_identifiers,
1056 ),
1058 # Standard Library Modules for GNU Modula-2
1059 'm2pim+gm2': (
1060 pim_stdlib_module_identifiers,
1061 ),
1063 # Standard Library Modules for p1 Modula-2
1064 'm2iso+p1': (
1065 iso_stdlib_module_identifiers,
1066 ),
1068 # Standard Library Modules for XDS Modula-2
1069 'm2iso+xds': (
1070 iso_stdlib_module_identifiers,
1071 ),
1072 }
# Standard Library Types Database
#
# Maps a dialect id to a tuple of identifier collections (defined outside
# this excerpt) naming that dialect's standard library types.
stdlib_types_db = {
    # fallback entry for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # base dialects (Objective Modula-2 shares the Modula-2 R10 list)
    'm2pim': (pim_stdlib_type_identifiers,),
    'm2iso': (iso_stdlib_type_identifiers,),
    'm2r10': (m2r10_stdlib_type_identifiers,),
    'objm2': (m2r10_stdlib_type_identifiers,),

    # language extensions reuse the type list of their base dialect
    'm2iso+aglet': (iso_stdlib_type_identifiers,),
    'm2pim+gm2': (pim_stdlib_type_identifiers,),
    'm2iso+p1': (iso_stdlib_type_identifiers,),
    'm2iso+xds': (iso_stdlib_type_identifiers,),
}
# Standard Library Procedures Database
#
# Maps a dialect id to a tuple of identifier collections (defined outside
# this excerpt) naming that dialect's standard library procedures.
stdlib_procedures_db = {
    # fallback entry for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # base dialects (Objective Modula-2 shares the Modula-2 R10 list)
    'm2pim': (pim_stdlib_proc_identifiers,),
    'm2iso': (iso_stdlib_proc_identifiers,),
    'm2r10': (m2r10_stdlib_proc_identifiers,),
    'objm2': (m2r10_stdlib_proc_identifiers,),

    # language extensions reuse the procedure list of their base dialect
    'm2iso+aglet': (iso_stdlib_proc_identifiers,),
    'm2pim+gm2': (pim_stdlib_proc_identifiers,),
    'm2iso+p1': (iso_stdlib_proc_identifiers,),
    'm2iso+xds': (iso_stdlib_proc_identifiers,),
}
# Standard Library Variables Database
#
# Maps a dialect id to a tuple of identifier collections (defined outside
# this excerpt) naming that dialect's standard library variables.
stdlib_variables_db = {
    # fallback entry for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # base dialects (Objective Modula-2 shares the Modula-2 R10 list)
    'm2pim': (pim_stdlib_var_identifiers,),
    'm2iso': (iso_stdlib_var_identifiers,),
    'm2r10': (m2r10_stdlib_var_identifiers,),
    'objm2': (m2r10_stdlib_var_identifiers,),

    # language extensions reuse the variable list of their base dialect
    'm2iso+aglet': (iso_stdlib_var_identifiers,),
    'm2pim+gm2': (pim_stdlib_var_identifiers,),
    'm2iso+p1': (iso_stdlib_var_identifiers,),
    'm2iso+xds': (iso_stdlib_var_identifiers,),
}
# Standard Library Constants Database
#
# Maps a dialect id to a tuple of identifier collections (defined outside
# this excerpt) naming that dialect's standard library constants.
stdlib_constants_db = {
    # fallback entry for an unknown dialect -- LEAVE THIS EMPTY
    'unknown': (),

    # base dialects (Objective Modula-2 shares the Modula-2 R10 list)
    'm2pim': (pim_stdlib_const_identifiers,),
    'm2iso': (iso_stdlib_const_identifiers,),
    'm2r10': (m2r10_stdlib_const_identifiers,),
    'objm2': (m2r10_stdlib_const_identifiers,),

    # language extensions reuse the constant list of their base dialect
    'm2iso+aglet': (iso_stdlib_const_identifiers,),
    'm2pim+gm2': (pim_stdlib_const_identifiers,),
    'm2iso+p1': (iso_stdlib_const_identifiers,),
    'm2iso+xds': (iso_stdlib_const_identifiers,),
}
1262# M e t h o d s
# initialise a lexer instance
def __init__(self, **options):
    """Configure the lexer from keyword options.

    Options read here:

    ``dialect``
        List of dialect ids. The first entry that names a known dialect
        is activated; if none does, the lexer runs in fallback mode
        (dialect ``'unknown'``).
    ``style``
        List of style names. ``'algol'`` or ``'algol_nu'`` switches on
        Algol publication mode.
    ``treat_stdlib_adts_as_builtins``
        Boolean flag, defaults to ``True``.

    All options are then forwarded to ``RegexLexer.__init__``.
    """
    #
    # check dialect options
    #
    requested_dialects = get_list_opt(options, 'dialect', [])
    #
    # Index 0 of self.dialects is skipped, exactly as the dialect tag
    # scanner does (presumably the 'unknown' placeholder). Every other
    # entry is selectable, including the last one: the former slice
    # [1:-1] silently rejected the final dialect id.
    selectable_dialects = self.dialects[1:]
    for dialect_option in requested_dialects:
        if dialect_option in selectable_dialects:
            # valid dialect option found
            self.set_dialect(dialect_option)
            break
    #
    # Fallback Mode (DEFAULT)
    else:
        # no valid dialect option
        self.set_dialect('unknown')
    #
    # a dialect tag embedded in the source may still override the option
    self.dialect_set_by_tag = False
    #
    # check style options
    #
    styles = get_list_opt(options, 'style', [])
    #
    # use lowercase mode for Algol style
    self.algol_publication_mode = 'algol' in styles or 'algol_nu' in styles
    #
    # Check option flags
    #
    self.treat_stdlib_adts_as_builtins = get_bool_opt(
        options, 'treat_stdlib_adts_as_builtins', True)
    #
    # call superclass initialiser
    RegexLexer.__init__(self, **options)
# Set lexer to a specified dialect
def set_dialect(self, dialect_id):
    """Rebuild the lexer's identifier lookup sets for ``dialect_id``.

    ``dialect_id`` is checked against ``self.dialects``; an unrecognised
    id selects the ``'unknown'`` entry of every database instead.

    The following attributes are replaced: ``dialect``,
    ``lexemes_to_reject``, ``reserved_words``, ``builtins``,
    ``pseudo_builtins``, ``adts``, ``modules``, ``types``,
    ``procedures``, ``variables`` and ``constants``.

    Precedence between the sets: reserved words are removed from the
    builtins, pseudo-builtins and ADTs; builtins are removed from the
    standard library modules, types, procedures, variables and constants.
    """
    # check dialect name against known dialects
    dialect = dialect_id if dialect_id in self.dialects else 'unknown'

    def collect(database, exclude=frozenset()):
        # Union of every identifier group the database lists for the
        # selected dialect, minus the identifiers in `exclude`.
        collected = set()
        for group in database[dialect]:
            collected.update(group)
        return collected - exclude

    lexemes_to_reject_set = collect(self.lexemes_to_reject_db)
    reswords_set = collect(self.reserved_words_db)

    # language-defined identifiers never override reserved words
    builtins_set = collect(self.builtins_db, reswords_set)
    pseudo_builtins_set = collect(self.pseudo_builtins_db, reswords_set)
    adts_set = collect(self.stdlib_adts_db, reswords_set)

    # library-defined identifiers never override builtins
    modules_set = collect(self.stdlib_modules_db, builtins_set)
    types_set = collect(self.stdlib_types_db, builtins_set)
    procedures_set = collect(self.stdlib_procedures_db, builtins_set)
    variables_set = collect(self.stdlib_variables_db, builtins_set)
    constants_set = collect(self.stdlib_constants_db, builtins_set)

    # update lexer state only once every set has been built, so a failed
    # database lookup cannot leave the lexer half-switched
    self.dialect = dialect
    self.lexemes_to_reject = lexemes_to_reject_set
    self.reserved_words = reswords_set
    self.builtins = builtins_set
    self.pseudo_builtins = pseudo_builtins_set
    self.adts = adts_set
    self.modules = modules_set
    self.types = types_set
    self.procedures = procedures_set
    self.variables = variables_set
    self.constants = constants_set
# Extracts a dialect name from a dialect tag comment string and checks
# the extracted name against known dialects.
def get_dialect_from_dialect_tag(self, dialect_tag):
    """Return the dialect id named by ``dialect_tag``.

    A dialect tag is a comment of the form ``(*!<dialect>*)`` with a
    non-empty payload. The payload is returned when it equals one of the
    entries of ``self.dialects`` after the first; in every other case
    the result is ``'unknown'``.
    """
    opener = '(*!'
    closer = '*)'

    # guard: the string must be delimited correctly and carry a payload
    well_formed = (
        len(dialect_tag) > len(opener) + len(closer)
        and dialect_tag.startswith(opener)
        and dialect_tag.endswith(closer)
    )
    if not well_formed:
        # invalid indicator string
        return 'unknown'

    # strip both delimiters to obtain the dialect indicator
    indicator = dialect_tag[len(opener):-len(closer)]

    # the first entry of the dialect list is never a valid indicator
    if indicator in self.dialects[1:]:
        return indicator

    # indicator does not match any dialect
    return 'unknown'
# intercept the token stream, modify token attributes and return them
def get_tokens_unprocessed(self, text):
    """Yield ``(index, token, value)`` triples adjusted for the dialect.

    The triples produced by ``RegexLexer.get_tokens_unprocessed`` are
    post-processed one at a time:

    * the first ``Comment.Special`` token carrying a valid dialect tag
      switches the lexer to that dialect;
    * plain ``Name`` tokens are reclassified using the identifier sets
      of the active dialect;
    * number literals, single-line comments and pragmas that the active
      dialect does not accept are turned into ``Error`` or downgraded;
    * any other token is checked against the dialect's rejected lexemes
      and, in Algol publication mode, some operators are replaced by
      typographic symbols.

    NOTE(review): within this excerpt ``dialect_set_by_tag`` is only
    reset by ``__init__``, so a tag found in one text appears to stay in
    force for later texts lexed with the same instance -- confirm.
    """
    # operator glyphs used in Algol publication mode
    algol_symbols = {
        '#': '≠',
        '<=': '≤',
        '>=': '≥',
        '==': '≡',
        '*.': '•',
    }

    for pos, kind, lexeme in RegexLexer.get_tokens_unprocessed(self, text):
        #
        # look for a dialect tag until one has been honoured
        if not self.dialect_set_by_tag and kind == Comment.Special:
            tagged_dialect = self.get_dialect_from_dialect_tag(lexeme)
            if tagged_dialect != 'unknown':
                # reset reserved words and builtins for the new dialect
                self.set_dialect(tagged_dialect)
                self.dialect_set_by_tag = True
        #
        # reserved words, predefined and stdlib identifiers
        if kind is Name:
            # language-defined identifiers are lowercased in Algol
            # publication mode, library-defined ones are left alone
            fold_case = False
            if lexeme in self.reserved_words:
                kind, fold_case = Keyword.Reserved, True
            elif lexeme in self.builtins:
                kind, fold_case = Name.Builtin, True
            elif lexeme in self.pseudo_builtins:
                kind, fold_case = Name.Builtin.Pseudo, True
            elif lexeme in self.adts:
                if self.treat_stdlib_adts_as_builtins:
                    kind, fold_case = Name.Builtin.Pseudo, True
                else:
                    kind = Name.Namespace
            elif lexeme in self.modules:
                kind = Name.Namespace
            elif lexeme in self.types:
                kind = Name.Class
            elif lexeme in self.procedures:
                kind = Name.Function
            elif lexeme in self.variables:
                kind = Name.Variable
            elif lexeme in self.constants:
                kind = Name.Constant

            if fold_case and self.algol_publication_mode:
                lexeme = lexeme.lower()
        #
        # number literals
        elif kind in Number:
            if self.dialect in ('m2r10', 'objm2'):
                # M2 R10 and ObjM2 reject base-8 literals, suffix
                # base-16 literals and real numbers written with E
                unsupported = (
                    kind is Number.Oct
                    or (kind is Number.Hex and 'H' in lexeme)
                    or (kind is Number.Float and 'E' in lexeme)
                )
                if unsupported:
                    kind = Error
            elif self.dialect != 'unknown':
                # PIM and ISO dialects reject prefix number literals
                if "'" in lexeme or lexeme[0:2] in ('0b', '0x', '0u'):
                    kind = Error
        #
        # comments and pragmas
        elif kind in Comment:
            if kind is Comment.Single:
                # single line comments are errors for PIM and ISO
                if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                    kind = Error
            elif kind is Comment.Preproc:
                pim_family = self.dialect.startswith('m2pim')
                if lexeme.startswith('<*') and pim_family:
                    # ISO pragma inside a PIM dialect
                    kind = Error
                elif lexeme.startswith('(*$') and \
                        self.dialect != 'unknown' and not pim_family:
                    # PIM pragma is an ordinary comment elsewhere
                    kind = Comment.Multiline
        #
        # everything else
        else:
            # lexemes the dialect does not know are errors
            if lexeme in self.lexemes_to_reject:
                kind = Error
            # substitute lexemes when in Algol mode
            if self.algol_publication_mode:
                lexeme = algol_symbols.get(lexeme, lexeme)

        # return result
        yield pos, kind, lexeme
def analyse_text(text):
    """Guess how likely ``text`` is to be Modula-2 source.

    Modula-2 is Pascal-like, but declares routines with PROCEDURE only;
    FUNCTION is Pascal and rules Modula-2 out.

    Returns ``None`` when the text lacks ``(*``, ``*)`` or ``:=``,
    ``0.0`` when FUNCTION occurs, ``0.6`` when PROCEDURE occurs without
    FUNCTION, and ``0`` otherwise.
    """
    # Without block comments and assignments it is not even Pascal-like
    pascal_like = '(*' in text and '*)' in text and ':=' in text
    if not pascal_like:
        return None

    # FUNCTION is only valid in Pascal, so it overrides everything else
    if re.search(r'\bFUNCTION\b', text):
        return 0.0

    # PROCEDURE is what Modula-2 uses
    if re.search(r'\bPROCEDURE\b', text):
        return 0.6

    return 0