Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/c

1"""

2 pygments.lexers.c_cpp

3 ~~~~~~~~~~~~~~~~~~~~~

5 Lexers for C/C++ languages.

8 :license: BSD, see LICENSE for details.

9"""

11import re

13from pygments.lexer import RegexLexer, include, bygroups, using, \

14 this, inherit, default, words

15from pygments.util import get_bool_opt

16from pygments.token import Text, Comment, Operator, Keyword, Name, String, \

17 Number, Punctuation, Whitespace

19__all__ = ['CLexer', 'CppLexer']

class CFamilyLexer(RegexLexer):
    """
    For C family source code.  This is used as a base class to avoid repetitious
    definitions.

    The ``tokens`` table below holds the lexer states; subclasses extend
    individual states with ``inherit``.

    Options read in ``__init__`` (all boolean, all defaulting to ``True``):

    `stdlibhighlighting`
        Re-tag names listed in ``stdlib_types`` as ``Keyword.Type``.
    `c99highlighting`
        Re-tag names listed in ``c99_types`` as ``Keyword.Type``.
    `c11highlighting`
        Re-tag names listed in ``c11_atomic_types`` as ``Keyword.Type``.
    `platformhighlighting`
        Re-tag names listed in ``linux_types`` as ``Keyword.Type``.
    """

    # The trailing ?, rather than *, avoids a geometric performance drop here.
    #: only one /* */ style comment
    _ws1 = r'\s*(?:/[*].*?[*]/\s*)?'

    # Hexadecimal part in an hexadecimal integer/floating-point literal.
    # This includes decimal separators matching.
    _hexpart = r'[0-9a-fA-F](\'?[0-9a-fA-F])*'
    # Decimal part in an decimal integer/floating-point literal.
    # This includes decimal separators matching.
    _decpart = r'\d(\'?\d)*'
    # Integer literal suffix (e.g. 'ull' or 'll').
    _intsuffix = r'(([uU][lL]{0,2})|[lL]{1,2}[uU]?)?'

    # Identifier regex with C and C++ Universal Character Name (UCN) support.
    _ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8})+'
    _namespaced_ident = r'(?!\d)(?:[\w$]|\\u[0-9a-fA-F]{4}|\\U[0-9a-fA-F]{8}|::)+'

    # Single and multiline comment regexes
    # Beware not to use *? for the inner content! When these regexes
    # are embedded in larger regexes, that can cause the stuff*? to
    # match more than it would have if the regex had been used in
    # a standalone way ...
    _comment_single = r'//(?:.|(?<=\\)\n)*\n'
    _comment_multiline = r'/(?:\\\n)?[*](?:[^*]|[*](?!(?:\\\n)?/))*[*](?:\\\n)?/'

    # Regex to match optional comments
    _possible_comments = rf'\s*(?:(?:(?:{_comment_single})|(?:{_comment_multiline}))\s*)*'

    tokens = {
        'whitespace': [
            # preprocessor directives: without whitespace
            (r'^#if\s+0', Comment.Preproc, 'if0'),
            ('^#', Comment.Preproc, 'macro'),
            # or with whitespace
            ('^(' + _ws1 + r')(#if\s+0)',
             bygroups(using(this), Comment.Preproc), 'if0'),
            ('^(' + _ws1 + ')(#)',
             bygroups(using(this), Comment.Preproc), 'macro'),
            # Labels:
            # Line start and possible indentation.
            (r'(^[ \t]*)'
             # Not followed by keywords which can be mistaken as labels.
             r'(?!(?:public|private|protected|default)\b)'
             # Actual label, followed by a single colon.
             r'(' + _ident + r')(\s*)(:)(?!:)',
             bygroups(Whitespace, Name.Label, Whitespace, Punctuation)),
            (r'\n', Whitespace),
            (r'[^\S\n]+', Whitespace),
            (r'\\\n', Text),  # line continuation
            (_comment_single, Comment.Single),
            (_comment_multiline, Comment.Multiline),
            # Open until EOF, so no ending delimiter
            (r'/(\\\n)?[*][\w\W]*', Comment.Multiline),
        ],
        'statements': [
            include('keywords'),
            include('types'),
            (r'([LuU]|u8)?(")', bygroups(String.Affix, String), 'string'),
            (r"([LuU]|u8)?(')(\\.|\\[0-7]{1,3}|\\x[a-fA-F0-9]{1,2}|[^\\\'\n])(')",
             bygroups(String.Affix, String.Char, String.Char, String.Char)),

            # Hexadecimal floating-point literals (C11, C++17)
            (r'0[xX](' + _hexpart + r'\.' + _hexpart + r'|\.' + _hexpart +
             r'|' + _hexpart + r')[pP][+-]?' + _hexpart + r'[lL]?', Number.Float),

            (r'(-)?(' + _decpart + r'\.' + _decpart + r'|\.' + _decpart + r'|' +
             _decpart + r')[eE][+-]?' + _decpart + r'[fFlL]?', Number.Float),
            (r'(-)?((' + _decpart + r'\.(' + _decpart + r')?|\.' +
             _decpart + r')[fFlL]?)|(' + _decpart + r'[fFlL])', Number.Float),
            (r'(-)?0[xX]' + _hexpart + _intsuffix, Number.Hex),
            (r'(-)?0[bB][01](\'?[01])*' + _intsuffix, Number.Bin),
            (r'(-)?0(\'?[0-7])+' + _intsuffix, Number.Oct),
            (r'(-)?' + _decpart + _intsuffix, Number.Integer),
            (r'[~!%^&*+=|?:<>/-]', Operator),
            (r'[()\[\],.]', Punctuation),
            (r'(true|false|NULL)\b', Name.Builtin),
            (_ident, Name)
        ],
        'types': [
            (words(('int8', 'int16', 'int32', 'int64', 'wchar_t'), prefix=r'__',
                   suffix=r'\b'), Keyword.Reserved),
            (words(('bool', 'int', 'long', 'float', 'short', 'double', 'char',
                    'unsigned', 'signed', 'void', '_BitInt',
                    '__int128'), suffix=r'\b'), Keyword.Type)
        ],
        'keywords': [
            (r'(struct|union)(\s+)', bygroups(Keyword, Whitespace), 'classname'),
            (r'case\b', Keyword, 'case-value'),
            (words(('asm', 'auto', 'break', 'const', 'continue', 'default',
                    'do', 'else', 'enum', 'extern', 'for', 'goto', 'if',
                    'register', 'restricted', 'return', 'sizeof', 'struct',
                    'static', 'switch', 'typedef', 'volatile', 'while', 'union',
                    'thread_local', 'alignas', 'alignof', 'static_assert', '_Pragma'),
                   suffix=r'\b'), Keyword),
            (words(('inline', '_inline', '__inline', 'naked', 'restrict',
                    'thread'), suffix=r'\b'), Keyword.Reserved),
            # Vector intrinsics
            (r'(__m(128i|128d|128|64))\b', Keyword.Reserved),
            # Microsoft-isms
            (words((
                'asm', 'based', 'except', 'stdcall', 'cdecl',
                'fastcall', 'declspec', 'finally', 'try',
                'leave', 'w64', 'unaligned', 'raise', 'noop',
                'identifier', 'forceinline', 'assume'),
                prefix=r'__', suffix=r'\b'), Keyword.Reserved)
        ],
        'root': [
            include('whitespace'),
            include('keywords'),
            # functions
            (r'(' + _namespaced_ident + r'(?:[&*\s])+)'  # return arguments
             r'(' + _possible_comments + r')'
             r'(' + _namespaced_ident + r')'             # method name
             r'(' + _possible_comments + r')'
             # The signature is a literal "(" ... ")" pair; the parentheses
             # must be escaped so they are not read as a regex group.
             r'(\([^;"\')]*?\))'                         # signature
             r'(' + _possible_comments + r')'
             r'([^;{/"\']*)(\{)',
             bygroups(using(this), using(this, state='whitespace'),
                      Name.Function, using(this, state='whitespace'),
                      using(this), using(this, state='whitespace'),
                      using(this), Punctuation),
             'function'),
            # function declarations
            (r'(' + _namespaced_ident + r'(?:[&*\s])+)'  # return arguments
             r'(' + _possible_comments + r')'
             r'(' + _namespaced_ident + r')'             # method name
             r'(' + _possible_comments + r')'
             # Same escaped "(" ... ")" signature group as the rule above.
             r'(\([^;"\')]*?\))'                         # signature
             r'(' + _possible_comments + r')'
             r'([^;/"\']*)(;)',
             bygroups(using(this), using(this, state='whitespace'),
                      Name.Function, using(this, state='whitespace'),
                      using(this), using(this, state='whitespace'),
                      using(this), Punctuation)),
            include('types'),
            default('statement'),
        ],
        'statement': [
            include('whitespace'),
            include('statements'),
            (r'\}', Punctuation),
            (r'[{;]', Punctuation, '#pop'),
        ],
        'function': [
            include('whitespace'),
            include('statements'),
            (';', Punctuation),
            (r'\{', Punctuation, '#push'),
            (r'\}', Punctuation, '#pop'),
        ],
        'string': [
            (r'"', String, '#pop'),
            (r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|'
             r'u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8}|[0-7]{1,3})', String.Escape),
            (r'[^\\"\n]+', String),  # all other characters
            (r'\\\n', String),  # line continuation
            (r'\\', String),  # stray backslash
        ],
        'macro': [
            (r'('+_ws1+r')(include)('+_ws1+r')("[^"]+")([^\n]*)',
             bygroups(using(this), Comment.Preproc, using(this),
                      Comment.PreprocFile, Comment.Single)),
            (r'('+_ws1+r')(include)('+_ws1+r')(<[^>]+>)([^\n]*)',
             bygroups(using(this), Comment.Preproc, using(this),
                      Comment.PreprocFile, Comment.Single)),
            (r'[^/\n]+', Comment.Preproc),
            (r'/[*](.|\n)*?[*]/', Comment.Multiline),
            (r'//.*?\n', Comment.Single, '#pop'),
            (r'/', Comment.Preproc),
            (r'(?<=\\)\n', Comment.Preproc),
            (r'\n', Comment.Preproc, '#pop'),
        ],
        'if0': [
            (r'^\s*#if.*?(?<!\\)\n', Comment.Preproc, '#push'),
            (r'^\s*#el(?:se|if).*\n', Comment.Preproc, '#pop'),
            (r'^\s*#endif.*?(?<!\\)\n', Comment.Preproc, '#pop'),
            (r'.*?\n', Comment),
        ],
        'classname': [
            (_ident, Name.Class, '#pop'),
            # template specification
            (r'\s*(?=>)', Text, '#pop'),
            default('#pop')
        ],
        # Mark identifiers preceded by `case` keyword as constants.
        'case-value': [
            (r'(?<!:)(:)(?!:)', Punctuation, '#pop'),
            (_ident, Name.Constant),
            include('whitespace'),
            include('statements'),
        ]
    }

    # Name sets consulted by get_tokens_unprocessed(); a plain ``Name`` token
    # whose text is in an enabled set is re-tagged as ``Keyword.Type``.
    stdlib_types = {
        'size_t', 'ssize_t', 'off_t', 'wchar_t', 'ptrdiff_t', 'sig_atomic_t', 'fpos_t',
        'clock_t', 'time_t', 'va_list', 'jmp_buf', 'FILE', 'DIR', 'div_t', 'ldiv_t',
        'mbstate_t', 'wctrans_t', 'wint_t', 'wctype_t'}
    c99_types = {
        'int8_t', 'int16_t', 'int32_t', 'int64_t', 'uint8_t',
        'uint16_t', 'uint32_t', 'uint64_t', 'int_least8_t', 'int_least16_t',
        'int_least32_t', 'int_least64_t', 'uint_least8_t', 'uint_least16_t',
        'uint_least32_t', 'uint_least64_t', 'int_fast8_t', 'int_fast16_t', 'int_fast32_t',
        'int_fast64_t', 'uint_fast8_t', 'uint_fast16_t', 'uint_fast32_t', 'uint_fast64_t',
        'intptr_t', 'uintptr_t', 'intmax_t', 'uintmax_t'}
    linux_types = {
        'clockid_t', 'cpu_set_t', 'cpumask_t', 'dev_t', 'gid_t', 'id_t', 'ino_t', 'key_t',
        'mode_t', 'nfds_t', 'pid_t', 'rlim_t', 'sig_t', 'sighandler_t', 'siginfo_t',
        'sigset_t', 'sigval_t', 'socklen_t', 'timer_t', 'uid_t'}
    c11_atomic_types = {
        'atomic_bool', 'atomic_char', 'atomic_schar', 'atomic_uchar', 'atomic_short',
        'atomic_ushort', 'atomic_int', 'atomic_uint', 'atomic_long', 'atomic_ulong',
        'atomic_llong', 'atomic_ullong', 'atomic_char16_t', 'atomic_char32_t', 'atomic_wchar_t',
        'atomic_int_least8_t', 'atomic_uint_least8_t', 'atomic_int_least16_t',
        'atomic_uint_least16_t', 'atomic_int_least32_t', 'atomic_uint_least32_t',
        'atomic_int_least64_t', 'atomic_uint_least64_t', 'atomic_int_fast8_t',
        'atomic_uint_fast8_t', 'atomic_int_fast16_t', 'atomic_uint_fast16_t',
        'atomic_int_fast32_t', 'atomic_uint_fast32_t', 'atomic_int_fast64_t',
        'atomic_uint_fast64_t', 'atomic_intptr_t', 'atomic_uintptr_t', 'atomic_size_t',
        'atomic_ptrdiff_t', 'atomic_intmax_t', 'atomic_uintmax_t'}

    def __init__(self, **options):
        """Read the four highlighting switches from *options*, then
        initialise the underlying ``RegexLexer`` with the same options."""
        self.stdlibhighlighting = get_bool_opt(options, 'stdlibhighlighting', True)
        self.c99highlighting = get_bool_opt(options, 'c99highlighting', True)
        self.c11highlighting = get_bool_opt(options, 'c11highlighting', True)
        self.platformhighlighting = get_bool_opt(options, 'platformhighlighting', True)
        RegexLexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Yield ``(index, token, value)`` triples for *text*.

        Delegates to ``RegexLexer.get_tokens_unprocessed`` and re-tags tokens
        that are exactly ``Name`` as ``Keyword.Type`` when their value is in
        one of the type-name sets whose option is enabled.
        """
        for index, token, value in \
                RegexLexer.get_tokens_unprocessed(self, text, stack):
            # Identity check: only the bare Name token is considered here.
            if token is Name:
                if self.stdlibhighlighting and value in self.stdlib_types:
                    token = Keyword.Type
                elif self.c99highlighting and value in self.c99_types:
                    token = Keyword.Type
                elif self.c11highlighting and value in self.c11_atomic_types:
                    token = Keyword.Type
                elif self.platformhighlighting and value in self.linux_types:
                    token = Keyword.Type
            yield index, token, value

268

269

class CLexer(CFamilyLexer):
    """
    For C source code with preprocessor directives.

    Additional options accepted:

    `stdlibhighlighting`
        Highlight common types found in the C/C++ standard library (e.g. `size_t`).
        (default: ``True``).

    `c99highlighting`
        Highlight common types found in the C99 standard library (e.g. `int8_t`).
        Actually, this includes all fixed-width integer types.
        (default: ``True``).

    `c11highlighting`
        Highlight atomic types found in the C11 standard library (e.g. `atomic_bool`).
        (default: ``True``).

    `platformhighlighting`
        Highlight common types found in the platform SDK headers (e.g. `clockid_t` on Linux).
        (default: ``True``).
    """
    name = 'C'
    aliases = ['c']
    filenames = ['*.c', '*.h', '*.idc', '*.x[bp]m']
    mimetypes = ['text/x-chdr', 'text/x-csrc', 'image/x-xbitmap', 'image/x-xpixmap']
    priority = 0.1

    tokens = {
        # C-only keywords are tried first, then the rules of the parent state.
        'keywords': [
            (
                words(
                    (
                        '_Alignas', '_Alignof', '_Noreturn', '_Generic',
                        '_Thread_local', '_Static_assert', '_Imaginary',
                        'noreturn', 'imaginary', 'complex',
                    ),
                    suffix=r'\b',
                ),
                Keyword,
            ),
            inherit,
        ],
        # C-only type keywords, followed by the parent's type rules.
        'types': [
            (words(('_Bool', '_Complex', '_Atomic'), suffix=r'\b'), Keyword.Type),
            inherit,
        ],
    }

    def analyse_text(text):
        """Return 0.1 when *text* has a line starting (after optional
        whitespace) with ``#include <``/``#include "`` or with
        ``#ifdef ``/``#ifndef ``; otherwise fall through and return None."""
        preprocessor_hints = (r'^\s*#include [<"]', r'^\s*#ifn?def ')
        for hint in preprocessor_hints:
            if re.search(hint, text, re.MULTILINE):
                return 0.1

318

319

class CppLexer(CFamilyLexer):
    """
    For C++ source code with preprocessor directives.

    Additional options accepted:

    `stdlibhighlighting`
        Highlight common types found in the C/C++ standard library (e.g. `size_t`).
        (default: ``True``).

    `c99highlighting`
        Highlight common types found in the C99 standard library (e.g. `int8_t`).
        Actually, this includes all fixed-width integer types.
        (default: ``True``).

    `c11highlighting`
        Highlight atomic types found in the C11 standard library (e.g. `atomic_bool`).
        (default: ``True``).

    `platformhighlighting`
        Highlight common types found in the platform SDK headers (e.g. `clockid_t` on Linux).
        (default: ``True``).
    """
    name = 'C++'
    url = 'https://isocpp.org/'
    aliases = ['cpp', 'c++']
    filenames = ['*.cpp', '*.hpp', '*.c++', '*.h++',
                 '*.cc', '*.hh', '*.cxx', '*.hxx',
                 '*.C', '*.H', '*.cp', '*.CPP', '*.tpp']
    mimetypes = ['text/x-c++hdr', 'text/x-c++src']
    priority = 0.1

    tokens = {
        'statements': [
            # C++11 raw strings
            # Group 3 is the delimiter: up to 16 characters, none of them a
            # backslash, a parenthesis or whitespace.  The closing part is a
            # literal ")" followed by the same delimiter (backreference \3).
            (r'((?:[LuU]|u8)?R)(")([^\\()\s]{,16})(\()((?:.|\n)*?)(\)\3)(")',
             bygroups(String.Affix, String, String.Delimiter, String.Delimiter,
                      String, String.Delimiter, String)),
            inherit,
        ],
        'root': [
            inherit,
            # C++ Microsoft-isms
            (words(('virtual_inheritance', 'uuidof', 'super', 'single_inheritance',
                    'multiple_inheritance', 'interface', 'event'),
                   prefix=r'__', suffix=r'\b'), Keyword.Reserved),
            # Offload C++ extensions, http://offload.codeplay.com/
            (r'__(offload|blockingoffload|outer)\b', Keyword.Pseudo),
        ],
        'enumname': [
            include('whitespace'),
            # 'enum class' and 'enum struct' C++11 support
            (words(('class', 'struct'), suffix=r'\b'), Keyword),
            (CFamilyLexer._ident, Name.Class, '#pop'),
            # template specification
            (r'\s*(?=>)', Text, '#pop'),
            default('#pop')
        ],
        'keywords': [
            (r'(class|concept|typename)(\s+)', bygroups(Keyword, Whitespace), 'classname'),
            (words((
                'catch', 'const_cast', 'delete', 'dynamic_cast', 'explicit',
                'export', 'friend', 'mutable', 'new', 'operator',
                'private', 'protected', 'public', 'reinterpret_cast', 'class',
                '__restrict', 'static_cast', 'template', 'this', 'throw', 'throws',
                'try', 'typeid', 'using', 'virtual', 'constexpr', 'nullptr', 'concept',
                'decltype', 'noexcept', 'override', 'final', 'constinit', 'consteval',
                'co_await', 'co_return', 'co_yield', 'requires', 'import', 'module',
                'typename', 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not',
                'not_eq', 'or', 'or_eq', 'xor', 'xor_eq'),
               suffix=r'\b'), Keyword),
            (r'namespace\b', Keyword, 'namespace'),
            (r'(enum)(\s+)', bygroups(Keyword, Whitespace), 'enumname'),
            inherit
        ],
        'types': [
            (r'char(16_t|32_t|8_t)\b', Keyword.Type),
            inherit
        ],
        'namespace': [
            # Leaving the namespace header: pop this state and enter 'root'.
            (r'[;{]', Punctuation, ('#pop', 'root')),
            (r'inline\b', Keyword.Reserved),
            (CFamilyLexer._ident, Name.Namespace),
            include('statement')
        ]
    }

    def analyse_text(text):
        """Return 0.2 when *text* contains ``#include <name>`` with a
        lowercase/underscore-only name, else 0.4 when it contains
        ``using namespace ``; otherwise fall through and return None."""
        if re.search('#include <[a-z_]+>', text):
            return 0.2
        if re.search('using namespace ', text):
            return 0.4

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/c_cpp.py: 76%

63 statements