1""" 

2 pygments.lexers.textfmts 

3 ~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for various text formats. 

6 

7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexers import guess_lexer, get_lexer_by_name 

14from pygments.lexer import RegexLexer, bygroups, default, include 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Generic, Literal, Punctuation 

17from pygments.util import ClassNotFound 

18 

19__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer', 

20 'NotmuchLexer', 'KernelLogLexer'] 

21 

22 

class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']
    url = 'https://en.wikipedia.org/wiki/Internet_Relay_Chat'
    version_added = ''

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$',
             bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                     # Nick + Space
                (.*?\n)                      # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
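

# A minimal usage sketch (not part of the original module): feeding an
# irssi-style log line through IrcLogsLexer. The sample line and the helper
# name _demo_irc_logs are invented for illustration; get_tokens() is the
# standard pygments lexer API and yields (tokentype, value) pairs.
def _demo_irc_logs():
    lexer = IrcLogsLexer()
    sample = '12:34 <alice> pygments: the timestamp regex also accepts dates\n'
    for tokentype, value in lexer.get_tokens(sample):
        print(tokentype, repr(value))
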

class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
    url = 'https://www.gnu.org/software/gettext'
    version_added = '0.9'

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
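

# A minimal usage sketch (not part of the original module): a tiny PO
# fragment run through GettextLexer, exercising the flag (#,), reference (#:)
# and msgid/msgstr rules above. The catalog content and the helper name
# _demo_gettext are invented for illustration.
def _demo_gettext():
    lexer = GettextLexer()
    catalog = (
        '#: src/main.c:42\n'
        '#, fuzzy\n'
        'msgid "Hello"\n'
        'msgstr "Bonjour"\n'
    )
    for tokentype, value in lexer.get_tokens(catalog):
        print(tokentype, repr(value))
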

class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.
    """

    name = 'HTTP'
    aliases = ['http']
    url = 'https://httpwg.org/specs'
    version_added = '1.5'

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return any(
            re.search(pattern, text) is not None
            for pattern in (
                r'^([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
                r'^(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
            )
        )
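

# A minimal sketch of the content-type sniffing above (not part of the
# original module): header_callback() records the Content-Type header, so
# content_callback() should hand the JSON body below to the JSON lexer
# rather than emitting it as plain Text. The sample response and the helper
# name _demo_http are invented for illustration.
def _demo_http():
    lexer = HttpLexer()
    response = (
        'HTTP/1.1 200 OK\r\n'
        'Content-Type: application/json\r\n'
        '\r\n'
        '{"status": "ok"}'
    )
    for tokentype, value in lexer.get_tokens(response):
        print(tokentype, repr(value))
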

class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    version_added = '2.0'
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types of Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
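

# A minimal usage sketch (not part of the original module): one incomplete
# task and one complete task, exercising the priority/date entry point and
# the two-date complete entry point above, plus the +project and @context
# rules. The task text and the helper name _demo_todotxt are invented.
def _demo_todotxt():
    lexer = TodotxtLexer()
    tasks = (
        '(A) 2024-01-01 Draft report +work @office\n'
        'x 2024-01-03 2024-01-02 File expenses +work\n'
    )
    for tokentype, value in lexer.get_tokens(tasks):
        print(tokentype, repr(value))
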

class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']
    version_added = '2.5'

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
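

# A minimal sketch of the `body_lexer` option documented above (not part of
# the original module). Passing body_lexer='text' makes _highlight_code()
# skip guess_lexer() and render message bodies as plain text. The hand-built
# sample (the \f form feeds delimit notmuch-show sections) and the helper
# name _demo_notmuch are invented for illustration.
def _demo_notmuch():
    lexer = NotmuchLexer(body_lexer='text')
    sample = (
        '\fmessage{ id:1234@example depth:0 filename:/tmp/mail\n'
        '\fheader{\n'
        'Subject: hello\n'
        '\fheader}\n'
        '\fbody{\n'
        '\fpart{ ID: 1, Content-type: text/plain\n'
        'Hi there.\n'
        '\fpart}\n'
        '\fbody}\n'
        '\fmessage}\n'
    )
    print(NotmuchLexer.analyse_text(sample))  # 1.0: starts with \fmessage{
    for tokentype, value in lexer.get_tokens(sample):
        print(tokentype, repr(value))
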

class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']
    url = 'https://fr.wikipedia.org/wiki/Dmesg'
    version_added = '2.6'

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
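

# A minimal usage sketch (not part of the original module): bare dmesg lines
# enter the 'unknown' state and are routed to 'info' or 'error' by the
# keyword lookaheads above, so the first line below should come out as Text
# and the second as Generic.Error, with the [timestamp] as Number. The log
# lines and the helper name _demo_kernel_log are invented for illustration.
def _demo_kernel_log():
    lexer = KernelLogLexer()
    log = (
        '[    0.000000] Booting Linux on physical CPU 0x0\n'
        '[    1.234567] usb 1-1: device descriptor read error\n'
    )
    for tokentype, value in lexer.get_tokens(log):
        print(tokentype, repr(value))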