Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/textfmts.py: 58%

109 statements

coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.textfmts 

3 ~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for various text formats. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexers import guess_lexer, get_lexer_by_name 

14from pygments.lexer import RegexLexer, bygroups, default, include 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Generic, Literal, Punctuation 

17from pygments.util import ClassNotFound 

18 

19__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer', 

20 'NotmuchLexer', 'KernelLogLexer'] 

21 

22 

class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
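# Illustrative only (not part of the module): a minimal sketch of driving this
# lexer through the standard pygments highlight() API. The log lines are invented;
# they are meant to resemble irssi-style timestamps, a normal message, and a /me action.
from pygments import highlight
from pygments.formatters import TerminalFormatter

irc_log = (
    "06:54 <alice> hello everyone\n"
    "06:55  * bob waves back\n"
)
print(highlight(irc_log, IrcLogsLexer(), TerminalFormatter()))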

class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
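# Illustrative only: a small invented .po snippet covering the flag comment,
# reference comment, and msgid/msgstr rules above. HtmlFormatter is the stock
# pygments HTML formatter.
from pygments import highlight
from pygments.formatters import HtmlFormatter

po_snippet = (
    '#: src/main.c:42\n'
    '#, fuzzy\n'
    'msgid "Hello, world"\n'
    'msgstr "Hallo, Welt"\n'
)
print(highlight(po_snippet, GettextLexer(), HtmlFormatter()))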

class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    .. versionadded:: 1.5
    """

    name = 'HTTP'
    aliases = ['http']

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH|CONNECT)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',
                                'OPTIONS /', 'TRACE /', 'PATCH /', 'CONNECT '))
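# Illustrative only: the key behaviour above is the Content-Type handoff —
# header_callback records the media type, and content_callback delegates the body
# to the lexer registered for that MIME type (stock Pygments registers one for
# application/json). The request below is invented.
from pygments import highlight
from pygments.formatters import TerminalFormatter

request = (
    "POST /api/items HTTP/1.1\r\n"
    "Host: example.com\r\n"
    "Content-Type: application/json\r\n"
    "\r\n"
    '{"name": "widget", "count": 3}'
)
# The JSON body is tokenized by the lexer looked up via get_lexer_for_mimetype().
print(highlight(request, HttpLexer(), TerminalFormatter()))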

class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.

    .. versionadded:: 2.0
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types of Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
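# Illustrative only: two invented task lines showing the entry points above.
# The first hits the priority-plus-date rule, then project/context tokens; the
# second hits the completed-task-with-two-dates rule.
from pygments import highlight
from pygments.formatters import TerminalFormatter

tasks = (
    "(A) 2023-07-01 Call the plumber +home @phone\n"
    "x 2023-06-30 2023-06-28 File expense report +work @office\n"
)
print(highlight(tasks, TodotxtLexer(), TerminalFormatter()))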

class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
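# Illustrative only: the body_lexer option documented in the class docstring is
# simply a constructor keyword consumed by __init__ above; a minimal sketch.

# Guess the language of each message body (the default behaviour):
lexer = NotmuchLexer()

# Or force every message body through a specific lexer, looked up by name:
lexer = NotmuchLexer(body_lexer='python')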

class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
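# Illustrative only: since 'kmsg' and 'dmesg' are listed as aliases, the lexer
# can also be obtained by name via get_lexer_by_name. The log line is invented.
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

log_line = "[    1.234567] usb 1-1: new high-speed USB device number 2\n"
print(highlight(log_line, get_lexer_by_name('dmesg'), TerminalFormatter()))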