Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/textfmts.py: 58%

109 statements

coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1""" 

2 pygments.lexers.textfmts 

3 ~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for various text formats. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexers import guess_lexer, get_lexer_by_name 

14from pygments.lexer import RegexLexer, bygroups, default, include 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Generic, Literal, Punctuation 

17from pygments.util import ClassNotFound 

18 

19__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer', 

20 'NotmuchLexer', 'KernelLogLexer'] 

21 

22 

class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
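# Illustrative only (not part of the module): a minimal sketch of driving this
# lexer through the standard pygments highlight() API. The log lines are invented;
# they are meant to resemble irssi-style timestamps, a normal message, and a /me action.
from pygments import highlight
from pygments.formatters import TerminalFormatter

irc_log = (
    "06:54 <alice> hello everyone\n"
    "06:55  * bob waves back\n"
)
print(highlight(irc_log, IrcLogsLexer(), TerminalFormatter()))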

class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
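# Illustrative only: a small invented .po snippet covering the flag comment,
# reference comment, and msgid/msgstr rules above. HtmlFormatter is the stock
# pygments HTML formatter.
from pygments import highlight
from pygments.formatters import HtmlFormatter

po_snippet = (
    '#: src/main.c:42\n'
    '#, fuzzy\n'
    'msgid "Hello, world"\n'
    'msgstr "Hallo, Welt"\n'
)
print(highlight(po_snippet, GettextLexer(), HtmlFormatter()))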

class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    .. versionadded:: 1.5
    """

    name = 'HTTP'
    aliases = ['http']

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH|CONNECT)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',
                                'OPTIONS /', 'TRACE /', 'PATCH /', 'CONNECT '))
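# Illustrative only: the key behaviour above is the Content-Type handoff —
# header_callback records the media type, and content_callback delegates the body
# to the lexer registered for that MIME type (stock Pygments registers one for
# application/json). The request below is invented.
from pygments import highlight
from pygments.formatters import TerminalFormatter

request = (
    "POST /api/items HTTP/1.1\r\n"
    "Host: example.com\r\n"
    "Content-Type: application/json\r\n"
    "\r\n"
    '{"name": "widget", "count": 3}'
)
# The JSON body is tokenized by the lexer looked up via get_lexer_for_mimetype().
print(highlight(request, HttpLexer(), TerminalFormatter()))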

class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.

    .. versionadded:: 2.0
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types of Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
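# Illustrative only: two invented task lines showing the entry points above.
# The first hits the priority-plus-date rule, then project/context tokens; the
# second hits the completed-task-with-two-dates rule.
from pygments import highlight
from pygments.formatters import TerminalFormatter

tasks = (
    "(A) 2023-07-01 Call the plumber +home @phone\n"
    "x 2023-06-30 2023-06-28 File expense report +work @office\n"
)
print(highlight(tasks, TodotxtLexer(), TerminalFormatter()))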

class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
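# Illustrative only: the body_lexer option documented in the class docstring is
# simply a constructor keyword consumed by __init__ above; a minimal sketch.

# Guess the language of each message body (the default behaviour):
lexer = NotmuchLexer()

# Or force every message body through a specific lexer, looked up by name:
lexer = NotmuchLexer(body_lexer='python')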

class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
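# Illustrative only: since 'kmsg' and 'dmesg' are listed as aliases, the lexer
# can also be obtained by name via get_lexer_by_name. The log line is invented.
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import get_lexer_by_name

log_line = "[    1.234567] usb 1-1: new high-speed USB device number 2\n"
print(highlight(log_line, get_lexer_by_name('dmesg'), TerminalFormatter()))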