1""" 

2 pygments.lexers.textfmts 

3 ~~~~~~~~~~~~~~~~~~~~~~~~ 

4 

5 Lexers for various text formats. 

6 

7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. 

8 :license: BSD, see LICENSE for details. 

9""" 

10 

11import re 

12 

13from pygments.lexers import guess_lexer, get_lexer_by_name 

14from pygments.lexer import RegexLexer, bygroups, default, include 

15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 

16 Number, Generic, Literal, Punctuation 

17from pygments.util import ClassNotFound 

18 

19__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer', 

20 'NotmuchLexer', 'KernelLogLexer'] 

21 

22 

class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
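
# Illustrative note (added by the editor, not part of the original module): a
# plain irssi-style line such as
#
#     12:34 <alice> hello, world
#
# is claimed by the "normal msgs" rule above -- "12:34 " through the
# ``timestamp`` fragment (Comment.Preproc), "<alice> " as the nick (Name.Tag),
# and the remainder of the line as Text inside the 'msg' state.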



class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
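
# Illustrative sketch (added, not part of the original module): for a typical
# catalog entry the rules above apply roughly as follows --
#
#     #: src/main.c:42      -> Keyword.Declaration    (the '^#:' rule)
#     #, fuzzy              -> Keyword.Type           (the '^#,' rule)
#     msgid "Hello"         -> Name.Variable + String (msgid/msgstr rule)
#     msgstr "Bonjour"      -> Name.Variable + String (msgid/msgstr rule)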



class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    .. versionadded:: 1.5
    """

    name = 'HTTP'
    aliases = ['http']

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return any(
            re.search(pattern, text) is not None
            for pattern in (
                r'^([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
                r'^(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
            )
        )
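
# Usage sketch (added, not part of the original module): the message body is
# re-lexed according to the Content-Type header captured by header_callback,
# so e.g.
#
#     from pygments import highlight
#     from pygments.formatters import NullFormatter
#     session = "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\n\r\n{}"
#     highlight(session, HttpLexer(), NullFormatter())
#
# tokenizes the trailing "{}" with the JSON lexer rather than as plain Text.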



class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.

    .. versionadded:: 2.0
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types of Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
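
# Illustrative sketch (added, not part of the original module): two sample
# tasks and the entry points above that claim them --
#
#     x 2023-09-01 2023-08-15 Pay rent +home @post-office
#         -> complete_two_date_regex, then the 'complete' state
#            (+home as Project, @post-office as Context)
#     (A) 2023-09-18 Write report +work @laptop
#         -> priority_date_regex, then the 'incomplete' state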



class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
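
# Usage sketch (added, not part of the original module): the ``body_lexer``
# option travels through the usual lexer-options mechanism, e.g.
#
#     from pygments.lexers import get_lexer_by_name
#     lexer = get_lexer_by_name('notmuch', body_lexer='diff')
#
# which forces message bodies to be highlighted with the diff lexer instead of
# relying on guess_lexer().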



class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
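

# ---------------------------------------------------------------------------
# Minimal demo sketch (added by the editor, not part of the original module):
# running this file directly highlights a small HTTP exchange and a dmesg-style
# line with the lexers defined above. Only standard Pygments APIs
# (``highlight`` and ``TerminalFormatter``) are used.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    http_sample = (
        "GET /index.html HTTP/1.1\r\n"
        "Host: example.com\r\n"
        "Content-Type: application/json\r\n"
        "\r\n"
        '{"hello": "world"}\n'
    )
    dmesg_sample = "[    0.123456] usb 1-1: new high-speed USB device number 2\n"

    print(highlight(http_sample, HttpLexer(), TerminalFormatter()))
    print(highlight(dmesg_sample, KernelLogLexer(), TerminalFormatter()))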