1"""
2 pygments.lexers.textfmts
3 ~~~~~~~~~~~~~~~~~~~~~~~~
4
5 Lexers for various text formats.
6
7 :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
10
11import re
12
13from pygments.lexers import guess_lexer, get_lexer_by_name
14from pygments.lexer import RegexLexer, bygroups, default, include
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Generic, Literal, Punctuation
17from pygments.util import ClassNotFound
18
19__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
20 'NotmuchLexer', 'KernelLogLexer']


class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']
    url = 'https://en.wikipedia.org/wiki/Internet_Relay_Chat'
    version_added = ''

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
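
# Example use of IrcLogsLexer (an illustrative sketch, not part of the module;
# the log line is invented and NullFormatter is just one possible formatter):
#
#     from pygments import highlight
#     from pygments.formatters import NullFormatter
#
#     log = "12:34 <alice> hello, world\n"
#     print(highlight(log, IrcLogsLexer(), NullFormatter()))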


class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
    url = 'https://www.gnu.org/software/gettext'
    version_added = '0.9'

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
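
# Example use of GettextLexer (an illustrative sketch; the catalog snippet is
# invented):
#
#     from pygments import highlight
#     from pygments.formatters import NullFormatter
#
#     catalog = 'msgid "Hello"\nmsgstr "Bonjour"\n'
#     print(highlight(catalog, GettextLexer(), NullFormatter()))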


class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.
    """

    name = 'HTTP'
    aliases = ['http']
    url = 'https://httpwg.org/specs'
    version_added = '1.5'

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return any(
            re.search(pattern, text) is not None
            for pattern in (
                r'^([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
                r'^(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
            )
        )
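
# Example use of HttpLexer (an illustrative sketch; the request below is
# invented, and a message body, if present, would be re-lexed according to the
# Content-Type header picked up in header_callback):
#
#     from pygments import highlight
#     from pygments.formatters import NullFormatter
#
#     request = ("GET /index.html HTTP/1.1\r\n"
#                "Host: example.org\r\n"
#                "\r\n")
#     print(highlight(request, HttpLexer(), NullFormatter()))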


class TodotxtLexer(RegexLexer):
    """
    Lexer for the Todo.txt todo list format.
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    version_added = '2.0'
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping Todo.txt format concepts onto standard token types
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text  # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'
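    # For illustration, a completed entry such as "x 2024-01-02 2024-01-01 pay
    # rent" starts with a complete_two_date_regex match, while "(A) 2024-01-01
    # pay rent" starts with a priority_date_regex match (dates are made-up
    # samples).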

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
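
# Example use of TodotxtLexer (an illustrative sketch; the task lines are
# invented):
#
#     from pygments import highlight
#     from pygments.formatters import NullFormatter
#
#     tasks = ("(A) 2024-01-01 Call Mom +family @phone\n"
#              "x 2024-01-02 2024-01-01 Pay rent @home\n")
#     print(highlight(tasks, TodotxtLexer(), NullFormatter()))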


class NotmuchLexer(RegexLexer):
    """
    For the Notmuch email text format.

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']
    version_added = '2.5'

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
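
# Example use of NotmuchLexer (an illustrative sketch; it assumes "mail.txt"
# holds output in the Notmuch text format, e.g. from "notmuch show", and it
# forces plain-text highlighting of message bodies via the body_lexer option):
#
#     from pygments import highlight
#     from pygments.formatters import NullFormatter
#
#     with open('mail.txt') as f:
#         lexer = NotmuchLexer(body_lexer='text')
#         print(highlight(f.read(), lexer, NullFormatter()))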


class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']
    url = 'https://fr.wikipedia.org/wiki/Dmesg'
    version_added = '2.6'

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
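
# Example use of KernelLogLexer (an illustrative sketch; the log line is
# invented):
#
#     from pygments import highlight
#     from pygments.formatters import NullFormatter
#
#     line = "[    0.000000] Booting Linux on physical CPU 0x0\n"
#     print(highlight(line, KernelLogLexer(), NullFormatter()))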