1"""
2 pygments.lexers.textfmts
3 ~~~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for various text formats.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexers import guess_lexer, get_lexer_by_name
14from pygments.lexer import RegexLexer, bygroups, default, include
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Generic, Literal, Punctuation
17from pygments.util import ClassNotFound
19__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
20 'NotmuchLexer', 'KernelLogLexer']


class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$',
             bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
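

# --- Illustrative usage sketch (not part of the upstream module) -------------
# A minimal, hypothetical demo of IrcLogsLexer on an irssi-style line; the
# helper name `_demo_irc_log` is invented here for illustration only.
def _demo_irc_log():
    line = '12:34 <alice> bob: did you see the build failure?\n'
    # get_tokens() yields (token_type, text) pairs; the timestamp should come
    # out as Comment.Preproc, the nick as Name.Tag, and 'bob:' as a prefix.
    for token_type, text in IrcLogsLexer().get_tokens(line):
        print(token_type, repr(text))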


class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
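

# --- Illustrative usage sketch (not part of the upstream module) -------------
# A hypothetical demo of GettextLexer on a tiny catalog fragment; the helper
# name `_demo_gettext` is invented for illustration.
def _demo_gettext():
    catalog = (
        '#: src/main.c:42\n'
        'msgid "Hello, world!"\n'
        'msgstr "Hallo, Welt!"\n'
    )
    # '#:' reference comments become Keyword.Declaration; msgid/msgstr
    # keywords become Name.Variable with their strings as String tokens.
    for token_type, text in GettextLexer().get_tokens(catalog):
        print(token_type, repr(text))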


class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    .. versionadded:: 1.5
    """

    name = 'HTTP'
    aliases = ['http']

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return

        yield offset, Text, content

    tokens = {
        'root': [
            (r'(GET|POST|PUT|DELETE|HEAD|OPTIONS|TRACE|PATCH|CONNECT)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]+)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return text.startswith(('GET /', 'POST /', 'PUT /', 'DELETE /', 'HEAD /',
                                'OPTIONS /', 'TRACE /', 'PATCH /', 'CONNECT '))
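

# --- Illustrative usage sketch (not part of the upstream module) -------------
# A hypothetical demo of HttpLexer: header_callback records the Content-Type
# header, so content_callback should re-lex the body below with the JSON
# lexer. The helper name `_demo_http` is invented for illustration.
def _demo_http():
    session = (
        'POST /api/items HTTP/1.1\r\n'
        'Content-Type: application/json\r\n'
        '\r\n'
        '{"name": "widget", "count": 3}'
    )
    for token_type, text in HttpLexer().get_tokens(session):
        print(token_type, repr(text))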


class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.

    .. versionadded:: 2.0
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types to Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
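

# --- Illustrative usage sketch (not part of the upstream module) -------------
# A hypothetical demo of TodotxtLexer on one incomplete and one complete
# task; the helper name `_demo_todotxt` is invented for illustration.
def _demo_todotxt():
    tasks = (
        '(A) 2023-06-30 Call mom @phone +family\n'
        'x 2023-07-01 2023-06-29 Post the report +work\n'
    )
    # The '(A)' priority enters the 'incomplete' state; the leading 'x' plus
    # two dates enters the 'complete' state, de-emphasizing the whole task.
    for token_type, text in TodotxtLexer().get_tokens(tasks):
        print(token_type, repr(text))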


class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
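

# --- Illustrative usage sketch (not part of the upstream module) -------------
# A hypothetical demo of NotmuchLexer; the sample message and the helper name
# `_demo_notmuch` are invented for illustration. Passing body_lexer pins the
# body highlighting to a known lexer instead of guessing from its content.
def _demo_notmuch():
    sample = (
        '\fmessage{ id:msg-1 depth:0 filename:/tmp/mail\n'
        '\fbody{\n'
        '\fpart{ ID: 1, Content-type: text/plain\n'
        'Hello from notmuch.\n'
        '\fpart}\n'
        '\fbody}\n'
        '\fmessage}\n'
    )
    for token_type, text in NotmuchLexer(body_lexer='text').get_tokens(sample):
        print(token_type, repr(text))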


class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
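

# --- Illustrative usage sketch (not part of the upstream module) -------------
# A hypothetical demo of KernelLogLexer; `_demo_kernel_log` is invented for
# illustration. A line with no "level :" prefix goes through the 'unknown'
# state and falls back to 'info' unless a warning/error keyword appears.
def _demo_kernel_log():
    log = '[    0.000000] Linux version 6.1.0 (build info here)\n'
    # The '[    0.000000] ' timestamp is tokenized as Number by the shared
    # 'base' rules; the rest of the line is plain Text in the 'info' state.
    for token_type, text in KernelLogLexer().get_tokens(log):
        print(token_type, repr(text))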