Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pygments/lexers/textfmts.py: 58%
109 statements
coverage.py v7.3.1, created at 2023-09-18 06:13 +0000
1"""
2 pygments.lexers.textfmts
3 ~~~~~~~~~~~~~~~~~~~~~~~~
5 Lexers for various text formats.
7 :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
8 :license: BSD, see LICENSE for details.
9"""
11import re
13from pygments.lexers import guess_lexer, get_lexer_by_name
14from pygments.lexer import RegexLexer, bygroups, default, include
15from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
16 Number, Generic, Literal, Punctuation
17from pygments.util import ClassNotFound
19__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
20 'NotmuchLexer', 'KernelLogLexer']


class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
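
# A minimal usage sketch (the sample line and the formatter choice are
# illustrative assumptions): highlighting an irssi-style message.  In the
# 'msg' state the leading "bob:" is picked out as an address prefix.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.textfmts import IrcLogsLexer
#
#     line = "12:34 <alice> bob: did you push the fix?\n"
#     print(highlight(line, IrcLogsLexer(), TerminalFormatter()))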


class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.

    .. versionadded:: 0.9
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
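
# Illustrative (assumed) catalog fragment and how the rules above classify it:
#
#     #: src/main.c:42     -> Keyword.Declaration
#     #, fuzzy             -> Keyword.Type
#     msgid "Hello"        -> Name.Variable / Text / String
#     msgstr[0] "Hallo"    -> Name.Variable / Number.Integer / ... / String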


class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.

    .. versionadded:: 1.5
    """

    name = 'HTTP'
    aliases = ['http']

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return any(
            re.search(pattern, text) is not None
            for pattern in (
                r'^([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
                r'^(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
            )
        )
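
# A usage sketch (the session text is an assumed example): because
# header_callback records the Content-Type, the JSON body below is handed to
# the lexer registered for 'application/json' by content_callback instead of
# being emitted as plain Text.
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.textfmts import HttpLexer
#
#     session = (
#         "HTTP/1.1 200 OK\r\n"
#         "Content-Type: application/json; charset=utf-8\r\n"
#         "\r\n"
#         '{"status": "ok"}'
#     )
#     print(highlight(session, HttpLexer(), TerminalFormatter()))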


class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.

    .. versionadded:: 2.0
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping standard token types of Todo.txt format concepts
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'
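
    # For illustration (assumed sample strings): complete_two_date_regex
    # matches "x 2023-09-18 2023-09-17", complete_one_date_regex matches
    # "x 2023-09-18", and priority_date_regex matches "(A) 2023-09-18".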

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }


class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    .. versionadded:: 2.5

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
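
# A minimal usage sketch for the `body_lexer` option documented above (the
# input filename and the choice of the 'diff' lexer are assumed examples):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     from pygments.lexers.textfmts import NotmuchLexer
#
#     with open('message.notmuch') as f:
#         text = f.read()
#     # Force message bodies to be highlighted as diffs instead of guessing.
#     print(highlight(text, NotmuchLexer(body_lexer='diff'), TerminalFormatter()))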


class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.

    .. versionadded:: 2.6
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
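
# Illustrative (assumed) input lines for the 'root' rules above: the lexer
# expects a "<source>:<level> : " prefix in front of the usual dmesg
# timestamp, e.g.
#
#     kernel:info  : [    0.000000] Linux version ...
#     kernel:err   : [    1.234567] usb 1-1: device descriptor read/64, error -71
#
# Lines that start directly with "[...]" fall through to the 'unknown' state,
# which guesses a severity from keywords such as "error" or "warning".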