Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parse.py: 74%
509 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:15 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:15 +0000
1from __future__ import absolute_import
3__version__ = '1.19.1'
5# yes, I now have two problems
6import re
7import sys
8from datetime import datetime, time, tzinfo, timedelta
9from decimal import Decimal
10from functools import partial
11import logging
13__all__ = 'parse search findall with_pattern'.split()
15log = logging.getLogger(__name__)
def with_pattern(pattern, regex_group_count=None):
    r"""Build a decorator that tags a type converter with its regex pattern.

    The decorated function is returned unchanged apart from two new
    attributes: :attr:`pattern` and :attr:`regex_group_count`.

    EXAMPLE:
        >>> import parse
        >>> @parse.with_pattern(r"\d+")
        ... def parse_number(text):
        ...     return int(text)

    sets the same ``pattern`` attribute as:

        >>> def parse_number(text):
        ...     return int(text)
        >>> parse_number.pattern = r"\d+"

    :param pattern: regular expression pattern (as text)
    :param regex_group_count: Indicates how many regex-groups are in pattern.
    :return: decorator that annotates and returns the function it is given
    """

    def annotate(converter):
        annotations = (
            ('pattern', pattern),
            ('regex_group_count', regex_group_count),
        )
        for attribute, value in annotations:
            setattr(converter, attribute, value)
        return converter

    return annotate
class int_convert:
    """Convert a string to an integer.

    The string may start with a sign: "-", "+" or a single space (the
    space form is what the ``[-+ ]?`` prefix of numeric fields can capture).

    When ``base`` is given it is used as-is. When it is ``None`` the base
    defaults to 10 unless the digits start with a ``0b``, ``0o`` or ``0x``
    indicator (either case), which selects base 2, 8 or 16.

    Any character that is not a digit of the chosen base (thousands
    separators, the base indicator letter, ...) is dropped before conversion.
    """

    CHARS = '0123456789abcdefghijklmnopqrstuvwxyz'

    def __init__(self, base=None):
        self.base = base

    def __call__(self, string, match):
        """Return the integer value of ``string``; ``match`` is unused.

        Raises ``ValueError`` if no digit of the selected base remains.
        """
        first = string[0]
        sign = -1 if first == '-' else 1
        # A leading space is a sign placeholder too; it must be skipped so
        # that the base indicator is looked for in the right place.
        number_start = 1 if first in '-+ ' else 0

        base = self.base
        # If base wasn't specified, detect it automatically
        if base is None:
            # Assume decimal number, unless different base is detected
            base = 10

            # For number formats starting with 0b, 0o, 0x, use corresponding
            # base. The length is checked first so a lone sign cannot index
            # past the end of the string.
            if len(string) - number_start > 2 and string[number_start] == '0':
                indicator = string[number_start + 1]
                if indicator in 'bB':
                    base = 2
                elif indicator in 'oO':
                    base = 8
                elif indicator in 'xX':
                    base = 16

        chars = int_convert.CHARS[:base]
        string = re.sub('[^%s]' % chars, '', string.lower())
        return sign * int(string, base)
class convert_first:
    """Adapt a one-argument converter to the ``(string, match)`` signature.

    Behaves like ``lambda s, m: converter(s)`` but, being a plain class
    instance rather than a lambda, it can be pickled.
    """

    def __init__(self, converter):
        self.converter = converter

    def __call__(self, string, match):
        # the match object is deliberately ignored
        convert = self.converter
        return convert(string)
def percentage(string, match):
    """Turn text such as ``"50%"`` into a fraction (``0.5``).

    The final character is dropped without being inspected; ``match`` is
    unused.
    """
    number_text = string[:-1]
    return float(number_text) / 100.0
class FixedTzOffset(tzinfo):
    """Fixed offset in minutes east from UTC.

    Two instances compare equal when both their name and their offset are
    equal, and equal instances hash equally.
    """

    ZERO = timedelta(0)

    def __init__(self, offset, name):
        # offset is a number of minutes; name is returned by tzname()
        self._offset = timedelta(minutes=offset)
        self._name = name

    def __repr__(self):
        return '<%s %s %s>' % (self.__class__.__name__, self._name, self._offset)

    def utcoffset(self, dt):
        return self._offset

    def tzname(self, dt):
        return self._name

    def dst(self, dt):
        return self.ZERO

    def __eq__(self, other):
        if not isinstance(other, FixedTzOffset):
            return False
        return self._name == other._name and self._offset == other._offset

    def __ne__(self, other):
        # Python 2 does not derive != from ==
        return not self == other

    def __hash__(self):
        # Defining __eq__ alone makes a class unhashable on Python 3; hash
        # the same fields that __eq__ compares.
        return hash((self._name, self._offset))
# Month names (three-letter and full English forms) mapped to month numbers.
# The insertion order is kept as abbreviation-then-full-name per month because
# ALL_MONTHS_PAT below is built by joining the keys in iteration order.
MONTHS_MAP = {
    'Jan': 1, 'January': 1,
    'Feb': 2, 'February': 2,
    'Mar': 3, 'March': 3,
    'Apr': 4, 'April': 4,
    'May': 5,
    'Jun': 6, 'June': 6,
    'Jul': 7, 'July': 7,
    'Aug': 8, 'August': 8,
    'Sep': 9, 'September': 9,
    'Oct': 10, 'October': 10,
    'Nov': 11, 'November': 11,
    'Dec': 12, 'December': 12,
}

# Regex fragments shared by the date/time field types. Every fragment is a
# single capturing group (TIME_PAT has nested groups for seconds/fraction).
DAYS_PAT = r'(Mon|Tue|Wed|Thu|Fri|Sat|Sun)'
MONTHS_PAT = r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'
ALL_MONTHS_PAT = r'(%s)' % '|'.join(MONTHS_MAP)
TIME_PAT = r'(\d{1,2}:\d{1,2}(:\d{1,2}(\.\d+)?)?)'
AM_PAT = r'(\s+[AP]M)'
TZ_PAT = r'(\s+[-+]\d\d?:?\d\d)'
def date_convert(
    string,
    match,
    ymd=None,
    mdy=None,
    dmy=None,
    d_m_y=None,
    hms=None,
    am=None,
    tz=None,
    mm=None,
    dd=None,
):
    """Convert the incoming string containing some date / time info into a
    datetime instance.

    Every keyword argument is an index into ``match.groups()`` (``d_m_y`` is
    a tuple of three such indexes) naming the group that holds that piece:

    :param string: the matched text (unused; the groups are read instead)
    :param match: regex match object whose groups hold the date/time parts
    :param ymd, mdy, dmy: group holding a whole date, split on "-", "/" or
        whitespace, in the order the name says
    :param d_m_y: (day, month, year) group indexes
    :param hms: group holding "H:M" or "H:M:S[.fraction]"
    :param am: group holding an AM/PM marker
    :param tz: group holding "Z" or a numeric UTC offset
    :param mm, dd: month and day groups; the year is the current year
    :return: ``datetime.time`` when no date group was given, otherwise
        ``datetime.datetime``
    :raises ValueError: for a timezone that is neither "Z" nor numeric

    Matching is case-insensitive by default, so AM/PM markers, "Z" and month
    names are recognised regardless of case.
    """
    groups = match.groups()
    time_only = False
    if mm is not None and dd is not None:
        y = datetime.today().year
        m = groups[mm]
        d = groups[dd]
    elif ymd is not None:
        y, m, d = re.split(r'[-/\s]', groups[ymd])
    elif mdy is not None:
        m, d, y = re.split(r'[-/\s]', groups[mdy])
    elif dmy is not None:
        d, m, y = re.split(r'[-/\s]', groups[dmy])
    elif d_m_y is not None:
        d, m, y = d_m_y
        d = groups[d]
        m = groups[m]
        y = groups[y]
    else:
        time_only = True

    H = M = S = u = 0
    if hms is not None and groups[hms]:
        t = groups[hms].split(':')
        if len(t) == 2:
            H, M = t
        else:
            H, M, S = t
            if '.' in S:
                S, u = S.split('.')
                # Pad/truncate the fraction to exactly six digits instead of
                # going through float, which can round a value down by one.
                u = int(u[:6].ljust(6, '0'))
            S = int(S)
        H = int(H)
        M = int(M)

    if am is not None:
        am = groups[am]
        if am:
            # the marker may have been matched case-insensitively
            am = am.strip().upper()
        if am == 'AM' and H == 12:
            # correction for "12" hour functioning as "0" hour:
            # 12:15 AM = 00:15 by 24 hr clock
            H -= 12
        elif am == 'PM' and H == 12:
            # no correction needed: 12PM is midday, 12:00 by 24 hour clock
            pass
        elif am == 'PM':
            H += 12

    tz_info = None
    if tz is not None:
        tz_text = groups[tz]
        if tz_text:
            tz_text = tz_text.strip()
        if tz_text:
            if tz_text.upper() == 'Z':
                tz_info = FixedTzOffset(0, 'UTC')
            elif tz_text.isupper():
                # TODO use the awesome python TZ module?
                # Named zones are not supported; fail clearly rather than
                # passing a str on as tzinfo.
                raise ValueError('unsupported timezone %r' % (tz_text,))
            else:
                sign = tz_text[0]
                if ':' in tz_text:
                    tzh, tzm = tz_text[1:].split(':')
                elif len(tz_text) == 4:  # 'snnn'
                    tzh, tzm = tz_text[1], tz_text[2:4]
                else:
                    tzh, tzm = tz_text[1:3], tz_text[3:5]
                offset = int(tzm) + int(tzh) * 60
                if sign == '-':
                    offset = -offset
                tz_info = FixedTzOffset(offset, tz_text)

    if time_only:
        return time(H, M, S, u, tzinfo=tz_info)

    y = int(y)
    if m.isdigit():
        m = int(m)
    else:
        # MONTHS_MAP keys are capitalised; the text may be in any case
        m = MONTHS_MAP[m.capitalize()]
    d = int(d)
    return datetime(y, m, d, H, M, S, u, tzinfo=tz_info)
class TooManyFields(ValueError):
    """Raised when a format needs more regex groups than ``re`` supports."""
class RepeatedNameError(ValueError):
    """Raised when a field name is repeated with a different format spec."""
# Characters in literal format text that must be backslash-escaped before the
# text is embedded in a regular expression.
# note: {} are handled separately
REGEX_SAFETY = re.compile(r'([?\\\\.[\]()*+\^$!\|])')

# allowed field types: the single-letter codes plus the two-letter "t?"
# date/time codes
ALLOWED_TYPES = set('nbox%fFegwWdDsSl') | {'t' + suffix for suffix in 'ieahgcts'}
def extract_format(format, extra_types):
    """Pull apart the format [[fill]align][sign][0][width][.precision][type]

    :param format: the text after ":" in a field, e.g. ``">10.2f"``
    :param extra_types: mapping of user-supplied type names; a type found
        here is accepted even if it is not one of the built-in types
    :return: dict with the keys ``fill``, ``align``, ``zero``, ``width``,
        ``type``, ``format`` (the unparsed remainder, same as ``type``) and
        ``extra_types``; ``precision`` is present only when the spec
        contained a "." part. ``width`` and ``precision`` are digit strings
        (possibly empty), ``fill`` and ``align`` are ``None`` when absent.
    :raises ValueError: when the type is neither built in nor in extra_types
    """
    fill = align = None
    # guard on emptiness so an empty spec yields an all-default result
    if format and format[0] in '<>=^':
        align = format[0]
        format = format[1:]
    elif len(format) > 1 and format[1] in '<>=^':
        fill = format[0]
        align = format[1]
        format = format[2:]

    # the sign option is accepted but carries no information for parsing
    if format.startswith(('+', '-', ' ')):
        format = format[1:]

    zero = False
    if format and format[0] == '0':
        zero = True
        format = format[1:]

    digits_end = 0
    while digits_end < len(format) and format[digits_end].isdigit():
        digits_end += 1
    width, format = format[:digits_end], format[digits_end:]

    precision = None
    if format.startswith('.'):
        # Precision isn't needed but we need to capture it so that
        # the ValueError isn't raised.
        format = format[1:]  # drop the '.'
        digits_end = 0
        while digits_end < len(format) and format[digits_end].isdigit():
            digits_end += 1
        precision, format = format[:digits_end], format[digits_end:]

    # the rest is the type, if present
    type_name = format
    if (
        type_name
        and type_name not in ALLOWED_TYPES
        and type_name not in extra_types
    ):
        raise ValueError('format spec %r not recognised' % type_name)

    spec = {
        'format': format,
        'extra_types': extra_types,
        'fill': fill,
        'align': align,
        'zero': zero,
        'width': width,
        'type': type_name,
    }
    if precision is not None:
        spec['precision'] = precision
    return spec
# Tokenizer for format strings: matches an escaped brace ("{{" or "}}") or a
# "{...}" field. A field is an optional \w* name followed by any number of
# ".attr" / "[key]" parts and an optional ":" plus a non-empty format spec.
# The single capturing group makes PARSE_RE.split() keep the matched tokens.
PARSE_RE = re.compile(r"""({{|}}|{\w*(?:(?:\.\w+)|(?:\[[^\]]+\]))*(?::[^}]+)?})""")
class Parser(object):
    """Encapsulate a format string that may be used to parse other strings.

    The format is translated once, at construction time, into a regular
    expression; the compiled forms are created lazily on first use.

    :param format: the format string, using "{}" style fields
    :param extra_types: optional mapping of type name to converter function
    :param case_sensitive: match case-sensitively when true (default false)
    :raises ValueError: if a field's format spec is not recognised
    :raises RepeatedNameError: if a field name is reused with another spec
    """

    # Single-character type codes that accept a sign and "=" alignment.
    # Kept as a set so that a multi-character custom type name is never
    # mistaken for a numeric type through a substring match.
    _NUMERIC_TYPES = frozenset('n%fegdobx')

    def __init__(self, format, extra_types=None, case_sensitive=False):
        # a mapping of a name as in {hello.world} to a regex-group compatible
        # name, like hello__world. It's used to prevent the transformation of
        # name-to-group and group to name from failing subtly, such as in:
        # hello_.world-> hello___world->hello._world
        self._group_to_name_map = {}
        # also store the original field name to group name mapping to allow
        # multiple instances of a name in the format string
        self._name_to_group_map = {}
        # and to sanity check the repeated instances store away the first
        # field type specification for the named field
        self._name_types = {}

        self._format = format
        if extra_types is None:
            extra_types = {}
        self._extra_types = extra_types
        if case_sensitive:
            self._re_flags = re.DOTALL
        else:
            self._re_flags = re.IGNORECASE | re.DOTALL
        self._fixed_fields = []
        self._named_fields = []
        self._group_index = 0
        self._type_conversions = {}
        self._expression = self._generate_expression()
        self.__search_re = None
        self.__match_re = None

        log.debug('format %r -> %r', format, self._expression)

    def __repr__(self):
        if len(self._format) > 20:
            return '<%s %r>' % (self.__class__.__name__, self._format[:17] + '...')
        return '<%s %r>' % (self.__class__.__name__, self._format)

    @property
    def _search_re(self):
        """Compiled, unanchored form of the expression (built on first use)."""
        if self.__search_re is None:
            try:
                self.__search_re = re.compile(self._expression, self._re_flags)
            except AssertionError:
                # access error through sys to keep py3k and backward compat
                e = str(sys.exc_info()[1])
                if e.endswith('this version only supports 100 named groups'):
                    raise TooManyFields(
                        'sorry, you are attempting to parse too many complex fields'
                    )
                # any other assertion is not ours to hide; returning None
                # here would only fail later with a less useful error
                raise
        return self.__search_re

    @property
    def _match_re(self):
        """Compiled form anchored to the whole string (built on first use)."""
        if self.__match_re is None:
            expression = r'\A%s\Z' % self._expression
            try:
                self.__match_re = re.compile(expression, self._re_flags)
            except AssertionError:
                # access error through sys to keep py3k and backward compat
                e = str(sys.exc_info()[1])
                if e.endswith('this version only supports 100 named groups'):
                    raise TooManyFields(
                        'sorry, you are attempting to parse too many complex fields'
                    )
                raise
            except re.error:
                raise NotImplementedError(
                    "Group names (e.g. (?P<name>) can "
                    "cause failure, as they are not escaped properly: '%s'" % expression
                )
        return self.__match_re

    @property
    def named_fields(self):
        """A copy of the regex group names of the named fields."""
        # list() rather than list.copy(), which Python 2 lists do not have
        return list(self._named_fields)

    @property
    def fixed_fields(self):
        """A copy of the regex group indexes of the positional fields."""
        return list(self._fixed_fields)

    def parse(self, string, evaluate_result=True):
        """Match my format to the string exactly.

        Return a Result or Match instance or None if there's no match.
        """
        m = self._match_re.match(string)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        return Match(self, m)

    def search(self, string, pos=0, endpos=None, evaluate_result=True):
        """Search the string for my format.

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        If the ``evaluate_result`` argument is set to ``False`` a
        Match instance is returned instead of the actual Result instance.

        Return either a Result instance or None if there's no match.
        """
        if endpos is None:
            endpos = len(string)
        m = self._search_re.search(string, pos, endpos)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        return Match(self, m)

    def findall(
        self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True
    ):
        """Search "string" for all occurrences of "format".

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        ``extra_types`` is accepted for signature compatibility only; it is
        not used here (the types given to the constructor apply).

        Returns an iterator that holds Result or Match instances for each
        format match found.
        """
        if endpos is None:
            endpos = len(string)
        return ResultIterator(
            self, string, pos, endpos, evaluate_result=evaluate_result
        )

    def _expand_named_fields(self, named_fields):
        """Turn flat 'aaa[bbb][ccc]' keys into nested dictionaries."""
        result = {}
        for field, value in named_fields.items():
            # split 'aaa[bbb][ccc]...' into 'aaa' and '[bbb][ccc]...'
            basename, subkeys = re.match(r'([^\[]+)(.*)', field).groups()

            # create nested dictionaries {'aaa': {'bbb': {'ccc': ...}}}
            d = result
            k = basename

            if subkeys:
                for subkey in re.findall(r'\[[^\]]+\]', subkeys):
                    d = d.setdefault(k, {})
                    k = subkey[1:-1]

            # assign the value to the last key
            d[k] = value

        return result

    def evaluate_result(self, m):
        '''Generate a Result instance for the given regex match object'''
        # ok, figure the fixed fields we've pulled out and type convert them
        fixed_fields = list(m.groups())
        for n in self._fixed_fields:
            if n in self._type_conversions:
                fixed_fields[n] = self._type_conversions[n](fixed_fields[n], m)
        fixed_fields = tuple(fixed_fields[n] for n in self._fixed_fields)

        # grab the named fields, converting where requested
        groupdict = m.groupdict()
        named_fields = {}
        name_map = {}
        for k in self._named_fields:
            korig = self._group_to_name_map[k]
            name_map[korig] = k
            if k in self._type_conversions:
                value = self._type_conversions[k](groupdict[k], m)
            else:
                value = groupdict[k]

            named_fields[korig] = value

        # now figure the match spans
        spans = dict((n, m.span(name_map[n])) for n in named_fields)
        spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields))

        # and that's our result
        return Result(fixed_fields, self._expand_named_fields(named_fields), spans)

    def _regex_replace(self, match):
        return '\\' + match.group(1)

    def _generate_expression(self):
        # turn my _format attribute into the _expression attribute
        e = []
        for part in PARSE_RE.split(self._format):
            if not part:
                continue
            elif part == '{{':
                e.append(r'\{')
            elif part == '}}':
                e.append(r'\}')
            elif part[0] == '{' and part[-1] == '}':
                # this will be a braces-delimited field to handle
                e.append(self._handle_field(part))
            else:
                # just some text to match
                e.append(REGEX_SAFETY.sub(self._regex_replace, part))
        return ''.join(e)

    def _to_group_name(self, field):
        """Return a unique regex group name for ``field`` and record it.

        "." , "[" and "]" are not valid in group names and become "_". When
        that collides with an existing group ("a.b" vs "a_b", "a[b]" vs
        "a_b_") the separators are widened to runs of underscores until the
        name is unique.

        :raises KeyError: if the name collides and has nothing to widen
        """
        group = field.replace('.', '_').replace('[', '_').replace(']', '_')

        # make sure we don't collide ("a.b" colliding with "a_b")
        n = 1
        while group in self._group_to_name_map:
            n += 1
            pad = '_' * n
            if '.' in field:
                group = field.replace('.', pad)
            elif '_' in field:
                group = field.replace('_', pad)
            elif '[' in field or ']' in field:
                group = field.replace('[', pad).replace(']', pad)
            else:
                raise KeyError('duplicated group name %r' % (field,))
            # whichever separator was widened, brackets must never survive
            # into the group name
            group = group.replace('[', '_').replace(']', '_')

        # save off the mapping
        self._group_to_name_map[group] = field
        self._name_to_group_map[field] = group
        return group

    def _handle_field(self, field):
        """Translate one "{...}" field into its regular expression.

        Registers the field (fixed or named), its type conversion if any,
        and advances the running group index by the number of regex groups
        the generated pattern contains.
        """
        # first: lose the braces
        field = field[1:-1]

        # now figure whether this is an anonymous or named field, and whether
        # there's any format specification
        format = ''

        if ':' in field:
            # split once only: a colon inside the spec belongs to the spec
            name, format = field.split(':', 1)
        else:
            name = field

        # This *should* be more flexible, but parsing complicated structures
        # out of the string is hard (and not necessarily useful) ... and I'm
        # being lazy. So for now `identifier` is "anything starting with a
        # letter" and digit args don't get attribute or element stuff.
        if name and name[0].isalpha():
            if name in self._name_to_group_map:
                if self._name_types[name] != format:
                    raise RepeatedNameError(
                        'field type %r for field "%s" '
                        'does not match previous seen type %r'
                        % (format, name, self._name_types[name])
                    )
                group = self._name_to_group_map[name]
                # match previously-seen value
                return r'(?P=%s)' % group
            group = self._to_group_name(name)
            self._name_types[name] = format
            self._named_fields.append(group)
            # this will become a group, which must not contain dots
            wrap = r'(?P<%s>%%s)' % group
        else:
            self._fixed_fields.append(self._group_index)
            wrap = r'(%s)'
            group = self._group_index

        # simplest case: no type specifier ({} or {name})
        if not format:
            self._group_index += 1
            return wrap % r'.+?'

        # decode the format specification
        format = extract_format(format, self._extra_types)

        # figure type conversions, if any
        type = format['type']
        is_numeric = type in self._NUMERIC_TYPES
        if type in self._extra_types:
            type_converter = self._extra_types[type]
            s = getattr(type_converter, 'pattern', r'.+?')
            regex_group_count = getattr(type_converter, 'regex_group_count', 0)
            if regex_group_count is None:
                regex_group_count = 0
            self._group_index += regex_group_count
            self._type_conversions[group] = convert_first(type_converter)
        elif type == 'n':
            s = r'\d{1,3}([,.]\d{3})*'
            self._group_index += 1
            self._type_conversions[group] = int_convert(10)
        elif type == 'b':
            s = r'(0[bB])?[01]+'
            self._type_conversions[group] = int_convert(2)
            self._group_index += 1
        elif type == 'o':
            s = r'(0[oO])?[0-7]+'
            self._type_conversions[group] = int_convert(8)
            self._group_index += 1
        elif type == 'x':
            s = r'(0[xX])?[0-9a-fA-F]+'
            self._type_conversions[group] = int_convert(16)
            self._group_index += 1
        elif type == '%':
            s = r'\d+(\.\d+)?%'
            self._group_index += 1
            self._type_conversions[group] = percentage
        elif type == 'f':
            s = r'\d*\.\d+'
            self._type_conversions[group] = convert_first(float)
        elif type == 'F':
            s = r'\d*\.\d+'
            self._type_conversions[group] = convert_first(Decimal)
        elif type == 'e':
            s = r'\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF'
            self._type_conversions[group] = convert_first(float)
        elif type == 'g':
            s = r'\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF'
            self._group_index += 2
            self._type_conversions[group] = convert_first(float)
        elif type == 'd':
            if format.get('width'):
                width = r'{1,%s}' % int(format['width'])
            else:
                width = '+'
            s = r'\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}'.format(
                w=width
            )
            # do not specify number base, determine it automatically
            self._type_conversions[group] = int_convert()
        elif type == 'ti':
            s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % TIME_PAT
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, ymd=n + 1, hms=n + 4, tz=n + 7
            )
            self._group_index += 7
        elif type == 'tg':
            s = r'(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?' % (
                ALL_MONTHS_PAT,
                TIME_PAT,
                AM_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == 'ta':
            s = r'((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?' % (
                ALL_MONTHS_PAT,
                TIME_PAT,
                AM_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == 'te':
            # this will allow microseconds through if they're present, but meh
            s = r'(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s' % (
                DAYS_PAT,
                MONTHS_PAT,
                TIME_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 3, hms=n + 5, tz=n + 8
            )
            self._group_index += 8
        elif type == 'th':
            # slight flexibility here from the stock Apache format
            s = r'(\d{1,2}[-/]%s[-/]\d{4}):%s%s' % (MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 1, hms=n + 3, tz=n + 6
            )
            self._group_index += 6
        elif type == 'tc':
            s = r'(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})' % (
                DAYS_PAT,
                MONTHS_PAT,
                TIME_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5
            )
            self._group_index += 8
        elif type == 'tt':
            s = r'%s?%s?%s?' % (TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, hms=n + 1, am=n + 4, tz=n + 5
            )
            self._group_index += 5
        elif type == 'ts':
            s = r'%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?' % MONTHS_PAT
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, mm=n + 1, dd=n + 3, hms=n + 5
            )
            self._group_index += 5
        elif type == 'l':
            s = r'[A-Za-z]+'
        elif type:
            # remaining built-in codes map straight onto regex classes
            s = r'\%s+' % type
        elif format.get('precision'):
            if format.get('width'):
                s = r'.{%s,%s}?' % (format['width'], format['precision'])
            else:
                s = r'.{1,%s}?' % format['precision']
        elif format.get('width'):
            s = r'.{%s,}?' % format['width']
        else:
            s = r'.+?'

        align = format['align']
        fill = format['fill']

        # handle some numeric-specific things like fill and sign
        if is_numeric:
            # prefix with something (align "=" trumps zero)
            if align == '=':
                # special case - align "=" acts like the zero above but with
                # configurable fill defaulting to "0"
                if not fill:
                    fill = '0'
                s = r'%s*' % fill + s

            # allow numbers to be prefixed with a sign
            s = r'[-+ ]?' + s

        if not fill:
            fill = ' '

        # Place into a group now - this captures the value we want to keep.
        # Everything else from now is just padding to be stripped off
        if wrap:
            s = wrap % s
            self._group_index += 1

        if format['width']:
            # all we really care about is that if the format originally
            # specified a width then there will probably be padding - without
            # an explicit alignment that'll mean right alignment with spaces
            # padding
            if not align:
                align = '>'

        if fill in r'.\+?*[](){}^$':
            fill = '\\' + fill

        # align "=" has been handled
        if align == '<':
            s = '%s%s*' % (s, fill)
        elif align == '>':
            s = '%s*%s' % (fill, s)
        elif align == '^':
            s = '%s*%s%s*' % (fill, s, fill)

        return s
class Result(object):
    """The result of a parse() or search().

    Positional values live in ``fixed`` and are reached with
    ``result[index]`` or a slice; named values live in ``named`` and are
    reached with ``result['name']``. ``'name' in result`` tests whether a
    named value exists. ``spans`` holds the match spans as given to the
    constructor.
    """

    def __init__(self, fixed, named, spans):
        self.fixed, self.named, self.spans = fixed, named, spans

    def __getitem__(self, item):
        # integers and slices address the positional values, anything else
        # is treated as a field name
        container = self.fixed if isinstance(item, (int, slice)) else self.named
        return container[item]

    def __repr__(self):
        parts = (self.__class__.__name__, self.fixed, self.named)
        return '<%s %r %r>' % parts

    def __contains__(self, name):
        return name in self.named
class Match(object):
    """The result of a parse() or search() if no results are generated.

    It keeps the parser and the raw regex match object together so the
    conversion to a Result can be requested later through
    :meth:`evaluate_result`.
    """

    def __init__(self, parser, match):
        self.parser, self.match = parser, match

    def evaluate_result(self):
        '''Generate results for this Match'''
        owner = self.parser
        return owner.evaluate_result(self.match)
class ResultIterator(object):
    """The result of a findall() operation.

    Each element is a Result instance, or a Match instance when the iterator
    was created with ``evaluate_result=False``.
    """

    def __init__(self, parser, string, pos, endpos, evaluate_result=True):
        self.parser = parser
        self.string = string
        self.pos = pos
        self.endpos = endpos
        self.evaluate_result = evaluate_result

    def __iter__(self):
        return self

    def __next__(self):
        # The regex engine clamps an out-of-range start position back into
        # the string, so "past the end" has to be detected here.
        limit = max(0, min(self.endpos, len(self.string)))
        if self.pos > limit:
            raise StopIteration()

        m = self.parser._search_re.search(self.string, self.pos, self.endpos)
        if m is None:
            raise StopIteration()

        if m.end() > m.start():
            self.pos = m.end()
        else:
            # A zero-width match would be found again at the same position
            # forever; step over it.
            self.pos = m.end() + 1

        if self.evaluate_result:
            return self.parser.evaluate_result(m)
        return Match(self.parser, m)

    # pre-py3k compat
    next = __next__
def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False):
    """Using "format" attempt to pull values from "string".

    The whole of "string" must match the format. To find the format inside
    a longer string use search() instead.

    With ``evaluate_result`` true (the default) the return value is a Result
    instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false the return value is a Match instance with
    one method:

     .evaluate_result() - returns the Result instance you would have got
                          with ``evaluate_result`` set to True

    Matching is case insensitive unless case_sensitive=True is given.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".

    In the case there is no match parse() will return None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.parse(string, evaluate_result=evaluate_result)
    return outcome
def search(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Search "string" for the first occurrence of "format".

    The format may occur anywhere within the string. To require the format
    to match the string exactly use parse() instead.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` true (the default) the return value is a Result
    instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false the return value is a Match instance with
    one method:

     .evaluate_result() - returns the Result instance you would have got
                          with ``evaluate_result`` set to True

    Matching is case insensitive unless case_sensitive=True is given.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".

    In the case there is no match search() will return None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.search(string, pos, endpos, evaluate_result=evaluate_result)
    return outcome
def findall(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Search "string" for all occurrences of "format".

    The return value is an iterator yielding one item per match found.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` true (the default) each item is a Result
    instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false each item is a Match instance with one
    method:

     .evaluate_result() - returns the Result instance you would have got
                          with ``evaluate_result`` set to True

    Matching is case insensitive unless case_sensitive=True is given.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    matches = parser.findall(string, pos, endpos, evaluate_result=evaluate_result)
    return matches
def compile(format, extra_types=None, case_sensitive=False):
    """Create a Parser instance to parse "format".

    The returned Parser has a .parse(string) method that behaves in the
    same manner as parse(format, string). Prefer this function when many
    strings are to be parsed with the same format.

    Matching is case insensitive unless case_sensitive=True is given.

    See the module documentation for the use of "extra_types".

    Returns a Parser instance.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    return parser
1010# Copyright (c) 2012-2020 Richard Jones <richard@python.org>
1011#
1012# Permission is hereby granted, free of charge, to any person obtaining a copy
1013# of this software and associated documentation files (the "Software"), to deal
1014# in the Software without restriction, including without limitation the rights
1015# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1016# copies of the Software, and to permit persons to whom the Software is
1017# furnished to do so, subject to the following conditions:
1018#
1019# The above copyright notice and this permission notice shall be included in
1020# all copies or substantial portions of the Software.
1021#
1022# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1023# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1024# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1025# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1026# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1027# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1028# SOFTWARE.
1030# vim: set filetype=python ts=4 sw=4 et si tw=75