Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parse.py: 74%

1from __future__ import absolute_import

3__version__ = '1.19.1'

5# yes, I now have two problems

6import re

7import sys

8from datetime import datetime, time, tzinfo, timedelta

9from decimal import Decimal

10from functools import partial

11import logging

13__all__ = 'parse search findall with_pattern'.split()

15log = logging.getLogger(__name__)

def with_pattern(pattern, regex_group_count=None):
    r"""Build a decorator that tags a type-converter function with a regex.

    The decorator stores ``pattern`` on the function as its :attr:`pattern`
    attribute and ``regex_group_count`` as its :attr:`regex_group_count`
    attribute, then returns that very same function.

    EXAMPLE:
        >>> import parse
        >>> @parse.with_pattern(r"\d+")
        ... def parse_number(text):
        ...     return int(text)

    is equivalent to:

        >>> def parse_number(text):
        ...     return int(text)
        >>> parse_number.pattern = r"\d+"

    :param pattern: regular expression pattern (as text)
    :param regex_group_count: Indicates how many regex-groups are in pattern.
    :return: wrapped function
    """

    def annotate(func):
        # Plain attribute assignment, spelled with setattr; the function
        # object itself is handed back untouched.
        setattr(func, 'pattern', pattern)
        setattr(func, 'regex_group_count', regex_group_count)
        return func

    return annotate

class int_convert:
    """Convert a string to an integer.

    The string may start with a sign ('-', '+' or a space standing in for
    the sign position).

    It may be of a base other than 2, 8, 10 or 16.

    If base isn't specified, it will be detected automatically based
    on a string format. When string starts with a base indicator, 0#nnnn,
    it overrides the default base of 10.

    It may also have other non-numeric characters that we can ignore.
    """

    # Digits for every base up to 36; the first ``base`` characters are the
    # ones considered valid for that base.
    CHARS = '0123456789abcdefghijklmnopqrstuvwxyz'

    def __init__(self, base=None):
        """Remember the base to convert with (None means auto-detect)."""
        self.base = base

    def __call__(self, string, match):
        """Return ``string`` converted to an int; ``match`` is not used.

        Raises ValueError when no digit valid for the base is left after
        stripping, and IndexError when ``string`` is empty.
        """
        # Probe the first character explicitly so an empty string still
        # fails with IndexError.
        string[0]

        # Skip every leading sign-position character.  The numeric patterns
        # allow a space where a sign may stand, so the base indicator is not
        # necessarily at index 0 or 1.
        body = string.lstrip('-+ ')
        prefix = string[:len(string) - len(body)]
        sign = -1 if '-' in prefix else 1

        base = self.base
        # If base wasn't specified, detect it automatically
        if base is None:
            # Assume decimal number, unless different base is detected
            base = 10

            # For number formats starting with 0b, 0o, 0x, use corresponding base ...
            if len(body) > 2 and body[0] == '0':
                indicator = body[1]
                if indicator in 'bB':
                    base = 2
                elif indicator in 'oO':
                    base = 8
                elif indicator in 'xX':
                    base = 16

        # Drop everything that is not a digit of the chosen base (sign,
        # base indicator letter, thousands separators, ...).
        chars = int_convert.CHARS[:base]
        string = re.sub('[^%s]' % chars, '', string.lower())
        return sign * int(string, base)

class convert_first:
    """Adapt a one-argument converter to the ``(string, match)`` call shape.

    Calling the instance is the same as ``lambda s, m: converter(s)``; the
    point of using a class instead of a lambda is that it can be pickled.
    """

    def __init__(self, converter):
        self.converter = converter

    def __call__(self, string, match):
        # The match object is accepted for interface compatibility only.
        convert = self.converter
        return convert(string)

110

111

def percentage(string, match):
    """Turn text such as ``'50%'`` into the fraction ``0.5``.

    The last character of ``string`` is dropped unconditionally and the
    remainder is read as a float and divided by 100.  ``match`` is unused.
    """
    number_text = string[:-1]
    return float(number_text) / 100.0

114

115

class FixedTzOffset(tzinfo):
    """Fixed offset in minutes east from UTC.

    Two instances compare equal when both their name and their offset are
    equal; equal instances also hash equal, so they can be used in sets and
    as dictionary keys.
    """

    # DST adjustment reported for every datetime: none.
    ZERO = timedelta(0)

    def __init__(self, offset, name):
        """Store ``offset`` (minutes east of UTC) and the zone ``name``."""
        self._offset = timedelta(minutes=offset)
        self._name = name

    def __repr__(self):
        return '<%s %s %s>' % (self.__class__.__name__, self._name, self._offset)

    def utcoffset(self, dt):
        """Return the fixed offset as a timedelta, whatever ``dt`` is."""
        return self._offset

    def tzname(self, dt):
        """Return the name given at construction, whatever ``dt`` is."""
        return self._name

    def dst(self, dt):
        """Return a zero timedelta: a fixed offset has no DST component."""
        return self.ZERO

    def __eq__(self, other):
        if not isinstance(other, FixedTzOffset):
            return False
        return self._name == other._name and self._offset == other._offset

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ alone makes a class unhashable on Python 3; hash
        # the same pair of values that __eq__ compares.
        return hash((self._name, self._offset))

141

142

# Month names, abbreviated and in full, mapped to their 1-based number.
# The key order is significant: ALL_MONTHS_PAT joins the keys in this order.
MONTHS_MAP = {
    'Jan': 1,
    'January': 1,
    'Feb': 2,
    'February': 2,
    'Mar': 3,
    'March': 3,
    'Apr': 4,
    'April': 4,
    'May': 5,
    'Jun': 6,
    'June': 6,
    'Jul': 7,
    'July': 7,
    'Aug': 8,
    'August': 8,
    'Sep': 9,
    'September': 9,
    'Oct': 10,
    'October': 10,
    'Nov': 11,
    'November': 11,
    'Dec': 12,
    'December': 12,
}

# Regex fragments shared by the date/time field types.  Each one is a single
# capturing group (TIME_PAT nests two more groups for seconds and fraction).
DAYS_PAT = r'(Mon|Tue|Wed|Thu|Fri|Sat|Sun)'
MONTHS_PAT = r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'
ALL_MONTHS_PAT = r'(%s)' % '|'.join(MONTHS_MAP)
TIME_PAT = r'(\d{1,2}:\d{1,2}(:\d{1,2}(\.\d+)?)?)'
AM_PAT = r'(\s+[AP]M)'
TZ_PAT = r'(\s+[-+]\d\d?:?\d\d)'

174

175

def date_convert(
    string,
    match,
    ymd=None,
    mdy=None,
    dmy=None,
    d_m_y=None,
    hms=None,
    am=None,
    tz=None,
    mm=None,
    dd=None,
):
    """Convert the incoming string containing some date / time info into a
    datetime instance.

    Every keyword argument is an index into ``match.groups()`` telling where
    a piece of the date/time was captured:

    :param string: the matched text (not used; values come from ``match``)
    :param match: regex match object whose groups hold the pieces
    :param ymd: group holding "year-month-day" (split on '-', '/' or space)
    :param mdy: group holding "month-day-year"
    :param dmy: group holding "day-month-year"
    :param d_m_y: 3-tuple of group indices for day, month and year
    :param hms: group holding "H:M", "H:M:S" or "H:M:S.fraction"
    :param am: group holding the AM/PM marker
    :param tz: group holding "Z" or a numeric "+HH:MM" / "+HHMM" offset
    :param mm: group holding the month (used together with ``dd``; the year
        is taken from today's date)
    :param dd: group holding the day (used together with ``mm``)
    :return: a ``datetime.time`` when no date selector was given, otherwise
        a ``datetime.datetime``

    AM/PM markers, the "Z" zone and month names are recognised regardless of
    letter case, because the parser matches case-insensitively by default.
    """
    groups = match.groups()
    time_only = False
    if mm and dd:
        y = datetime.today().year
        m = groups[mm]
        d = groups[dd]
    elif ymd is not None:
        y, m, d = re.split(r'[-/\s]', groups[ymd])
    elif mdy is not None:
        m, d, y = re.split(r'[-/\s]', groups[mdy])
    elif dmy is not None:
        d, m, y = re.split(r'[-/\s]', groups[dmy])
    elif d_m_y is not None:
        d, m, y = d_m_y
        d = groups[d]
        m = groups[m]
        y = groups[y]
    else:
        time_only = True

    H = M = S = u = 0
    if hms is not None and groups[hms]:
        t = groups[hms].split(':')
        if len(t) == 2:
            H, M = t
        else:
            H, M, S = t
            if '.' in S:
                S, u = S.split('.')
                # Pad / truncate the fraction to exactly six digits instead
                # of multiplying a float, which can land just below the
                # intended integer and lose a microsecond.
                u = int((u + '000000')[:6])
            S = int(S)
        H = int(H)
        M = int(M)

    if am is not None:
        am = groups[am]
        if am:
            # normalise case: "pm" is matched too when ignoring case
            am = am.strip().upper()
        if am == 'AM' and H == 12:
            # correction for "12" hour functioning as "0" hour: 12:15 AM = 00:15 by 24 hr clock
            H -= 12
        elif am == 'PM' and H != 12:
            # 12PM is midday (12:00 by 24 hour clock) and needs no
            # correction; every other PM hour moves to the afternoon
            H += 12

    if tz is not None:
        tz = groups[tz]
        if tz:
            tz = tz.strip()
            if tz.upper() == 'Z':
                tz = FixedTzOffset(0, 'UTC')
            elif tz.isupper():
                # TODO use the awesome python TZ module?
                # (tz is left as the plain string here)
                pass
            else:
                sign = tz[0]
                if ':' in tz:
                    tzh, tzm = tz[1:].split(':')
                elif len(tz) == 4:  # 'snnn'
                    tzh, tzm = tz[1], tz[2:4]
                else:
                    tzh, tzm = tz[1:3], tz[3:5]
                offset = int(tzm) + int(tzh) * 60
                if sign == '-':
                    offset = -offset
                tz = FixedTzOffset(offset, tz)

    if time_only:
        d = time(H, M, S, u, tzinfo=tz)
    else:
        y = int(y)
        if m.isdigit():
            m = int(m)
        elif m in MONTHS_MAP:
            m = MONTHS_MAP[m]
        else:
            # "jan" / "JANUARY" reach here under case-insensitive matching
            m = MONTHS_MAP[m.capitalize()]
        d = int(d)
        d = datetime(y, m, d, H, M, S, u, tzinfo=tz)

    return d

273

274

class TooManyFields(ValueError):
    """Raised when compiling the generated expression hits the regex
    engine's limit on the number of named groups."""

277

278

class RepeatedNameError(ValueError):
    """Raised when a field name is repeated in a format string with a type
    specification that differs from the one first seen for that name."""

281

282

# Matches each character of literal format text that would otherwise be
# interpreted by the regex engine, so that it can be backslash-escaped.
# note: {} are handled separately
REGEX_SAFETY = re.compile(r'([?\\\\.[\]()*+\^$!\|])')

# allowed field types: the single-character codes plus the two-character
# date/time codes, each of which starts with "t"
ALLOWED_TYPES = set('nbox%fFegwWdDsSl') | set('t' + c for c in 'ieahgcts')

289

290

def extract_format(format, extra_types):
    """Pull apart the format [[fill]align][sign][0][width][.precision][type]

    :param format: the format specification text (the part after ':' in a
        field); an empty string is accepted and yields an empty type
    :param extra_types: container of additional type names that are valid
        besides the built-in ALLOWED_TYPES
    :return: dict with the keys ``fill``, ``align``, ``zero``, ``width``,
        ``type``, ``format`` (whatever text is left after parsing, i.e. the
        type) and ``extra_types``; ``precision`` is present only when the
        specification contained a '.'
    :raises ValueError: if the type is neither built in nor in extra_types
    """
    fill = align = None
    # Guard against an empty spec before looking at its first character.
    if format and format[0] in '<>=^':
        align = format[0]
        format = format[1:]
    elif len(format) > 1 and format[1] in '<>=^':
        fill = format[0]
        align = format[1]
        format = format[2:]

    # The sign flag carries no information for parsing; just skip it.
    if format.startswith(('+', '-', ' ')):
        format = format[1:]

    zero = False
    if format and format[0] == '0':
        zero = True
        format = format[1:]

    width = ''
    while format and format[0].isdigit():
        width += format[0]
        format = format[1:]

    precision = None
    if format.startswith('.'):
        # Precision isn't needed but we need to capture it so that
        # the ValueError isn't raised.
        format = format[1:]  # drop the '.'
        precision = ''
        while format and format[0].isdigit():
            precision += format[0]
            format = format[1:]

    # the rest is the type, if present
    type_ = format
    if type_ and type_ not in ALLOWED_TYPES and type_ not in extra_types:
        raise ValueError('format spec %r not recognised' % type_)

    # Build the result explicitly rather than returning locals(), keeping
    # the very same keys that callers have always received.
    result = {
        'format': format,
        'extra_types': extra_types,
        'fill': fill,
        'align': align,
        'zero': zero,
        'width': width,
        'type': type_,
    }
    if precision is not None:
        result['precision'] = precision
    return result

334

335

# Used with .split() to cut a format string into literal text, the "{{" and
# "}}" escapes, and "{...}" replacement fields.  A field is an optional word
# followed by any number of ".attr" / "[key]" suffixes and an optional
# ":spec" part.
PARSE_RE = re.compile(
    r"""({{|}}|{\w*(?:(?:\.\w+)|(?:\[[^\]]+\]))*(?::[^}]+)?})"""
)

337

338

class Parser(object):
    """Encapsulate a format string that may be used to parse other strings."""

    def __init__(self, format, extra_types=None, case_sensitive=False):
        """Translate ``format`` into a regular expression.

        :param format: the format string to parse with
        :param extra_types: mapping of custom type name to converter
        :param case_sensitive: match letter case exactly when true; the
            default is to ignore case
        """
        # a mapping of a name as in {hello.world} to a regex-group compatible
        # name, like hello__world. It's used to prevent the transformation of
        # name-to-group and group to name to fail subtly, such as in:
        # hello_.world-> hello___world->hello._world
        self._group_to_name_map = {}
        # also store the original field name to group name mapping to allow
        # multiple instances of a name in the format string
        self._name_to_group_map = {}
        # and to sanity check the repeated instances store away the first
        # field type specification for the named field
        self._name_types = {}

        self._format = format
        if extra_types is None:
            extra_types = {}
        self._extra_types = extra_types
        if case_sensitive:
            self._re_flags = re.DOTALL
        else:
            self._re_flags = re.IGNORECASE | re.DOTALL
        self._fixed_fields = []
        self._named_fields = []
        self._group_index = 0
        self._type_conversions = {}
        self._expression = self._generate_expression()
        # the compiled patterns are created lazily by the two properties
        self.__search_re = None
        self.__match_re = None

        log.debug('format %r -> %r', format, self._expression)

    def __repr__(self):
        if len(self._format) > 20:
            return '<%s %r>' % (self.__class__.__name__, self._format[:17] + '...')
        return '<%s %r>' % (self.__class__.__name__, self._format)

    def _compile_expression(self, expression):
        """Compile ``expression`` with this parser's flags.

        Raises TooManyFields when the regex engine reports that it ran out
        of named groups; any other AssertionError is re-raised unchanged
        rather than being swallowed.
        """
        try:
            return re.compile(expression, self._re_flags)
        except AssertionError:
            # access error through sys to keep py3k and backward compat
            e = str(sys.exc_info()[1])
            if e.endswith('this version only supports 100 named groups'):
                raise TooManyFields(
                    'sorry, you are attempting to parse ' 'too many complex fields'
                )
            raise

    @property
    def _search_re(self):
        """Compiled pattern for search() / findall(), built on first use."""
        if self.__search_re is None:
            self.__search_re = self._compile_expression(self._expression)
        return self.__search_re

    @property
    def _match_re(self):
        """Compiled pattern anchored at both ends, built on first use."""
        if self.__match_re is None:
            expression = r'\A%s\Z' % self._expression
            try:
                self.__match_re = self._compile_expression(expression)
            except re.error:
                raise NotImplementedError(
                    "Group names (e.g. (?P<name>) can "
                    "cause failure, as they are not escaped properly: '%s'" % expression
                )
        return self.__match_re

    @property
    def named_fields(self):
        """A copy of the list of regex group names of the named fields."""
        return list(self._named_fields)

    @property
    def fixed_fields(self):
        """A copy of the list of group indices of the positional fields."""
        return list(self._fixed_fields)

    def parse(self, string, evaluate_result=True):
        """Match my format to the string exactly.

        Return a Result or Match instance or None if there's no match.
        """
        m = self._match_re.match(string)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def search(self, string, pos=0, endpos=None, evaluate_result=True):
        """Search the string for my format.

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        If the ``evaluate_result`` argument is set to ``False`` a
        Match instance is returned instead of the actual Result instance.

        Return either a Result instance or None if there's no match.
        """
        if endpos is None:
            endpos = len(string)
        m = self._search_re.search(string, pos, endpos)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def findall(
        self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True
    ):
        """Search "string" for all occurrences of "format".

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        The ``extra_types`` argument is accepted but not used here; the
        types given to the constructor apply.

        Returns an iterator that holds Result or Match instances for each format match
        found.
        """
        if endpos is None:
            endpos = len(string)
        return ResultIterator(
            self, string, pos, endpos, evaluate_result=evaluate_result
        )

    def _expand_named_fields(self, named_fields):
        """Turn flat 'aaa[bbb][ccc]' keys into nested dictionaries."""
        result = {}
        for field, value in named_fields.items():
            # split 'aaa[bbb][ccc]...' into 'aaa' and '[bbb][ccc]...'
            basename, subkeys = re.match(r'([^\[]+)(.*)', field).groups()

            # create nested dictionaries {'aaa': {'bbb': {'ccc': ...}}}
            d = result
            k = basename

            if subkeys:
                for subkey in re.findall(r'\[[^\]]+\]', subkeys):
                    d = d.setdefault(k, {})
                    k = subkey[1:-1]

            # assign the value to the last key
            d[k] = value

        return result

    def evaluate_result(self, m):
        '''Generate a Result instance for the given regex match object'''
        # ok, figure the fixed fields we've pulled out and type convert them
        fixed_fields = list(m.groups())
        for n in self._fixed_fields:
            if n in self._type_conversions:
                fixed_fields[n] = self._type_conversions[n](fixed_fields[n], m)
        fixed_fields = tuple(fixed_fields[n] for n in self._fixed_fields)

        # grab the named fields, converting where requested
        groupdict = m.groupdict()
        named_fields = {}
        name_map = {}
        for k in self._named_fields:
            korig = self._group_to_name_map[k]
            name_map[korig] = k
            if k in self._type_conversions:
                value = self._type_conversions[k](groupdict[k], m)
            else:
                value = groupdict[k]

            named_fields[korig] = value

        # now figure the match spans
        spans = dict((n, m.span(name_map[n])) for n in named_fields)
        spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields))

        # and that's our result
        return Result(fixed_fields, self._expand_named_fields(named_fields), spans)

    def _regex_replace(self, match):
        """Backslash-escape the character captured by REGEX_SAFETY."""
        return '\\' + match.group(1)

    def _generate_expression(self):
        """Return the regular expression text equivalent to ``_format``."""
        # turn my _format attribute into the _expression attribute
        e = []
        for part in PARSE_RE.split(self._format):
            if not part:
                continue
            elif part == '{{':
                e.append(r'\{')
            elif part == '}}':
                e.append(r'\}')
            elif part[0] == '{' and part[-1] == '}':
                # this will be a braces-delimited field to handle
                e.append(self._handle_field(part))
            else:
                # just some text to match
                e.append(REGEX_SAFETY.sub(self._regex_replace, part))
        return ''.join(e)

    def _to_group_name(self, field):
        """Return a unique regex group name for ``field`` and record it.

        Raises KeyError when no distinct name can be derived.
        """
        # return a version of field which can be used as capture group, even
        # though it might contain '.'
        group = field.replace('.', '_').replace('[', '_').replace(']', '_')

        # make sure we don't collide ("a.b" colliding with "a_b")
        n = 1
        while group in self._group_to_name_map:
            n += 1
            pad = '_' * n
            if '.' in field:
                group = field.replace('.', pad)
            elif '_' in field:
                group = field.replace('_', pad)
            elif '[' in field or ']' in field:
                group = field.replace('[', pad).replace(']', pad)
            else:
                raise KeyError('duplicated group name %r' % (field,))
            # brackets are not valid in a regex group name, whichever
            # branch produced the candidate
            group = group.replace('[', '_').replace(']', '_')

        # save off the mapping
        self._group_to_name_map[group] = field
        self._name_to_group_map[field] = group
        return group

    def _handle_field(self, field):
        """Return the regex text for one '{...}' field of the format.

        Registers the field as fixed or named, records its type converter
        (if any) and advances the running group index by the number of
        regex groups the emitted text contains.

        Raises RepeatedNameError when a name recurs with a different type,
        and ValueError (from extract_format) for an unknown type.
        """
        # first: lose the braces
        field = field[1:-1]

        # now figure whether this is an anonymous or named field, and whether
        # there's any format specification
        format = ''

        if ':' in field:
            # split on the first colon only: the specification itself may
            # legitimately contain further colons
            name, format = field.split(':', 1)
        else:
            name = field

        # This *should* be more flexible, but parsing complicated structures
        # out of the string is hard (and not necessarily useful) ... and I'm
        # being lazy. So for now `identifier` is "anything starting with a
        # letter" and digit args don't get attribute or element stuff.
        if name and name[0].isalpha():
            if name in self._name_to_group_map:
                if self._name_types[name] != format:
                    raise RepeatedNameError(
                        'field type %r for field "%s" '
                        'does not match previous seen type %r'
                        % (format, name, self._name_types[name])
                    )
                group = self._name_to_group_map[name]
                # match previously-seen value
                return r'(?P=%s)' % group
            else:
                group = self._to_group_name(name)
                self._name_types[name] = format
            self._named_fields.append(group)
            # this will become a group, which must not contain dots
            wrap = r'(?P<%s>%%s)' % group
        else:
            self._fixed_fields.append(self._group_index)
            wrap = r'(%s)'
            group = self._group_index

        # simplest case: no type specifier ({} or {name})
        if not format:
            self._group_index += 1
            return wrap % r'.+?'

        # decode the format specification
        format = extract_format(format, self._extra_types)

        # figure type conversions, if any
        type = format['type']
        is_numeric = type and type in 'n%fegdobx'
        if type in self._extra_types:
            type_converter = self._extra_types[type]
            s = getattr(type_converter, 'pattern', r'.+?')
            regex_group_count = getattr(type_converter, 'regex_group_count', 0)
            if regex_group_count is None:
                regex_group_count = 0
            self._group_index += regex_group_count
            self._type_conversions[group] = convert_first(type_converter)
        elif type == 'n':
            s = r'\d{1,3}([,.]\d{3})*'
            self._group_index += 1
            self._type_conversions[group] = int_convert(10)
        elif type == 'b':
            s = r'(0[bB])?[01]+'
            self._type_conversions[group] = int_convert(2)
            self._group_index += 1
        elif type == 'o':
            s = r'(0[oO])?[0-7]+'
            self._type_conversions[group] = int_convert(8)
            self._group_index += 1
        elif type == 'x':
            s = r'(0[xX])?[0-9a-fA-F]+'
            self._type_conversions[group] = int_convert(16)
            self._group_index += 1
        elif type == '%':
            s = r'\d+(\.\d+)?%'
            self._group_index += 1
            self._type_conversions[group] = percentage
        elif type == 'f':
            s = r'\d*\.\d+'
            self._type_conversions[group] = convert_first(float)
        elif type == 'F':
            s = r'\d*\.\d+'
            self._type_conversions[group] = convert_first(Decimal)
        elif type == 'e':
            s = r'\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF'
            self._type_conversions[group] = convert_first(float)
        elif type == 'g':
            s = r'\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF'
            self._group_index += 2
            self._type_conversions[group] = convert_first(float)
        elif type == 'd':
            if format.get('width'):
                width = r'{1,%s}' % int(format['width'])
            else:
                width = '+'
            s = r'\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}'.format(
                w=width
            )
            # do not specify number base, determine it automatically
            self._type_conversions[group] = int_convert()
        elif type == 'ti':
            s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % TIME_PAT
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, ymd=n + 1, hms=n + 4, tz=n + 7
            )
            self._group_index += 7
        elif type == 'tg':
            s = r'(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?' % (
                ALL_MONTHS_PAT,
                TIME_PAT,
                AM_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == 'ta':
            s = r'((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?' % (
                ALL_MONTHS_PAT,
                TIME_PAT,
                AM_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == 'te':
            # this will allow microseconds through if they're present, but meh
            s = r'(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s' % (
                DAYS_PAT,
                MONTHS_PAT,
                TIME_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 3, hms=n + 5, tz=n + 8
            )
            self._group_index += 8
        elif type == 'th':
            # slight flexibility here from the stock Apache format
            s = r'(\d{1,2}[-/]%s[-/]\d{4}):%s%s' % (MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 1, hms=n + 3, tz=n + 6
            )
            self._group_index += 6
        elif type == 'tc':
            s = r'(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})' % (
                DAYS_PAT,
                MONTHS_PAT,
                TIME_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5
            )
            self._group_index += 8
        elif type == 'tt':
            s = r'%s?%s?%s?' % (TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, hms=n + 1, am=n + 4, tz=n + 5
            )
            self._group_index += 5
        elif type == 'ts':
            s = r'%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?' % MONTHS_PAT
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, mm=n + 1, dd=n + 3, hms=n + 5
            )
            self._group_index += 5
        elif type == 'l':
            s = r'[A-Za-z]+'
        elif type:
            # remaining built-in codes map straight onto the regex class of
            # the same letter (\w, \W, \d, \D, \s, \S)
            s = r'\%s+' % type
        elif format.get('precision'):
            if format.get('width'):
                s = r'.{%s,%s}?' % (format['width'], format['precision'])
            else:
                s = r'.{1,%s}?' % format['precision']
        elif format.get('width'):
            s = r'.{%s,}?' % format['width']
        else:
            s = r'.+?'

        align = format['align']
        fill = format['fill']

        # handle some numeric-specific things like fill and sign
        if is_numeric:
            # prefix with something (align "=" trumps zero)
            if align == '=':
                # special case - align "=" acts like the zero above but with
                # configurable fill defaulting to "0"
                if not fill:
                    fill = '0'
                s = r'%s*' % fill + s

            # allow numbers to be prefixed with a sign
            s = r'[-+ ]?' + s

        if not fill:
            fill = ' '

        # Place into a group now - this captures the value we want to keep.
        # Everything else from now is just padding to be stripped off
        if wrap:
            s = wrap % s
            self._group_index += 1

        if format['width']:
            # all we really care about is that if the format originally
            # specified a width then there will probably be padding - without
            # an explicit alignment that'll mean right alignment with spaces
            # padding
            if not align:
                align = '>'

        # escape fill characters that are regex metacharacters ('|' included,
        # otherwise the padding would turn into an alternation)
        if fill in r'.\+?*[](){}^$|':
            fill = '\\' + fill

        # align "=" has been handled
        if align == '<':
            s = '%s%s*' % (s, fill)
        elif align == '>':
            s = '%s*%s' % (fill, s)
        elif align == '^':
            s = '%s*%s%s*' % (fill, s, fill)

        return s

805

806

class Result(object):
    """The result of a parse() or search().

    ``result[index]`` and ``result[start:stop]`` read from the fixed
    (positional) values.

    ``result['name']`` reads from the named values.

    ``'name' in result`` tests whether a named value exists.
    """

    def __init__(self, fixed, named, spans):
        self.fixed, self.named, self.spans = fixed, named, spans

    def __getitem__(self, item):
        # integers and slices address positional values, anything else is
        # treated as a field name
        source = self.fixed if isinstance(item, (int, slice)) else self.named
        return source[item]

    def __repr__(self):
        details = (self.__class__.__name__, self.fixed, self.named)
        return '<%s %r %r>' % details

    def __contains__(self, name):
        return name in self.named

833

834

class Match(object):
    """What parse() or search() hand back when result evaluation is off.

    It merely carries the regex match object together with the parser that
    produced it, so that the Result can be generated later on demand.
    """

    def __init__(self, parser, match):
        self.parser, self.match = parser, match

    def evaluate_result(self):
        '''Generate results for this Match'''
        evaluate = self.parser.evaluate_result
        return evaluate(self.match)

849

850

class ResultIterator(object):
    """The result of a findall() operation.

    Each element is a Result instance, or a Match instance when the
    iterator was created with ``evaluate_result=False``.
    """

    def __init__(self, parser, string, pos, endpos, evaluate_result=True):
        self.parser = parser
        self.string = string
        self.pos = pos
        self.endpos = endpos
        self.evaluate_result = evaluate_result
        # set once a zero-width match at the end of the range was returned
        self._exhausted = False

    def __iter__(self):
        return self

    def __next__(self):
        if self._exhausted:
            raise StopIteration()
        m = self.parser._search_re.search(self.string, self.pos, self.endpos)
        if m is None:
            raise StopIteration()

        if m.end() > m.start():
            self.pos = m.end()
        elif m.end() >= min(self.endpos, len(self.string)):
            # A zero-width match at the very end: there is nowhere left to
            # advance to, so this is the last element.
            self._exhausted = True
        else:
            # A zero-width match would be found again at the same position
            # forever; step one character past it.
            self.pos = m.end() + 1

        if self.evaluate_result:
            return self.parser.evaluate_result(m)
        else:
            return Match(self.parser, m)

    # pre-py3k compat
    next = __next__

880

881

def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False):
    """Using "format" attempt to pull values from "string".

    The whole of "string" has to match the format. When the value you are
    after is only a part of the string, use search() instead.

    With ``evaluate_result`` true the return value is a Result instance
    with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false the return value is a Match instance
    with one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching ignores letter case by default; pass case_sensitive=True to
    match with case.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".

    In the case there is no match parse() will return None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.parse(string, evaluate_result=evaluate_result)
    return outcome

910

911

def search(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Search "string" for the first occurrence of "format".

    The format may occur anywhere within the string. When the format has
    to match the string exactly, use parse() instead.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` true the return value is a Result instance
    with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false the return value is a Match instance
    with one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching ignores letter case by default; pass case_sensitive=True to
    match with case.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".

    In the case there is no match search() will return None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.search(string, pos, endpos, evaluate_result=evaluate_result)
    return outcome

951

952

def findall(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Search "string" for all occurrences of "format".

    The return value is an iterator producing one element per match of the
    format that is found.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` true each element is a Result instance with
    two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false each element is a Match instance with
    one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching ignores letter case by default; pass case_sensitive=True to
    match with case.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    matches = parser.findall(string, pos, endpos, evaluate_result=evaluate_result)
    return matches

989

990

def compile(format, extra_types=None, case_sensitive=False):
    """Create a Parser instance to parse "format".

    The Parser's .parse(string) method behaves in the same manner as
    parse(format, string).

    Matching ignores letter case by default; pass case_sensitive=True to
    match with case.

    Use this function if you intend to parse many strings
    with the same format.

    See the module documentation for the use of "extra_types".

    Returns a Parser instance.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    return parser

1008

1009

1011#

1012# Permission is hereby granted, free of charge, to any person obtaining a copy

1013# of this software and associated documentation files (the "Software"), to deal

1014# in the Software without restriction, including without limitation the rights

1015# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

1016# copies of the Software, and to permit persons to whom the Software is

1017# furnished to do so, subject to the following conditions:

1018#

1019# The above copyright notice and this permission notice shall be included in

1020# all copies or substantial portions of the Software.

1021#

1022# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

1023# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

1024# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

1025# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

1026# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

1027# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

1028# SOFTWARE.

1029

1030# vim: set filetype=python ts=4 sw=4 et si tw=75