Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/parse.py: 74%

509 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:15 +0000

1from __future__ import absolute_import 

2 

3__version__ = '1.19.1' 

4 

5# yes, I now have two problems 

6import re 

7import sys 

8from datetime import datetime, time, tzinfo, timedelta 

9from decimal import Decimal 

10from functools import partial 

11import logging 

12 

13__all__ = 'parse search findall with_pattern'.split() 

14 

15log = logging.getLogger(__name__) 

16 

17 

def with_pattern(pattern, regex_group_count=None):
    r"""Build a decorator that tags a type converter with its regex pattern.

    The decorated function is returned unchanged apart from two attributes
    set on it: :attr:`pattern` and :attr:`regex_group_count`.

    EXAMPLE:
        >>> import parse
        >>> @parse.with_pattern(r"\d+")
        ... def parse_number(text):
        ...     return int(text)

    is equivalent to:

        >>> def parse_number(text):
        ...     return int(text)
        >>> parse_number.pattern = r"\d+"

    :param pattern: regular expression pattern (as text)
    :param regex_group_count: Indicates how many regex-groups are in pattern.
    :return: decorator that annotates the converter and hands it back
    """

    def annotate(converter):
        setattr(converter, 'pattern', pattern)
        setattr(converter, 'regex_group_count', regex_group_count)
        return converter

    return annotate

47 

48 

class int_convert:
    """Convert a string to an integer.

    The string may start with a sign: '-', '+' or a single space (the
    three sign characters the numeric field patterns accept).

    It may be of a base other than 2, 8, 10 or 16.

    If base isn't specified, it will be detected automatically based
    on a string format. When string starts with a base indicator, 0#nnnn,
    it overrides the default base of 10.

    It may also have other non-numeric characters that we can ignore.

    Instances are called as ``converter(string, match)``; ``match`` is
    accepted for signature compatibility and is not used.  A string with no
    usable digits raises ValueError (from ``int``).
    """

    CHARS = '0123456789abcdefghijklmnopqrstuvwxyz'

    # radix prefixes honoured when the base is auto-detected
    _PREFIX_BASES = {'0b': 2, '0o': 8, '0x': 16}

    def __init__(self, base=None):
        self.base = base

    def __call__(self, string, match):
        # Slicing (rather than indexing) keeps an empty string from raising
        # IndexError here; int() reports it as a ValueError further down.
        lead = string[:1]
        sign = -1 if lead == '-' else 1
        # a leading space counts as a (positive) sign and must be skipped
        # too, otherwise ' 0x1f' would be read as decimal
        number_start = 1 if lead in ('-', '+', ' ') else 0

        base = self.base
        # If base wasn't specified, detect it automatically
        if base is None:
            # Assume decimal number, unless different base is detected
            base = 10

            # For number formats starting with 0b, 0o, 0x, use corresponding
            # base; at least one digit must follow the two-character prefix.
            body = string[number_start:]
            if len(body) > 2:
                base = self._PREFIX_BASES.get(body[:2].lower(), 10)

        # drop every character that is not a digit of the chosen base (this
        # removes the sign, the radix letter and separators such as ',')
        chars = int_convert.CHARS[:base]
        digits = re.sub('[^%s]' % chars, '', string.lower())
        return sign * int(digits, base)

98 

99 

class convert_first:
    """Adapt a one-argument converter to the ``(string, match)`` call shape.

    Calling an instance does the same as ``lambda s, m: converter(s)``; it is
    written as a class rather than a lambda so that instances can be pickled.
    """

    def __init__(self, converter):
        self.converter = converter

    def __call__(self, string, match):
        # the match object is deliberately ignored
        convert = self.converter
        return convert(string)

110 

111 

def percentage(string, match):
    """Convert text such as ``'50%'`` to a fraction (``0.5``).

    The final character of ``string`` is dropped without being inspected and
    the remainder is read as a float; ``match`` is unused.
    """
    number_text = string[:-1]
    return float(number_text) / 100.0

114 

115 

class FixedTzOffset(tzinfo):
    """Fixed offset in minutes east from UTC.

    Instances compare equal when both their name and their offset match,
    and they hash consistently with that equality.
    """

    ZERO = timedelta(0)

    def __init__(self, offset, name):
        self._offset = timedelta(minutes=offset)
        self._name = name

    def __getinitargs__(self):
        # tzinfo.__reduce__ re-creates the object by calling the class with
        # these arguments; without them copying/unpickling would call
        # __init__ with no arguments and fail.
        return (self._offset.total_seconds() / 60, self._name)

    def __repr__(self):
        return '<%s %s %s>' % (self.__class__.__name__, self._name, self._offset)

    def utcoffset(self, dt):
        return self._offset

    def tzname(self, dt):
        return self._name

    def dst(self, dt):
        return self.ZERO

    def __eq__(self, other):
        if not isinstance(other, FixedTzOffset):
            return False
        return self._name == other._name and self._offset == other._offset

    def __ne__(self, other):
        # explicit for interpreters that do not derive != from ==
        return not self.__eq__(other)

    def __hash__(self):
        # defining __eq__ alone would leave instances unhashable
        return hash((self._name, self._offset))

141 

142 

# Month name (abbreviated and full) -> month number.  ALL_MONTHS_PAT below is
# built by joining these keys in the order written here.
MONTHS_MAP = {
    'Jan': 1,
    'January': 1,
    'Feb': 2,
    'February': 2,
    'Mar': 3,
    'March': 3,
    'Apr': 4,
    'April': 4,
    'May': 5,
    'Jun': 6,
    'June': 6,
    'Jul': 7,
    'July': 7,
    'Aug': 8,
    'August': 8,
    'Sep': 9,
    'September': 9,
    'Oct': 10,
    'October': 10,
    'Nov': 11,
    'November': 11,
    'Dec': 12,
    'December': 12,
}

# Regex fragments shared by the date/time field types.  Each fragment is
# wrapped in (at least) one capture group.
DAYS_PAT = r'(Mon|Tue|Wed|Thu|Fri|Sat|Sun)'
MONTHS_PAT = r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'
ALL_MONTHS_PAT = '(' + '|'.join(MONTHS_MAP) + ')'
TIME_PAT = r'(\d{1,2}:\d{1,2}(:\d{1,2}(\.\d+)?)?)'
AM_PAT = r'(\s+[AP]M)'
TZ_PAT = r'(\s+[-+]\d\d?:?\d\d)'

174 

175 

def date_convert(
    string,
    match,
    ymd=None,
    mdy=None,
    dmy=None,
    d_m_y=None,
    hms=None,
    am=None,
    tz=None,
    mm=None,
    dd=None,
):
    """Convert the incoming string containing some date / time info into a
    datetime instance.

    Every keyword argument is an index into ``match.groups()`` (``d_m_y`` is
    a triple of such indexes) naming the group that holds that piece of the
    date; ``string`` itself is not inspected.

    :param ymd: group holding "year month day" separated by '-', '/' or spaces
    :param mdy: same, in month/day/year order
    :param dmy: same, in day/month/year order
    :param d_m_y: ``(day, month, year)`` group indexes
    :param hms: group holding "H:M" or "H:M:S[.fraction]"
    :param am: group holding the AM/PM marker
    :param tz: group holding 'Z' or a numeric UTC offset
    :param mm: month group; used together with ``dd``, the year is then the
        current year
    :param dd: day group (see ``mm``)
    :return: a ``datetime``, or a ``time`` when no date groups were given
    :raises KeyError: for a month name that is not in ``MONTHS_MAP``
    :raises ValueError: when a date group does not split into three parts

    Because the patterns are matched case-insensitively by default, the
    AM/PM marker, month names and the 'Z' zone are compared without regard
    to case.
    """
    groups = match.groups()
    time_only = False
    if mm is not None and dd is not None:
        y = datetime.today().year
        m = groups[mm]
        d = groups[dd]
    elif ymd is not None:
        # '+' so that a run of separators (e.g. two spaces) splits only once
        y, m, d = re.split(r'[-/\s]+', groups[ymd])
    elif mdy is not None:
        m, d, y = re.split(r'[-/\s]+', groups[mdy])
    elif dmy is not None:
        d, m, y = re.split(r'[-/\s]+', groups[dmy])
    elif d_m_y is not None:
        d, m, y = d_m_y
        d = groups[d]
        m = groups[m]
        y = groups[y]
    else:
        time_only = True

    H = M = S = u = 0
    if hms is not None and groups[hms]:
        t = groups[hms].split(':')
        if len(t) == 2:
            H, M = t
        else:
            H, M, S = t
            if '.' in S:
                S, fraction = S.split('.')
                # take the first six digits directly; scaling through a
                # float can truncate the microseconds one too low
                u = int(fraction[:6].ljust(6, '0'))
            S = int(S)
        H = int(H)
        M = int(M)

    if am is not None:
        am = groups[am]
        if am:
            am = am.strip().upper()
            if am == 'AM' and H == 12:
                # correction for "12" hour functioning as "0" hour: 12:15 AM = 00:15 by 24 hr clock
                H -= 12
            elif am == 'PM' and H != 12:
                # 12PM is midday and needs no correction; 1PM..11PM shift
                H += 12

    if tz is not None:
        tz = groups[tz]
        tz = tz.strip() if tz else None
        if not tz:
            tz = None
        elif tz.upper() == 'Z':
            tz = FixedTzOffset(0, 'UTC')
        elif tz.isupper():
            # TODO use the awesome python TZ module?
            # Named zones are not supported; produce a naive value rather
            # than handing a str to datetime/time (which raises TypeError).
            tz = None
        else:
            sign = tz[0]
            if ':' in tz:
                tzh, tzm = tz[1:].split(':')
            elif len(tz) == 4:  # 'snnn'
                tzh, tzm = tz[1], tz[2:4]
            else:
                tzh, tzm = tz[1:3], tz[3:5]
            offset = int(tzm) + int(tzh) * 60
            if sign == '-':
                offset = -offset
            tz = FixedTzOffset(offset, tz)

    if time_only:
        return time(H, M, S, u, tzinfo=tz)

    y = int(y)
    if m.isdigit():
        m = int(m)
    else:
        # MONTHS_MAP keys are capitalised ('Jan', 'January')
        m = MONTHS_MAP[m.capitalize()]
    d = int(d)
    return datetime(y, m, d, H, M, S, u, tzinfo=tz)

273 

274 

class TooManyFields(ValueError):
    """Raised by Parser when compiling its expression fails because the
    regex engine reports that it only supports 100 named groups."""

277 

278 

class RepeatedNameError(ValueError):
    """Raised by Parser when a field name is repeated in a format string
    with a type specification different from its first occurrence."""

281 

282 

# Regex metacharacters that get a backslash put in front of them when they
# occur in the literal text of a format string.
# note: {} are handled separately
REGEX_SAFETY = re.compile(r'([?\\\\.[\]()*+\^$!\|])')

# allowed field types: the single-character codes plus the two-character
# "t<x>" date/time codes
ALLOWED_TYPES = set('nbox%fFegwWdDsSl') | set('t' + suffix for suffix in 'ieahgcts')

289 

290 

def _split_leading_digits(text):
    """Split ``text`` into its leading run of digit characters and the rest."""
    end = 0
    while end < len(text) and text[end].isdigit():
        end += 1
    return text[:end], text[end:]


def extract_format(format, extra_types):
    """Pull apart the format [[fill]align][sign][0][width][.precision][type]

    :param format: the format specification text (may be empty)
    :param extra_types: mapping of user-supplied type names; a type found
        here is accepted even if it is not one of ``ALLOWED_TYPES``
    :return: dict with the keys ``fill``, ``align`` (each ``None`` when
        absent), ``zero`` (bool), ``width`` (digit string, possibly empty),
        ``type`` (possibly empty) and, only when the spec contained a '.',
        ``precision``.  For compatibility with earlier callers the dict
        also carries ``format`` (the text left after parsing, i.e. the type)
        and ``extra_types``.
    :raises ValueError: when the type is neither allowed nor an extra type
    """
    alignments = ('<', '>', '=', '^')
    fill = align = None
    # slices instead of indexes: an empty or one-character spec is legal
    if format[:1] in alignments:
        align = format[0]
        format = format[1:]
    elif format[1:2] in alignments:
        fill = format[0]
        align = format[1]
        format = format[2:]

    # the sign flag carries no information for parsing; skip it
    if format.startswith(('+', '-', ' ')):
        format = format[1:]

    zero = format.startswith('0')
    if zero:
        format = format[1:]

    width, format = _split_leading_digits(format)

    has_precision = format.startswith('.')
    if has_precision:
        # Precision isn't needed but we need to capture it so that
        # the ValueError isn't raised.
        precision, format = _split_leading_digits(format[1:])

    # the rest is the type, if present
    type_ = format
    if type_ and type_ not in ALLOWED_TYPES and type_ not in extra_types:
        raise ValueError('format spec %r not recognised' % type_)

    result = {
        'format': format,
        'extra_types': extra_types,
        'fill': fill,
        'align': align,
        'zero': zero,
        'width': width,
        'type': type_,
    }
    if has_precision:
        result['precision'] = precision
    return result

334 

335 

# Splits a format string into literal text and brace tokens.  The single
# capture group matches "{{", "}}" or a "{...}" field: an optional word-like
# name, any number of ".attr" / "[item]" accessors and an optional ":spec".
PARSE_RE = re.compile(
    r"({{|}}|{\w*(?:(?:\.\w+)|(?:\[[^\]]+\]))*(?::[^}]+)?})"
)

337 

338 

class Parser(object):
    """Encapsulate a format string that may be used to parse other strings.

    The format is translated into a regular expression when the Parser is
    created; the compiled "search" and "match" patterns are built lazily the
    first time they are needed.
    """

    def __init__(self, format, extra_types=None, case_sensitive=False):
        # a mapping of a name as in {hello.world} to a regex-group compatible
        # name, like hello__world. It's used to prevent the transformation of
        # name-to-group and group to name to fail subtly, such as in:
        # hello_.world-> hello___world->hello._world
        self._group_to_name_map = {}
        # also store the original field name to group name mapping to allow
        # multiple instances of a name in the format string
        self._name_to_group_map = {}
        # and to sanity check the repeated instances store away the first
        # field type specification for the named field
        self._name_types = {}

        self._format = format
        if extra_types is None:
            extra_types = {}
        self._extra_types = extra_types
        if case_sensitive:
            self._re_flags = re.DOTALL
        else:
            self._re_flags = re.IGNORECASE | re.DOTALL
        self._fixed_fields = []
        self._named_fields = []
        self._group_index = 0
        self._type_conversions = {}
        self._expression = self._generate_expression()
        self.__search_re = None
        self.__match_re = None

        log.debug('format %r -> %r', format, self._expression)

    def __repr__(self):
        if len(self._format) > 20:
            return '<%s %r>' % (self.__class__.__name__, self._format[:17] + '...')
        return '<%s %r>' % (self.__class__.__name__, self._format)

    @property
    def _search_re(self):
        """Compiled pattern used by search()/findall(), built on first use."""
        if self.__search_re is None:
            try:
                self.__search_re = re.compile(self._expression, self._re_flags)
            except AssertionError:
                # access error through sys to keep py3k and backward compat
                e = str(sys.exc_info()[1])
                if e.endswith('this version only supports 100 named groups'):
                    raise TooManyFields(
                        'sorry, you are attempting to parse ' 'too many complex fields'
                    )
                # any other assertion is unexpected: surface it instead of
                # silently returning None
                raise
        return self.__search_re

    @property
    def _match_re(self):
        r"""Compiled pattern anchored with \A...\Z, used by parse()."""
        if self.__match_re is None:
            expression = r'\A%s\Z' % self._expression
            try:
                self.__match_re = re.compile(expression, self._re_flags)
            except AssertionError:
                # access error through sys to keep py3k and backward compat
                e = str(sys.exc_info()[1])
                if e.endswith('this version only supports 100 named groups'):
                    raise TooManyFields(
                        'sorry, you are attempting to parse ' 'too many complex fields'
                    )
                # any other assertion is unexpected: surface it instead of
                # silently returning None
                raise
            except re.error:
                raise NotImplementedError(
                    "Group names (e.g. (?P<name>) can "
                    "cause failure, as they are not escaped properly: '%s'" % expression
                )
        return self.__match_re

    @property
    def named_fields(self):
        """A copy of the list of regex group names of the named fields."""
        return list(self._named_fields)

    @property
    def fixed_fields(self):
        """A copy of the list of group indexes of the positional fields."""
        return list(self._fixed_fields)

    def parse(self, string, evaluate_result=True):
        """Match my format to the string exactly.

        Return a Result or Match instance or None if there's no match.
        """
        m = self._match_re.match(string)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def search(self, string, pos=0, endpos=None, evaluate_result=True):
        """Search the string for my format.

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        If the ``evaluate_result`` argument is set to ``False`` a
        Match instance is returned instead of the actual Result instance.

        Return either a Result instance or None if there's no match.
        """
        if endpos is None:
            endpos = len(string)
        m = self._search_re.search(string, pos, endpos)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def findall(
        self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True
    ):
        """Search "string" for all occurrences of "format".

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        ``extra_types`` is accepted but not used by this method; the types
        given when the Parser was created apply.

        Returns an iterator that holds Result or Match instances for each format match
        found.
        """
        if endpos is None:
            endpos = len(string)
        return ResultIterator(
            self, string, pos, endpos, evaluate_result=evaluate_result
        )

    def _expand_named_fields(self, named_fields):
        """Turn flat 'aaa[bbb][ccc]' keys into nested dictionaries."""
        result = {}
        for field, value in named_fields.items():
            # split 'aaa[bbb][ccc]...' into 'aaa' and '[bbb][ccc]...'
            basename, subkeys = re.match(r'([^\[]+)(.*)', field).groups()

            # create nested dictionaries {'aaa': {'bbb': {'ccc': ...}}}
            d = result
            k = basename

            if subkeys:
                for subkey in re.findall(r'\[[^\]]+\]', subkeys):
                    d = d.setdefault(k, {})
                    k = subkey[1:-1]

            # assign the value to the last key
            d[k] = value

        return result

    def evaluate_result(self, m):
        '''Generate a Result instance for the given regex match object'''
        # ok, figure the fixed fields we've pulled out and type convert them
        fixed_fields = list(m.groups())
        for n in self._fixed_fields:
            if n in self._type_conversions:
                fixed_fields[n] = self._type_conversions[n](fixed_fields[n], m)
        fixed_fields = tuple(fixed_fields[n] for n in self._fixed_fields)

        # grab the named fields, converting where requested
        groupdict = m.groupdict()
        named_fields = {}
        name_map = {}
        for k in self._named_fields:
            korig = self._group_to_name_map[k]
            name_map[korig] = k
            if k in self._type_conversions:
                value = self._type_conversions[k](groupdict[k], m)
            else:
                value = groupdict[k]

            named_fields[korig] = value

        # now figure the match spans
        spans = dict((n, m.span(name_map[n])) for n in named_fields)
        # _fixed_fields holds 0-based positions in m.groups(); regex group
        # numbers are 1-based, hence n + 1
        spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields))

        # and that's our result
        return Result(fixed_fields, self._expand_named_fields(named_fields), spans)

    def _regex_replace(self, match):
        """Substitution callback for REGEX_SAFETY: backslash-escape the hit."""
        return '\\' + match.group(1)

    def _generate_expression(self):
        # turn my _format attribute into the _expression attribute
        e = []
        for part in PARSE_RE.split(self._format):
            if not part:
                continue
            elif part == '{{':
                e.append(r'\{')
            elif part == '}}':
                e.append(r'\}')
            elif part[0] == '{' and part[-1] == '}':
                # this will be a braces-delimited field to handle
                e.append(self._handle_field(part))
            else:
                # just some text to match
                e.append(REGEX_SAFETY.sub(self._regex_replace, part))
        return ''.join(e)

    def _to_group_name(self, field):
        # return a version of field which can be used as capture group, even
        # though it might contain '.'
        group = field.replace('.', '_').replace('[', '_').replace(']', '_')

        # make sure we don't collide ("a.b" colliding with "a_b")
        n = 1
        while group in self._group_to_name_map:
            n += 1
            if '.' in field or '[' in field or ']' in field:
                # widen every substituted character; brackets must be
                # replaced as well or the result is not a valid group name
                group = re.sub(r'[.\[\]]', '_' * n, field)
            elif '_' in field:
                group = field.replace('_', '_' * n)
            else:
                raise KeyError('duplicated group name %r' % (field,))

        # save off the mapping
        self._group_to_name_map[group] = field
        self._name_to_group_map[field] = group
        return group

    @staticmethod
    def _escape_fill(fill):
        """Backslash-escape a fill character that is special in a regex."""
        if fill in r'.\+?*[](){}^$|':
            return '\\' + fill
        return fill

    def _handle_field(self, field):
        """Return the regex fragment for one "{...}" field of the format.

        Also records the field (fixed or named), registers its type
        conversion if any, and advances ``_group_index`` by the number of
        capture groups the fragment contains.
        """
        # first: lose the braces
        field = field[1:-1]

        # now figure whether this is an anonymous or named field, and whether
        # there's any format specification
        format = ''

        if ':' in field:
            # split once only: the specification may itself contain ':'
            # (for instance as the fill character)
            name, format = field.split(':', 1)
        else:
            name = field

        # This *should* be more flexible, but parsing complicated structures
        # out of the string is hard (and not necessarily useful) ... and I'm
        # being lazy. So for now `identifier` is "anything starting with a
        # letter" and digit args don't get attribute or element stuff.
        if name and name[0].isalpha():
            if name in self._name_to_group_map:
                if self._name_types[name] != format:
                    raise RepeatedNameError(
                        'field type %r for field "%s" '
                        'does not match previous seen type %r'
                        % (format, name, self._name_types[name])
                    )
                group = self._name_to_group_map[name]
                # match previously-seen value
                return r'(?P=%s)' % group
            else:
                group = self._to_group_name(name)
                self._name_types[name] = format
            self._named_fields.append(group)
            # this will become a group, which must not contain dots
            wrap = r'(?P<%s>%%s)' % group
        else:
            self._fixed_fields.append(self._group_index)
            wrap = r'(%s)'
            group = self._group_index

        # simplest case: no type specifier ({} or {name})
        if not format:
            self._group_index += 1
            return wrap % r'.+?'

        # decode the format specification
        format = extract_format(format, self._extra_types)

        # figure type conversions, if any
        type = format['type']
        is_numeric = type and type in 'n%fegdobx'
        if type in self._extra_types:
            type_converter = self._extra_types[type]
            s = getattr(type_converter, 'pattern', r'.+?')
            regex_group_count = getattr(type_converter, 'regex_group_count', 0)
            if regex_group_count is None:
                regex_group_count = 0
            self._group_index += regex_group_count
            self._type_conversions[group] = convert_first(type_converter)
        elif type == 'n':
            s = r'\d{1,3}([,.]\d{3})*'
            self._group_index += 1
            self._type_conversions[group] = int_convert(10)
        elif type == 'b':
            s = r'(0[bB])?[01]+'
            self._type_conversions[group] = int_convert(2)
            self._group_index += 1
        elif type == 'o':
            s = r'(0[oO])?[0-7]+'
            self._type_conversions[group] = int_convert(8)
            self._group_index += 1
        elif type == 'x':
            s = r'(0[xX])?[0-9a-fA-F]+'
            self._type_conversions[group] = int_convert(16)
            self._group_index += 1
        elif type == '%':
            s = r'\d+(\.\d+)?%'
            self._group_index += 1
            self._type_conversions[group] = percentage
        elif type == 'f':
            s = r'\d*\.\d+'
            self._type_conversions[group] = convert_first(float)
        elif type == 'F':
            s = r'\d*\.\d+'
            self._type_conversions[group] = convert_first(Decimal)
        elif type == 'e':
            s = r'\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF'
            self._type_conversions[group] = convert_first(float)
        elif type == 'g':
            s = r'\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF'
            self._group_index += 2
            self._type_conversions[group] = convert_first(float)
        elif type == 'd':
            if format.get('width'):
                width = r'{1,%s}' % int(format['width'])
            else:
                width = '+'
            s = r'\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}'.format(
                w=width
            )
            # do not specify number base, determine it automatically
            self._type_conversions[group] = int_convert()
        elif type == 'ti':
            s = r'(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?' % TIME_PAT
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, ymd=n + 1, hms=n + 4, tz=n + 7
            )
            self._group_index += 7
        elif type == 'tg':
            s = r'(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?' % (
                ALL_MONTHS_PAT,
                TIME_PAT,
                AM_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == 'ta':
            s = r'((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?' % (
                ALL_MONTHS_PAT,
                TIME_PAT,
                AM_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == 'te':
            # this will allow microseconds through if they're present, but meh
            s = r'(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s' % (
                DAYS_PAT,
                MONTHS_PAT,
                TIME_PAT,
                TZ_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 3, hms=n + 5, tz=n + 8
            )
            self._group_index += 8
        elif type == 'th':
            # slight flexibility here from the stock Apache format
            s = r'(\d{1,2}[-/]%s[-/]\d{4}):%s%s' % (MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, dmy=n + 1, hms=n + 3, tz=n + 6
            )
            self._group_index += 6
        elif type == 'tc':
            s = r'(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})' % (
                DAYS_PAT,
                MONTHS_PAT,
                TIME_PAT,
            )
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5
            )
            self._group_index += 8
        elif type == 'tt':
            s = r'%s?%s?%s?' % (TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, hms=n + 1, am=n + 4, tz=n + 5
            )
            self._group_index += 5
        elif type == 'ts':
            s = r'%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?' % MONTHS_PAT
            n = self._group_index
            self._type_conversions[group] = partial(
                date_convert, mm=n + 1, dd=n + 3, hms=n + 5
            )
            self._group_index += 5
        elif type == 'l':
            s = r'[A-Za-z]+'
        elif type:
            s = r'\%s+' % type
        elif format.get('precision'):
            if format.get('width'):
                s = r'.{%s,%s}?' % (format['width'], format['precision'])
            else:
                s = r'.{1,%s}?' % format['precision']
        elif format.get('width'):
            s = r'.{%s,}?' % format['width']
        else:
            s = r'.+?'

        align = format['align']
        fill = format['fill']

        # handle some numeric-specific things like fill and sign
        if is_numeric:
            # prefix with something (align "=" trumps zero)
            if align == '=':
                # special case - align "=" acts like the zero above but with
                # configurable fill defaulting to "0"
                if not fill:
                    fill = '0'
                # the fill is literal text, so escape regex metacharacters
                s = r'%s*' % self._escape_fill(fill) + s

            # allow numbers to be prefixed with a sign
            s = r'[-+ ]?' + s

        if not fill:
            fill = ' '

        # Place into a group now - this captures the value we want to keep.
        # Everything else from now is just padding to be stripped off
        if wrap:
            s = wrap % s
            self._group_index += 1

        if format['width']:
            # all we really care about is that if the format originally
            # specified a width then there will probably be padding - without
            # an explicit alignment that'll mean right alignment with spaces
            # padding
            if not align:
                align = '>'

            fill = self._escape_fill(fill)

            # align "=" has been handled
            if align == '<':
                s = '%s%s*' % (s, fill)
            elif align == '>':
                s = '%s*%s' % (fill, s)
            elif align == '^':
                s = '%s*%s%s*' % (fill, s, fill)

        return s

805 

806 

class Result(object):
    """The result of a parse() or search().

    ``result[index]`` and ``result[start:stop]`` read from the fixed
    (positional) values; any other key, e.g. ``result['name']``, reads from
    the named values.

    ``'name' in result`` tests whether a named value exists.
    """

    def __init__(self, fixed, named, spans):
        self.fixed, self.named, self.spans = fixed, named, spans

    def __getitem__(self, item):
        # integers and slices address the fixed values, anything else is a
        # field name
        source = self.fixed if isinstance(item, (int, slice)) else self.named
        return source[item]

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '<%s %r %r>' % (cls_name, self.fixed, self.named)

    def __contains__(self, name):
        return name in self.named

833 

834 

class Match(object):
    """The result of a parse() or search() if no results are generated.

    It pairs a parser with the raw regex match object so that the match is
    available to the user and can be evaluated later through the parser's
    evaluate_result().
    """

    def __init__(self, parser, match):
        self.match = match
        self.parser = parser

    def evaluate_result(self):
        '''Generate results for this Match'''
        owner = self.parser
        return owner.evaluate_result(self.match)

849 

850 

851class ResultIterator(object): 

852 """The result of a findall() operation. 

853 

854 Each element is a Result instance. 

855 """ 

856 

857 def __init__(self, parser, string, pos, endpos, evaluate_result=True): 

858 self.parser = parser 

859 self.string = string 

860 self.pos = pos 

861 self.endpos = endpos 

862 self.evaluate_result = evaluate_result 

863 

864 def __iter__(self): 

865 return self 

866 

867 def __next__(self): 

868 m = self.parser._search_re.search(self.string, self.pos, self.endpos) 

869 if m is None: 

870 raise StopIteration() 

871 self.pos = m.end() 

872 

873 if self.evaluate_result: 

874 return self.parser.evaluate_result(m) 

875 else: 

876 return Match(self.parser, m) 

877 

878 # pre-py3k compat 

879 next = __next__ 

880 

881 

def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False):
    """Using "format" attempt to pull values from "string".

    The whole of "string" has to match the format. When the value you are
    after is only a part of the string, use search() instead.

    With ``evaluate_result`` True the return value is a Result instance with
    two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` False the return value is a Match instance with
    one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".

    In the case there is no match parse() will return None.
    """
    return Parser(
        format, extra_types=extra_types, case_sensitive=case_sensitive
    ).parse(string, evaluate_result=evaluate_result)

910 

911 

def search(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Search "string" for the first occurrence of "format".

    The format may occur anywhere within the string. When the format has to
    match the string exactly, use parse() instead.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` True the return value is a Result instance with
    two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` False the return value is a Match instance with
    one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".

    In the case there is no match search() will return None.
    """
    return Parser(
        format, extra_types=extra_types, case_sensitive=case_sensitive
    ).search(string, pos, endpos, evaluate_result=evaluate_result)

951 

952 

def findall(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Search "string" for all occurrences of "format".

    The return value is an iterator that yields one item for each format
    match found.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` True each item is a Result instance with two
    attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` False each item is a Match instance with one
    method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    If the format is invalid a ValueError will be raised.

    See the module documentation for the use of "extra_types".
    """
    return Parser(
        format, extra_types=extra_types, case_sensitive=case_sensitive
    ).findall(string, pos, endpos, evaluate_result=evaluate_result)

989 

990 

def compile(format, extra_types=None, case_sensitive=False):
    """Create a Parser instance to parse "format".

    Use this function when many strings are to be parsed with the same
    format: the returned Parser has a .parse(string) method that behaves in
    the same manner as parse(format, string).

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    See the module documentation for the use of "extra_types".

    Returns a Parser instance.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    return parser

1008 

1009 

1010# Copyright (c) 2012-2020 Richard Jones <richard@python.org> 

1011# 

1012# Permission is hereby granted, free of charge, to any person obtaining a copy 

1013# of this software and associated documentation files (the "Software"), to deal 

1014# in the Software without restriction, including without limitation the rights 

1015# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 

1016# copies of the Software, and to permit persons to whom the Software is 

1017# furnished to do so, subject to the following conditions: 

1018# 

1019# The above copyright notice and this permission notice shall be included in 

1020# all copies or substantial portions of the Software. 

1021# 

1022# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 

1023# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 

1024# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 

1025# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 

1026# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 

1027# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 

1028# SOFTWARE. 

1029 

1030# vim: set filetype=python ts=4 sw=4 et si tw=75