Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/parse/__init__.py: 81%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

551 statements  

1from __future__ import absolute_import 

2 

3import logging 

4import re 

5import sys 

6from datetime import datetime 

7from datetime import time 

8from datetime import timedelta 

9from datetime import tzinfo 

10from decimal import Decimal 

11from functools import partial 

12 

13 

# Version string of this package.
__version__ = "1.22.1"

# Names exported by ``from parse import *``.
__all__ = ["parse", "search", "findall", "with_pattern"]

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

18 

19 

def with_pattern(pattern, regex_group_count=None):
    r"""Build a decorator that tags a type converter with its regex.

    The decorator stores ``pattern`` and ``regex_group_count`` as attributes
    of the same name on the decorated function and returns that very
    function (it is not wrapped).

    EXAMPLE:
        >>> import parse
        >>> @parse.with_pattern(r"\d+")
        ... def parse_number(text):
        ...     return int(text)

    which has the same effect as:

        >>> def parse_number(text):
        ...     return int(text)
        >>> parse_number.pattern = r"\d+"

    :param pattern: regular expression pattern (as text)
    :param regex_group_count: number of regex groups contained in pattern
    :return: decorator that annotates and returns the function it is given
    """
    annotations = (
        ("pattern", pattern),
        ("regex_group_count", regex_group_count),
    )

    def decorator(func):
        for attribute, value in annotations:
            setattr(func, attribute, value)
        return func

    return decorator

49 

50 

class int_convert:
    """Callable converter turning matched text into an ``int``.

    The text may begin with a ``+`` or ``-`` sign.

    When no base is given to the constructor, base 10 is assumed unless the
    digits start with a ``0b``, ``0o`` or ``0x`` prefix (either case) and at
    least one more character follows, in which case base 2, 8 or 16 is used.

    Any character that is not a valid digit for the chosen base (separators,
    the sign, the prefix letter, ...) is dropped before conversion.
    """

    CHARS = "0123456789abcdefghijklmnopqrstuvwxyz"

    # prefix letter (the character after the leading "0") -> numeric base
    _PREFIX_BASES = {"b": 2, "B": 2, "o": 8, "O": 8, "x": 16, "X": 16}

    def __init__(self, base=None):
        self.base = base

    def __call__(self, string, match):
        """Convert ``string``; ``match`` is accepted but not used."""
        lead = string[0]
        sign = -1 if lead == "-" else 1
        number_start = 1 if lead in ("-", "+") else 0

        base = self.base
        if base is None:
            # no explicit base: decimal unless a 0b/0o/0x prefix says otherwise
            base = 10
            has_room_for_prefix = len(string) - number_start > 2
            if string[number_start] == "0" and has_room_for_prefix:
                marker = string[number_start + 1]
                base = int_convert._PREFIX_BASES.get(marker, base)

        valid_digits = int_convert.CHARS[:base]
        cleaned = re.sub("[^%s]" % valid_digits, "", string.lower())
        return sign * int(cleaned, base)

99 

100 

class convert_first:
    """Adapt a one-argument converter to the ``(string, match)`` call shape.

    This is equivalent to ``lambda s, m: converter(s)``, but unlike a lambda
    function an instance of this class can be pickled.
    """

    def __init__(self, converter):
        self.converter = converter

    def __call__(self, string, match):
        # the match object is deliberately ignored
        convert = self.converter
        return convert(string)

111 

112 

def percentage(string, match):
    """Convert text such as ``"50%"`` to the fraction ``0.5``.

    The last character of ``string`` is dropped (it is expected to be the
    percent sign) and the remainder is parsed as a float and divided by 100.
    ``match`` is accepted but not used.
    """
    number_text = string[:-1]
    return float(number_text) / 100.0

115 

116 

class FixedTzOffset(tzinfo):
    """Fixed offset in minutes east from UTC.

    :param offset: offset from UTC in minutes (negative for west of UTC)
    :param name: label returned by :meth:`tzname`
    """

    ZERO = timedelta(0)

    def __init__(self, offset, name):
        self._offset = timedelta(minutes=offset)
        self._name = name

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self._name, self._offset)

    def utcoffset(self, dt):
        """Return the fixed offset, regardless of ``dt``."""
        return self._offset

    def tzname(self, dt):
        """Return the name given at construction, regardless of ``dt``."""
        return self._name

    def dst(self, dt):
        """Return a zero timedelta: a fixed offset has no DST adjustment."""
        return self.ZERO

    def __eq__(self, other):
        if not isinstance(other, FixedTzOffset):
            return NotImplemented
        return self._name == other._name and self._offset == other._offset

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3;
        # hash the same fields __eq__ compares so equal objects hash equal.
        return hash((self._name, self._offset))

142 

143 

# Month name (abbreviated and full, capitalised) -> month number.  The
# insertion order (abbreviation first, then the full name, January to
# December) is significant because ALL_MONTHS_PAT is built from the keys.
MONTHS_MAP = {
    name: number
    for number, names in enumerate(
        (
            ("Jan", "January"),
            ("Feb", "February"),
            ("Mar", "March"),
            ("Apr", "April"),
            ("May", "May"),
            ("Jun", "June"),
            ("Jul", "July"),
            ("Aug", "August"),
            ("Sep", "September"),
            ("Oct", "October"),
            ("Nov", "November"),
            ("Dec", "December"),
        ),
        1,
    )
    for name in names
}

# Regex fragments used to assemble the date/time field patterns.  Each one
# is a single capturing group (TIME_PAT contains two nested groups as well).
DAYS_PAT = r"(%s)" % "|".join(("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))
MONTHS_PAT = r"(%s)" % "|".join(
    ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
)
ALL_MONTHS_PAT = r"(%s)" % "|".join(MONTHS_MAP)
# H:M with optional :S and optional fractional seconds
TIME_PAT = r"(\d{1,2}:\d{1,2}(:\d{1,2}(\.\d+)?)?)"
# whitespace followed by AM or PM
AM_PAT = r"(\s+[AP]M)"
# whitespace followed by a signed numeric offset, colon optional
TZ_PAT = r"(\s+[-+]\d\d?:?\d\d)"

175 

176 

def date_convert(
    string,
    match,
    ymd=None,
    mdy=None,
    dmy=None,
    d_m_y=None,
    hms=None,
    am=None,
    tz=None,
    mm=None,
    dd=None,
):
    """Convert the incoming string containing some date / time info into a
    datetime instance.

    All keyword arguments are indexes into ``match.groups()`` telling the
    converter where each piece of the date was captured:

    :param string: the matched text (not used; the groups are read instead)
    :param match: regex match object whose ``groups()`` hold the pieces
    :param ymd: index of a "year month day" group split on ``-``, ``/`` or
        whitespace
    :param mdy: like ``ymd`` but in month, day, year order
    :param dmy: like ``ymd`` but in day, month, year order
    :param d_m_y: 3-tuple of group indexes (day, month, year)
    :param hms: index of the ``H:M[:S[.fraction]]`` group
    :param am: index of the AM/PM marker group
    :param tz: index of the timezone group (``Z`` or a numeric offset)
    :param mm: index of the month group; used together with ``dd`` and the
        current year
    :param dd: index of the day group; used together with ``mm``
    :return: a ``datetime`` when any date information was requested,
        otherwise a ``time``

    Because the parser matches case-insensitively by default, the AM/PM
    marker, the ``Z`` timezone and month names are compared without regard
    to case.
    """
    groups = match.groups()
    time_only = False
    # compare against None: group index 0 is a valid position
    if mm is not None and dd is not None:
        y = datetime.today().year
        m = groups[mm]
        d = groups[dd]
    elif ymd is not None:
        y, m, d = re.split(r"[-/\s]", groups[ymd])
    elif mdy is not None:
        m, d, y = re.split(r"[-/\s]", groups[mdy])
    elif dmy is not None:
        d, m, y = re.split(r"[-/\s]", groups[dmy])
    elif d_m_y is not None:
        d, m, y = d_m_y
        d = groups[d]
        m = groups[m]
        y = groups[y]
    else:
        time_only = True

    H = M = S = u = 0
    if hms is not None and groups[hms]:
        t = groups[hms].split(":")
        if len(t) == 2:
            H, M = t
        else:
            H, M, S = t
            if "." in S:
                S, u = S.split(".")
                # pad / truncate the fraction to exactly microseconds
                u = int(u.ljust(6, "0")[:6])
            S = int(S)
        H = int(H)
        M = int(M)

    if am is not None:
        am = groups[am]
        if am:
            # upper-case so that "am" / "pm" from a case-insensitive match
            # are handled the same as "AM" / "PM"
            am = am.strip().upper()
            if am == "AM" and H == 12:
                # correction for "12" hour functioning as "0" hour: 12:15 AM = 00:15 by 24 hr clock
                H -= 12
            elif am == "PM" and H == 12:
                # no correction needed: 12PM is midday, 12:00 by 24 hour clock
                pass
            elif am == "PM":
                H += 12

    if tz is not None:
        tz = groups[tz]
        if tz:
            tz = tz.strip()
            if tz.upper() == "Z":
                # "z" can be produced by a case-insensitive match
                tz = FixedTzOffset(0, "UTC")
            elif tz.isupper():
                # TODO use the awesome python TZ module?
                # NOTE(review): tz is left as a plain string here; the
                # patterns in this module only capture "Z" or a numeric
                # offset, so this branch is not expected to be reached.
                pass
            else:
                sign = tz[0]
                if ":" in tz:
                    tzh, tzm = tz[1:].split(":")
                elif len(tz) == 4:  # 'snnn'
                    tzh, tzm = tz[1], tz[2:4]
                else:
                    tzh, tzm = tz[1:3], tz[3:5]
                offset = int(tzm) + int(tzh) * 60
                if sign == "-":
                    offset = -offset
                tz = FixedTzOffset(offset, tz)

    if time_only:
        d = time(H, M, S, u, tzinfo=tz)
    else:
        y = int(y)
        if m.isdigit():
            m = int(m)
        else:
            # MONTHS_MAP keys are capitalised ("Nov", "November"); normalise
            # so "nov" / "NOV" from a case-insensitive match are found too
            m = MONTHS_MAP[m.capitalize()]
        d = int(d)
        d = datetime(y, m, d, H, M, S, u, tzinfo=tz)

    return d

274 

275 

def strf_date_convert(x, _, type):
    """Parse ``x`` with the strptime format ``type``.

    Returns a ``datetime`` when the format contains both date and time
    directives, a ``date`` when it contains only date directives and a
    ``time`` when it contains only time directives.  When the format has no
    year directive (``%y`` / ``%Y``) the current year is substituted.

    The second positional argument (the regex match) is ignored.

    Raises ValueError if the format contains neither kind of directive, or
    if ``datetime.strptime`` rejects the text.
    """

    def mentions_any(codes):
        return any("%" + code in type for code in codes)

    has_date_part = mentions_any("aAwdbBmyYjUW")
    has_time_part = mentions_any("HIpMSfz")

    parsed = datetime.strptime(x, type)
    year_given = "%y" in type or "%Y" in type
    if not year_given:
        parsed = parsed.replace(year=datetime.today().year)

    if has_date_part and has_time_part:
        return parsed
    if has_date_part:
        return parsed.date()
    if has_time_part:
        return parsed.time()
    raise ValueError("Datetime not a date nor a time?")

292 

293 

# ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
# Maps each supported strptime directive to a regex matching its text.
# The patterns must not contain capturing groups: the parser does not count
# groups coming from these patterns when it numbers its fields.
dt_format_to_regex = {
    "%a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)",
    "%A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)",
    "%w": "[0-6]",
    "%d": "[0-9]{1,2}",
    "%b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",
    "%B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)",
    "%m": "[0-9]{1,2}",
    "%y": "[0-9]{2}",
    "%Y": "[0-9]{4}",
    "%H": "[0-9]{1,2}",
    "%I": "[0-9]{1,2}",
    "%p": "(?:AM|PM)",
    "%M": "[0-9]{2}",
    "%S": "[0-9]{2}",
    "%f": "[0-9]{1,6}",
    # sign, hours, then optional minutes and seconds (colon optional);
    # "[+-]" rather than "[+|-]" so a literal "|" is not accepted as a sign
    "%z": "[+-][0-9]{2}(?::?[0-9]{2})?(?::?[0-9]{2})?",
    # "%Z": punt
    "%j": "[0-9]{1,3}",
    "%U": "[0-9]{1,2}",
    "%W": "[0-9]{1,2}",
}

# Compile a regular expression pattern that matches any date/time format symbol.
dt_format_symbols_re = re.compile("|".join(dt_format_to_regex))

320 

321 

def get_regex_for_datetime_format(format_):
    """
    Translate a datetime format string into a regex pattern.

    Every format symbol known to ``dt_format_to_regex`` is substituted by
    its regex; all other text in ``format_`` is passed through unchanged.

    Parameters:
        format_ (str): The datetime format string.

    Returns:
        str: A regex pattern corresponding to the datetime format string.
    """

    def symbol_to_regex(found):
        symbol = found.group(0)
        return dt_format_to_regex[symbol]

    return dt_format_symbols_re.sub(symbol_to_regex, format_)

334 

335 

class TooManyFields(ValueError):
    """Raised when the generated regex exceeds the group limit of ``re``."""

338 

339 

class RepeatedNameError(ValueError):
    """Raised when a field name is reused with a different type spec."""

342 

343 

# Characters of literal format text that need a backslash in a regex.
# note: {} are handled separately
REGEX_SAFETY = re.compile(r"([?\\.[\]()*+^$!|])")

# allowed field types: the single-letter types plus the two-letter "t?"
# date/time types
ALLOWED_TYPES = set("nbox%fFegwWdDsSl") | set("t" + suffix for suffix in "ieahgcts")

349 

350 

def _split_leading_digits(text):
    """Return ``(digits, rest)`` where ``digits`` is the leading digit run."""
    end = 0
    while end < len(text) and text[end].isdigit():
        end += 1
    return text[:end], text[end:]


def extract_format(format, extra_types):
    """Pull apart the format [[fill]align][sign][0][width][grouping][.precision][type]

    :param format: the format specification text (the part after ``:``)
    :param extra_types: mapping of additional, user-supplied type names
    :return: dict with the keys ``fill``, ``align``, ``zero``, ``width``,
        ``type``, ``format`` (the text left after parsing, i.e. the type)
        and ``extra_types``; ``grouping`` and ``precision`` are present only
        when the specification contained them
    :raises ValueError: if the type is not an allowed type, an extra type,
        or a string containing a datetime format symbol

    An empty specification is accepted and yields an empty type.
    """
    fill = align = None
    # guard against an empty spec before looking at the first character
    if format and format[0] in "<>=^":
        align = format[0]
        format = format[1:]
    elif len(format) > 1 and format[1] in "<>=^":
        fill = format[0]
        align = format[1]
        format = format[2:]

    # the sign flag carries no information for parsing; just skip it
    if format.startswith(("+", "-", " ")):
        format = format[1:]

    zero = False
    if format and format[0] == "0":
        zero = True
        format = format[1:]

    width, format = _split_leading_digits(format)

    # Extract grouping option
    grouping = None
    if format.startswith((",", "_")):
        grouping = format[0]
        format = format[1:]

    precision = None
    if format.startswith("."):
        # Precision isn't needed but we need to capture it so that
        # the ValueError isn't raised.
        precision, format = _split_leading_digits(format[1:])

    # the rest is the type, if present
    type_ = format
    if (
        type_
        and type_ not in ALLOWED_TYPES
        and type_ not in extra_types
        and not any(k in type_ for k in dt_format_to_regex)
    ):
        raise ValueError("format spec %r not recognised" % type_)

    spec = {
        "format": format,
        "extra_types": extra_types,
        "fill": fill,
        "align": align,
        "zero": zero,
        "width": width,
        "type": type_,
    }
    # these two keys exist only when the spec actually contained them
    if grouping is not None:
        spec["grouping"] = grouping
    if precision is not None:
        spec["precision"] = precision
    return spec

407 

408 

# Splits a format string into literal text, escaped braces and fields.
PARSE_RE = re.compile(
    r"("
    r"{{|}}"  # an escaped brace
    r"|{[\w-]*(?:\.[\w-]+|\[[^]]+])*(?::[^}]+)?}"  # {name.attr[key]:spec}
    r")"
)

410 

411 

class Parser(object):
    """Encapsulate a format string that may be used to parse other strings.

    The format is translated once, on construction, into a regular
    expression; the compiled forms are created lazily on first use.

    :param format: the format string, using ``{}``-style fields
    :param extra_types: optional mapping of type name to converter function
    :param case_sensitive: match with case when True (default: ignore case)
    """

    # fill characters that must be backslash-escaped inside a regex
    _FILL_SPECIALS = r".\+?*[](){}^$|"

    def __init__(self, format, extra_types=None, case_sensitive=False):
        # a mapping of a name as in {hello.world} to a regex-group compatible
        # name, like hello__world. It's used to prevent the transformation of
        # name-to-group and group to name to fail subtly, such as in:
        # hello_.world-> hello___world->hello._world
        self._group_to_name_map = {}
        # also store the original field name to group name mapping to allow
        # multiple instances of a name in the format string
        self._name_to_group_map = {}
        # and to sanity check the repeated instances store away the first
        # field type specification for the named field
        self._name_types = {}

        self._format = format
        if extra_types is None:
            extra_types = {}
        self._extra_types = extra_types
        if case_sensitive:
            self._re_flags = re.DOTALL
        else:
            self._re_flags = re.IGNORECASE | re.DOTALL
        self._fixed_fields = []
        self._named_fields = []
        self._group_index = 0
        self._type_conversions = {}
        self._expression = self._generate_expression()
        self.__search_re = None
        self.__match_re = None

        log.debug("format %r -> %r", format, self._expression)

    def __repr__(self):
        if len(self._format) > 20:
            return "<%s %r>" % (self.__class__.__name__, self._format[:17] + "...")
        return "<%s %r>" % (self.__class__.__name__, self._format)

    @property
    def _search_re(self):
        """Compiled regex used by search() / findall(), built on first use."""
        if self.__search_re is None:
            try:
                self.__search_re = re.compile(self._expression, self._re_flags)
            except AssertionError as exc:
                if str(exc).endswith("this version only supports 100 named groups"):
                    raise TooManyFields(
                        "sorry, you are attempting to parse too many complex fields"
                    )
                # any other assertion is unexpected: do not hide it
                raise
        return self.__search_re

    @property
    def _match_re(self):
        """Compiled, fully anchored regex used by parse(), built on first use."""
        if self.__match_re is None:
            expression = r"\A%s\Z" % self._expression
            try:
                self.__match_re = re.compile(expression, self._re_flags)
            except AssertionError as exc:
                if str(exc).endswith("this version only supports 100 named groups"):
                    raise TooManyFields(
                        "sorry, you are attempting to parse too many complex fields"
                    )
                # any other assertion is unexpected: do not hide it
                raise
            except re.error:
                raise NotImplementedError(
                    "Group names (e.g. (?P<name>) can "
                    "cause failure, as they are not escaped properly: '%s'" % expression
                )
        return self.__match_re

    @property
    def named_fields(self):
        """A copy of the list of regex group names of the named fields."""
        return self._named_fields[:]

    @property
    def fixed_fields(self):
        """A copy of the list of group indexes of the positional fields."""
        return self._fixed_fields[:]

    @property
    def format(self):
        """The format string this parser was built from."""
        return self._format

    def parse(self, string, evaluate_result=True):
        """Match my format to the string exactly.

        Return a Result or Match instance or None if there's no match.
        """
        m = self._match_re.match(string)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def search(self, string, pos=0, endpos=None, evaluate_result=True):
        """Search the string for my format.

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        If the ``evaluate_result`` argument is set to ``False`` a
        Match instance is returned instead of the actual Result instance.

        Return either a Result instance or None if there's no match.
        """
        if endpos is None:
            endpos = len(string)
        m = self._search_re.search(string, pos, endpos)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def findall(
        self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True
    ):
        """Search "string" for all occurrences of "format".

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        The ``extra_types`` argument is accepted but not used by this
        method; the types given to the constructor apply.

        Returns an iterator that holds Result or Match instances for each format match
        found.
        """
        if endpos is None:
            endpos = len(string)
        return ResultIterator(
            self, string, pos, endpos, evaluate_result=evaluate_result
        )

    def _expand_named_fields(self, named_fields):
        """Turn keys like ``aaa[bbb][ccc]`` into nested dictionaries."""
        result = {}
        for field, value in named_fields.items():
            # split 'aaa[bbb][ccc]...' into 'aaa' and '[bbb][ccc]...'
            n = field.find("[")
            if n == -1:
                basename, subkeys = field, ""
            else:
                basename, subkeys = field[:n], field[n:]

            # create nested dictionaries {'aaa': {'bbb': {'ccc': ...}}}
            d = result
            k = basename

            if subkeys:
                for subkey in re.findall(r"\[[^]]+]", subkeys):
                    d = d.setdefault(k, {})
                    k = subkey[1:-1]

            # assign the value to the last key
            d[k] = value

        return result

    def evaluate_result(self, m):
        """Generate a Result instance for the given regex match object"""
        # ok, figure the fixed fields we've pulled out and type convert them
        fixed_fields = list(m.groups())
        for n in self._fixed_fields:
            if n in self._type_conversions:
                fixed_fields[n] = self._type_conversions[n](fixed_fields[n], m)
        fixed_fields = tuple(fixed_fields[n] for n in self._fixed_fields)

        # grab the named fields, converting where requested
        groupdict = m.groupdict()
        named_fields = {}
        name_map = {}
        for k in self._named_fields:
            korig = self._group_to_name_map[k]
            name_map[korig] = k
            if k in self._type_conversions:
                value = self._type_conversions[k](groupdict[k], m)
            else:
                value = groupdict[k]

            named_fields[korig] = value

        # now figure the match spans
        spans = {n: m.span(name_map[n]) for n in named_fields}
        # _fixed_fields holds 0-based indexes; regex group numbers are 1-based
        spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields))

        # and that's our result
        return Result(fixed_fields, self._expand_named_fields(named_fields), spans)

    def _regex_replace(self, match):
        """Backslash-escape the single special character that was matched."""
        return "\\" + match.group(1)

    @staticmethod
    def _escape_fill(fill):
        """Return ``fill`` ready for use in a regex, escaped when special."""
        if fill in Parser._FILL_SPECIALS:
            return "\\" + fill
        return fill

    def _generate_expression(self):
        """Translate the ``_format`` attribute into a regex string."""
        e = []
        for part in PARSE_RE.split(self._format):
            if not part:
                continue
            elif part == "{{":
                e.append(r"\{")
            elif part == "}}":
                e.append(r"\}")
            elif part[0] == "{" and part[-1] == "}":
                # this will be a braces-delimited field to handle
                e.append(self._handle_field(part))
            else:
                # just some text to match
                e.append(REGEX_SAFETY.sub(self._regex_replace, part))
        return "".join(e)

    def _to_group_name(self, field):
        """Return a regex-group-safe, unique name for ``field`` and record it."""
        # return a version of field which can be used as capture group, even
        # though it might contain '.'
        group = field.replace(".", "_").replace("[", "_").replace("]", "_").replace("-", "_")

        # make sure we don't collide ("a.b" colliding with "a_b")
        n = 1
        while group in self._group_to_name_map:
            n += 1
            if "." in field:
                group = field.replace(".", "_" * n)
            elif "_" in field:
                group = field.replace("_", "_" * n)
            elif "-" in field:
                group = field.replace("-", "_" * n)
            else:
                raise KeyError("duplicated group name %r" % (field,))

        # save off the mapping
        self._group_to_name_map[group] = field
        self._name_to_group_map[field] = group
        return group

    def _handle_field(self, field):
        """Translate one ``{...}`` field into its regex, registering the
        field and any type conversion it needs.

        ``_group_index`` is advanced by the number of regex groups the
        returned pattern contains so later fields are numbered correctly.
        """
        # first: lose the braces
        field = field[1:-1]

        # now figure whether this is an anonymous or named field, and whether
        # there's any format specification
        format = ""

        if ":" in field:
            name, format = field.split(":", 1)
        else:
            name = field

        # This *should* be more flexible, but parsing complicated structures
        # out of the string is hard (and not necessarily useful) ... and I'm
        # being lazy. So for now `identifier` is "anything starting with a
        # letter" and digit args don't get attribute or element stuff.
        if name and name[0].isalpha():
            if name in self._name_to_group_map:
                if self._name_types[name] != format:
                    raise RepeatedNameError(
                        'field type %r for field "%s" '
                        "does not match previous seen type %r"
                        % (format, name, self._name_types[name])
                    )
                group = self._name_to_group_map[name]
                # match previously-seen value
                return r"(?P=%s)" % group
            else:
                group = self._to_group_name(name)
                self._name_types[name] = format
            self._named_fields.append(group)
            # this will become a group, which must not contain dots
            wrap = r"(?P<%s>%%s)" % group
        else:
            self._fixed_fields.append(self._group_index)
            wrap = r"(%s)"
            group = self._group_index

        # simplest case: no type specifier ({} or {name})
        if not format:
            self._group_index += 1
            return wrap % r".+?"

        # decode the format specification
        spec = extract_format(format, self._extra_types)

        # figure type conversions, if any
        type_ = spec["type"]
        # only the single-letter numeric types count; a plain substring test
        # would also accept multi-character types such as "%f"
        is_numeric = len(type_) == 1 and type_ in "n%fegdobx"
        conv = self._type_conversions
        if type_ in self._extra_types:
            type_converter = self._extra_types[type_]
            s = getattr(type_converter, "pattern", r".+?")
            regex_group_count = getattr(type_converter, "regex_group_count", 0)
            if regex_group_count is None:
                regex_group_count = 0
            self._group_index += regex_group_count
            conv[group] = convert_first(type_converter)
        elif type_ == "n":
            s = r"\d{1,3}([,.]\d{3})*"
            self._group_index += 1
            conv[group] = int_convert(10)
        elif type_ == "b":
            s = r"(0[bB])?[01]+"
            conv[group] = int_convert(2)
            self._group_index += 1
        elif type_ == "o":
            s = r"(0[oO])?[0-7]+"
            conv[group] = int_convert(8)
            self._group_index += 1
        elif type_ == "x":
            s = r"(0[xX])?[0-9a-fA-F]+"
            conv[group] = int_convert(16)
            self._group_index += 1
        elif type_ == "%":
            s = r"\d+(\.\d+)?%"
            self._group_index += 1
            conv[group] = percentage
        elif type_ == "f":
            # precision 0 formats without a decimal point (e.g. format(20.0, ".0f") == "20")
            s = r"\d+" if spec.get("precision") == "0" else r"\d*\.\d+"
            conv[group] = convert_first(float)
        elif type_ == "F":
            s = r"\d+" if spec.get("precision") == "0" else r"\d*\.\d+"
            conv[group] = convert_first(Decimal)
        elif type_ == "e":
            s = r"\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF"
            conv[group] = convert_first(float)
        elif type_ == "g":
            s = r"\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF"
            self._group_index += 2
            conv[group] = convert_first(float)
        elif type_ == "d":
            if spec.get("width"):
                width = r"{1,%s}" % int(spec["width"])
            else:
                width = "+"
            s = r"[-+ ]?[0-9{g}]{w}|[-+ ]?0[xX][0-9a-fA-F{g}]{w}|[-+ ]?0[bB][01{g}]{w}|[-+ ]?0[oO][0-7{g}]{w}".format(
                w=width,
                g=spec.get("grouping", ""),
            )
            conv[group] = int_convert()
            # do not specify number base, determine it automatically
        elif any(k in type_ for k in dt_format_to_regex):
            s = get_regex_for_datetime_format(type_)
            conv[group] = partial(strf_date_convert, type=type_)
        elif type_ == "ti":
            s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT
            n = self._group_index
            conv[group] = partial(date_convert, ymd=n + 1, hms=n + 4, tz=n + 7)
            self._group_index += 7
        elif type_ == "tg":
            s = r"(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?"
            s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(
                date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type_ == "ta":
            s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?"
            s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(
                date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type_ == "te":
            # this will allow microseconds through if they're present, but meh
            s = r"(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s"
            s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, dmy=n + 3, hms=n + 5, tz=n + 8)
            self._group_index += 8
        elif type_ == "th":
            # slight flexibility here from the stock Apache format
            s = r"(\d{1,2}[-/]%s[-/]\d{4}):%s%s" % (MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6)
            self._group_index += 6
        elif type_ == "tc":
            s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})"
            s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5)
            self._group_index += 8
        elif type_ == "tt":
            s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, hms=n + 1, am=n + 4, tz=n + 5)
            self._group_index += 5
        elif type_ == "ts":
            s = r"%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?" % MONTHS_PAT
            n = self._group_index
            conv[group] = partial(date_convert, mm=n + 1, dd=n + 3, hms=n + 5)
            self._group_index += 5
        elif type_ == "l":
            s = r"[A-Za-z]+"
        elif type_:
            # remaining single-letter types map to the regex class of the
            # same letter, e.g. "w" -> \w+
            s = r"\%s+" % type_
        elif spec.get("precision"):
            if spec.get("width"):
                s = r".{%s,%s}?" % (spec["width"], spec["precision"])
            else:
                s = r".{1,%s}?" % spec["precision"]
        elif spec.get("width"):
            s = r".{%s,}?" % spec["width"]
        else:
            s = r".+?"

        align = spec["align"]
        fill = spec["fill"]

        # handle some numeric-specific things like fill and sign
        if is_numeric:
            # prefix with something (align "=" trumps zero)
            if align == "=":
                # special case - align "=" acts like the zero above but with
                # configurable fill defaulting to "0"
                if not fill:
                    fill = "0"
                # escape the fill: it may be a regex special such as "*"
                s = r"%s*" % self._escape_fill(fill) + s

            # allow numbers to be prefixed with a sign
            s = r"[-+ ]?" + s

        if not fill:
            fill = " "

        # Place into a group now - this captures the value we want to keep.
        # Everything else from now is just padding to be stripped off
        s = wrap % s
        self._group_index += 1

        if spec["width"]:
            # all we really care about is that if the format originally
            # specified a width then there will probably be padding - without
            # an explicit alignment that'll mean right alignment with spaces
            # padding
            if not align:
                align = ">"

            fill = self._escape_fill(fill)

            # align "=" has been handled
            if align == "<":
                s = "%s%s*" % (s, fill)
            elif align == ">":
                s = "%s*%s" % (fill, s)
            elif align == "^":
                s = "%s*%s%s*" % (fill, s, fill)

        return s

866 

867 

class Result(object):
    """Holds the values extracted by a parse() or search().

    Positional values: ``result[index]`` (slices work too).

    Named values: ``result['name']``.

    Presence of a named value: ``'name' in result``.
    """

    def __init__(self, fixed, named, spans):
        self.fixed, self.named, self.spans = fixed, named, spans

    def __getitem__(self, item):
        # integers and slices address the positional values, anything else
        # is looked up among the named values
        positional = isinstance(item, (int, slice))
        source = self.fixed if positional else self.named
        return source[item]

    def __repr__(self):
        class_name = self.__class__.__name__
        return "<%s %r %r>" % (class_name, self.fixed, self.named)

    def __contains__(self, name):
        return name in self.named

894 

895 

class Match(object):
    """Returned by parse() or search() when results are not evaluated.

    It simply pairs the parser with the raw regex match object so the
    Result can be produced later, on demand.
    """

    def __init__(self, parser, match):
        self.parser, self.match = parser, match

    def evaluate_result(self):
        """Build and return the Result for the stored regex match."""
        owner = self.parser
        return owner.evaluate_result(self.match)

910 

911 

class ResultIterator(object):
    """The result of a findall() operation.

    Each element is a Result instance, or a Match instance when the
    iterator was created with ``evaluate_result=False``.

    :param parser: object providing ``_search_re`` and ``evaluate_result``
    :param string: the text to search
    :param pos: index at which the first search starts
    :param endpos: index at which searching stops
    :param evaluate_result: yield evaluated results when True
    """

    def __init__(self, parser, string, pos, endpos, evaluate_result=True):
        self.parser = parser
        self.string = string
        self.pos = pos
        self.endpos = endpos
        self.evaluate_result = evaluate_result

    def __iter__(self):
        return self

    def __next__(self):
        m = self.parser._search_re.search(self.string, self.pos, self.endpos)
        if m is None:
            raise StopIteration()

        if m.end() > m.start():
            self.pos = m.end()
        else:
            # A zero-length match would otherwise be found again at the same
            # position forever; step past it so the iteration terminates.
            self.pos = m.end() + 1

        if self.evaluate_result:
            return self.parser.evaluate_result(m)
        else:
            return Match(self.parser, m)

    # pre-py3k compat
    next = __next__

941 

942 

def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False):
    """Pull values out of "string" according to "format".

    The whole string has to match the format. When the value you are after
    is only a part of the string, use search() instead.

    With ``evaluate_result`` set to True the return value is a Result
    instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` set to False the return value is a Match
    instance with one method:

     .evaluate_result() - returns the Result instance you would have got
                          with ``evaluate_result`` set to True

    Matching ignores case by default; pass case_sensitive=True to match
    with case.

    A ValueError is raised when the format is invalid.

    See the module documentation for the use of "extra_types".

    When there is no match parse() returns None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.parse(string, evaluate_result=evaluate_result)
    return outcome

971 

972 

def search(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Find the first occurrence of "format" in "string".

    The format may match anywhere inside the string. When the format has to
    match the whole string, use parse() instead.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` set to True the return value is a Result
    instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` set to False the return value is a Match
    instance with one method:

     .evaluate_result() - returns the Result instance you would have got
                          with ``evaluate_result`` set to True

    Matching ignores case by default; pass case_sensitive=True to match
    with case.

    A ValueError is raised when the format is invalid.

    See the module documentation for the use of "extra_types".

    When there is no match search() returns None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.search(string, pos, endpos, evaluate_result=evaluate_result)
    return outcome

1012 

1013 

def findall(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Find every occurrence of "format" in "string".

    The return value is an iterator that yields one item for each place the
    format matched.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` set to True each item is a Result instance
    with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` set to False each item is a Match instance
    with one method:

     .evaluate_result() - returns the Result instance you would have got
                          with ``evaluate_result`` set to True

    Matching ignores case by default; pass case_sensitive=True to match
    with case.

    A ValueError is raised when the format is invalid.

    See the module documentation for the use of "extra_types".
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    matches = parser.findall(string, pos, endpos, evaluate_result=evaluate_result)
    return matches

1050 

1051 

def compile(format, extra_types=None, case_sensitive=False):
    """Build a reusable Parser instance for "format".

    The Parser's .parse(string) method behaves in the same manner as
    parse(format, string).

    Matching ignores case by default; pass case_sensitive=True to match
    with case.

    Prefer this function when many strings are going to be parsed with the
    same format.

    See the module documentation for the use of "extra_types".

    Returns a Parser instance.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    return parser

1069 

1070 

1071# Copyright (c) 2012-2020 Richard Jones <richard@python.org> 

1072# 

1073# Permission is hereby granted, free of charge, to any person obtaining a copy 

1074# of this software and associated documentation files (the "Software"), to deal 

1075# in the Software without restriction, including without limitation the rights 

1076# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 

1077# copies of the Software, and to permit persons to whom the Software is 

1078# furnished to do so, subject to the following conditions: 

1079# 

1080# The above copyright notice and this permission notice shall be included in 

1081# all copies or substantial portions of the Software. 

1082# 

1083# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 

1084# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 

1085# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 

1086# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 

1087# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 

1088# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 

1089# SOFTWARE. 

1090 

1091# vim: set filetype=python ts=4 sw=4 et si tw=75