Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/parse.py: 77%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

545 statements  

1from __future__ import absolute_import 

2 

3import logging 

4import re 

5import sys 

6from datetime import datetime 

7from datetime import time 

8from datetime import timedelta 

9from datetime import tzinfo 

10from decimal import Decimal 

11from functools import partial 

12 

13 

14__version__ = "1.20.2" 

15__all__ = ["parse", "search", "findall", "with_pattern"] 

16 

17log = logging.getLogger(__name__) 

18 

19 

def with_pattern(pattern, regex_group_count=None):
    r"""Build a decorator that tags a type converter with its regex pattern.

    The decorated function is returned unchanged apart from two new
    attributes, :attr:`pattern` and :attr:`regex_group_count`.

    EXAMPLE:
        >>> import parse
        >>> @parse.with_pattern(r"\d+")
        ... def parse_number(text):
        ...     return int(text)

    is equivalent to:

        >>> def parse_number(text):
        ...     return int(text)
        >>> parse_number.pattern = r"\d+"

    :param pattern: regular expression pattern (as text)
    :param regex_group_count: Indicates how many regex-groups are in pattern.
    :return: wrapped function
    """

    def attach(converter):
        # Annotate in place; the converter itself is not wrapped.
        setattr(converter, "pattern", pattern)
        setattr(converter, "regex_group_count", regex_group_count)
        return converter

    return attach

49 

50 

class int_convert:
    """Convert a string to an integer.

    The string may start with a sign ("-" or "+") or with a single space
    occupying the sign position.

    It may be of a base other than 2, 8, 10 or 16.

    If base isn't specified, it will be detected automatically based
    on a string format. When string starts with a base indicator, 0#nnnn,
    it overrides the default base of 10.

    It may also have other non-numeric characters that we can ignore.
    """

    CHARS = "0123456789abcdefghijklmnopqrstuvwxyz"

    # base indicator (lower-cased) -> numeric base
    _BASE_PREFIXES = {"0b": 2, "0o": 8, "0x": 16}

    def __init__(self, base=None):
        """Remember the numeric base; ``None`` means "detect from the text"."""
        self.base = base

    def __call__(self, string, match):
        """Return the integer value of ``string``.

        :param string: text of the number, optionally signed / prefixed
        :param match: the regex match object (unused, part of the converter
            calling convention)
        :raises ValueError: if no digits remain after stripping
        """
        # Slicing (rather than indexing) keeps an empty string from raising
        # IndexError; int() below reports it as a ValueError instead.
        lead = string[:1]
        sign = -1 if lead == "-" else 1
        # A space may sit where the sign would be; skip it as well so that
        # base detection looks at the real first digit.
        number_start = 1 if lead in ("-", "+", " ") else 0

        base = self.base
        # If base wasn't specified, detect it automatically
        if base is None:
            # Assume decimal number, unless different base is detected
            base = 10

            # For number formats starting with 0b, 0o, 0x, use corresponding base ...
            if len(string) - number_start > 2:
                prefix = string[number_start:number_start + 2].lower()
                base = self._BASE_PREFIXES.get(prefix, 10)

        # Drop everything that is not a digit of the chosen base (sign,
        # base indicator letters, thousands separators, padding).
        chars = int_convert.CHARS[:base]
        string = re.sub("[^%s]" % chars, "", string.lower())
        return sign * int(string, base)

99 

100 

class convert_first:
    """Adapt a one-argument converter to the ``(string, match)`` convention.

    Behaves like ``lambda s, m: converter(s)`` but, being a plain class
    instance rather than a lambda, it can be pickled.
    """

    def __init__(self, converter):
        self.converter = converter

    def __call__(self, string, match):
        # The match object is deliberately ignored.
        convert = self.converter
        return convert(string)

111 

112 

def percentage(string, match):
    """Convert text such as ``"50%"`` into the fraction ``0.5``.

    The last character of ``string`` is dropped (expected to be the "%"
    sign) and the remainder is divided by 100. ``match`` is unused.
    """
    number_text = string[:-1]
    return float(number_text) / 100.0

115 

116 

class FixedTzOffset(tzinfo):
    """Fixed offset in minutes east from UTC."""

    ZERO = timedelta(0)

    def __init__(self, offset, name):
        """Create a timezone ``offset`` minutes east of UTC called ``name``."""
        self._offset = timedelta(minutes=offset)
        self._name = name

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self._name, self._offset)

    def utcoffset(self, dt):
        """Return the fixed offset, regardless of ``dt``."""
        return self._offset

    def tzname(self, dt):
        """Return the name given at construction, regardless of ``dt``."""
        return self._name

    def dst(self, dt):
        """Return a zero timedelta: a fixed offset has no DST component."""
        return self.ZERO

    def __eq__(self, other):
        if not isinstance(other, FixedTzOffset):
            return NotImplemented
        return self._name == other._name and self._offset == other._offset

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None in Python 3, which
        # makes instances unusable in sets and as dict keys. Hash the same
        # fields that __eq__ compares so equal objects hash equally.
        return hash((self._name, self._offset))

142 

143 

# Month name (abbreviated and full, capitalised) -> month number.
MONTHS_MAP = {
    "Jan": 1,
    "January": 1,
    "Feb": 2,
    "February": 2,
    "Mar": 3,
    "March": 3,
    "Apr": 4,
    "April": 4,
    "May": 5,
    "Jun": 6,
    "June": 6,
    "Jul": 7,
    "July": 7,
    "Aug": 8,
    "August": 8,
    "Sep": 9,
    "September": 9,
    "Oct": 10,
    "October": 10,
    "Nov": 11,
    "November": 11,
    "Dec": 12,
    "December": 12,
}
# Regex fragments used to assemble the date/time field patterns. Each one is
# wrapped in a capturing group, and the date converters address captured
# text by group position, so adding or removing a group here changes which
# index every user of the fragment must pass.
# Abbreviated day-of-week names.
DAYS_PAT = r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
# Abbreviated month names only.
MONTHS_PAT = r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
# Every key of MONTHS_MAP (abbreviated and full names).
ALL_MONTHS_PAT = r"(%s)" % "|".join(MONTHS_MAP)
# H:M with optional :S and optional fractional seconds (three groups).
TIME_PAT = r"(\d{1,2}:\d{1,2}(:\d{1,2}(\.\d+)?)?)"
# Whitespace followed by AM or PM.
AM_PAT = r"(\s+[AP]M)"
# Whitespace followed by a signed numeric offset such as +1:00, +0100, -01:30.
TZ_PAT = r"(\s+[-+]\d\d?:?\d\d)"

175 

176 

def date_convert(
    string,
    match,
    ymd=None,
    mdy=None,
    dmy=None,
    d_m_y=None,
    hms=None,
    am=None,
    tz=None,
    mm=None,
    dd=None,
):
    """Convert the incoming string containing some date / time info into a
    datetime instance.

    All keyword arguments are indexes into ``match.groups()`` telling the
    converter where each piece of the date/time was captured:

    :param string: the matched text (unused; the groups are read instead)
    :param match: regex match object whose groups hold the date parts
    :param ymd: group holding "year-month-day" (separated by -, / or space)
    :param mdy: group holding "month-day-year"
    :param dmy: group holding "day-month-year"
    :param d_m_y: 3-tuple of group indexes for day, month and year
    :param hms: group holding "H:M" or "H:M:S[.fraction]"
    :param am: group holding the AM/PM marker
    :param tz: group holding "Z" or a numeric offset
    :param mm: group holding the month (used together with ``dd``; the
        current year is assumed)
    :param dd: group holding the day of month
    :return: a ``datetime`` when a date was given, otherwise a ``time``

    Matching is case-insensitive by default, so the AM/PM marker, the "Z"
    suffix and month names are all interpreted case-insensitively here.
    """
    groups = match.groups()
    time_only = False
    if mm and dd:
        y = datetime.today().year
        m = groups[mm]
        d = groups[dd]
    elif ymd is not None:
        y, m, d = re.split(r"[-/\s]", groups[ymd])
    elif mdy is not None:
        m, d, y = re.split(r"[-/\s]", groups[mdy])
    elif dmy is not None:
        d, m, y = re.split(r"[-/\s]", groups[dmy])
    elif d_m_y is not None:
        d, m, y = d_m_y
        d = groups[d]
        m = groups[m]
        y = groups[y]
    else:
        time_only = True

    H = M = S = u = 0
    if hms is not None and groups[hms]:
        t = groups[hms].split(":")
        if len(t) == 2:
            H, M = t
        else:
            H, M, S = t
            if "." in S:
                S, u = S.split(".")
                # Pad/truncate the fraction to exactly six digits. Doing this
                # on the text avoids float rounding turning e.g. N into N-1
                # microseconds.
                u = int(u[:6].ljust(6, "0"))
            S = int(S)
        H = int(H)
        M = int(M)

    if am is not None:
        am = groups[am]
        if am:
            # The pattern may have matched "pm"/"Pm" under IGNORECASE.
            am = am.strip().upper()
        if am == "AM" and H == 12:
            # correction for "12" hour functioning as "0" hour: 12:15 AM = 00:15 by 24 hr clock
            H -= 12
        elif am == "PM" and H == 12:
            # no correction needed: 12PM is midday, 12:00 by 24 hour clock
            pass
        elif am == "PM":
            H += 12

    if tz is not None:
        tz = groups[tz]
        if tz:
            tz = tz.strip()
            if tz.upper() == "Z":
                # "z" is accepted too: the pattern is case-insensitive by default.
                tz = FixedTzOffset(0, "UTC")
            elif tz.isupper():
                # TODO use the awesome python TZ module?
                pass
            else:
                sign = tz[0]
                if ":" in tz:
                    tzh, tzm = tz[1:].split(":")
                elif len(tz) == 4:  # 'snnn'
                    tzh, tzm = tz[1], tz[2:4]
                else:
                    tzh, tzm = tz[1:3], tz[3:5]
                offset = int(tzm) + int(tzh) * 60
                if sign == "-":
                    offset = -offset
                tz = FixedTzOffset(offset, tz)

    if time_only:
        d = time(H, M, S, u, tzinfo=tz)
    else:
        y = int(y)
        if m.isdigit():
            m = int(m)
        else:
            # MONTHS_MAP keys are capitalised ("Jan", "January"); normalise
            # so that "jan" / "JAN" matched under IGNORECASE resolve too.
            m = MONTHS_MAP[m.capitalize()]
        d = int(d)
        d = datetime(y, m, d, H, M, S, u, tzinfo=tz)

    return d

274 

275 

def strf_date_convert(x, _, type):
    """Parse ``x`` with the strftime-style format ``type``.

    Returns a ``datetime`` when the format has both date and time
    directives, a ``date`` when it has only date directives and a ``time``
    when it has only time directives. If the format names no year, the
    current year is substituted.

    :raises ValueError: if ``x`` does not fit ``type`` or the format has
        neither date nor time directives
    """
    has_date = any("%" + code in type for code in "aAwdbBmyYjUW")
    has_time = any("%" + code in type for code in "HIpMSfz")

    parsed = datetime.strptime(x, type)
    if not ("%y" in type or "%Y" in type):  # year not specified
        parsed = parsed.replace(year=datetime.today().year)

    if has_date:
        return parsed if has_time else parsed.date()
    if has_time:
        return parsed.time()
    raise ValueError("Datetime not a date nor a time?")

292 

293 

# ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
# Every pattern here must be free of capturing groups: the field regex built
# from these is wrapped in a single group and the parser does not account
# for any extra groups inside it.
dt_format_to_regex = {
    "%a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)",
    "%A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)",
    "%w": "[0-6]",
    "%d": "[0-9]{1,2}",
    "%b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",
    "%B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)",
    "%m": "[0-9]{1,2}",
    "%y": "[0-9]{2}",
    "%Y": "[0-9]{4}",
    "%H": "[0-9]{1,2}",
    "%I": "[0-9]{1,2}",
    "%p": "(?:AM|PM)",
    "%M": "[0-9]{2}",
    "%S": "[0-9]{2}",
    "%f": "[0-9]{1,6}",
    # sign, hours, then optional minutes and seconds each with an optional
    # ":" separator. Non-capturing groups, and no stray "|" in the sign class.
    "%z": "[+-][0-9]{2}(?::?[0-9]{2})?(?::?[0-9]{2})?",
    # "%Z": punt
    "%j": "[0-9]{1,3}",
    "%U": "[0-9]{1,2}",
    "%W": "[0-9]{1,2}",
}

# Compile a regular expression pattern that matches any date/time format symbol.
dt_format_symbols_re = re.compile("|".join(dt_format_to_regex))


def get_regex_for_datetime_format(format_):
    """
    Generate a regex pattern for a given datetime format string.

    Each known format symbol is replaced by its pattern from
    ``dt_format_to_regex``; all text between symbols is regex-escaped so
    that characters such as "." or "(" match only themselves.

    Parameters:
        format_ (str): The datetime format string.

    Returns:
        str: A regex pattern corresponding to the datetime format string.
    """
    pieces = []
    pos = 0
    for symbol in dt_format_symbols_re.finditer(format_):
        # literal text preceding this symbol
        pieces.append(re.escape(format_[pos:symbol.start()]))
        pieces.append(dt_format_to_regex[symbol.group(0)])
        pos = symbol.end()
    # trailing literal text after the last symbol
    pieces.append(re.escape(format_[pos:]))
    return "".join(pieces)

334 

335 

class TooManyFields(ValueError):
    """Raised when a format needs more regex groups than ``re`` can compile."""

338 

339 

class RepeatedNameError(ValueError):
    """Raised when a field name is reused with a different type specifier."""

342 

343 

# Characters in literal format text that must be backslash-escaped before
# the text is used as a regex.
# note: {} are handled separately
REGEX_SAFETY = re.compile(r"([?\\.[\]()*+^$!|])")

# allowed field types: the single-character codes plus the two-character
# "t?" date/time codes
ALLOWED_TYPES = set("nbox%fFegwWdDsSl") | {"t" + suffix for suffix in "ieahgcts"}

349 

350 

def extract_format(format, extra_types):
    """Pull apart the format [[fill]align][sign][0][width][.precision][type]

    :param format: the format specification text (the part after ":")
    :param extra_types: mapping of user-defined type names; a type found
        here is accepted even if it is not a built-in type
    :return: dict with keys ``fill``, ``align``, ``zero``, ``width``,
        ``type``, plus ``precision`` when the spec contains a "." part.
        ``width`` and ``precision`` are digit strings (possibly empty).
        The keys ``format`` (same value as ``type``) and ``extra_types``
        are also present for backward compatibility.
    :raises ValueError: if the type is not recognised
    """
    fill = align = None
    # Slicing keeps an empty spec from raising IndexError.
    if format[:1] in ("<", ">", "=", "^"):
        align = format[0]
        format = format[1:]
    elif len(format) > 1 and format[1] in "<>=^":
        fill = format[0]
        align = format[1]
        format = format[2:]

    # A sign flag is accepted but carries no information for parsing.
    if format.startswith(("+", "-", " ")):
        format = format[1:]

    zero = False
    if format and format[0] == "0":
        zero = True
        format = format[1:]

    width = ""
    while format and format[0].isdigit():
        width += format[0]
        format = format[1:]

    precision = None
    if format.startswith("."):
        # Precision isn't needed but we need to capture it so that
        # the ValueError isn't raised.
        format = format[1:]  # drop the '.'
        precision = ""
        while format and format[0].isdigit():
            precision += format[0]
            format = format[1:]

    # the rest is the type, if present
    type_ = format
    if (
        type_
        and type_ not in ALLOWED_TYPES
        and type_ not in extra_types
        and not any(k in type_ for k in dt_format_to_regex)
    ):
        raise ValueError("format spec %r not recognised" % type_)

    # Built explicitly (rather than via locals()) so that adding a local
    # variable can never silently change the returned mapping.
    result = {
        "format": format,
        "extra_types": extra_types,
        "fill": fill,
        "align": align,
        "zero": zero,
        "width": width,
        "type": type_,
    }
    if precision is not None:
        result["precision"] = precision
    return result

399 

400 

# Splits a format string into literal text and brace tokens. Because the
# whole pattern is one capturing group, re.split() keeps the tokens in its
# output. A token is "{{", "}}", or a field: "{" + optional name made of
# word characters / "-", followed by any number of ".attr" or "[key]"
# parts, then an optional ":spec" (anything up to the closing brace) + "}".
PARSE_RE = re.compile(r"({{|}}|{[\w-]*(?:\.[\w-]+|\[[^]]+])*(?::[^}]+)?})")

402 

403 

class Parser(object):
    """Encapsulate a format string that may be used to parse other strings."""

    # Single-character type codes that get numeric treatment (optional
    # sign prefix, "=" alignment fill).
    _NUMERIC_TYPES = frozenset("n%fegdobx")

    # Fill characters that have a special meaning in a regex.
    _SPECIAL_FILL_CHARS = r".\+?*[](){}^$|"

    def __init__(self, format, extra_types=None, case_sensitive=False):
        """Translate ``format`` into a regular expression.

        :param format: the format string, using ``{}`` style fields
        :param extra_types: optional mapping of type name -> converter
        :param case_sensitive: match case sensitively when true (the
            default is case-insensitive matching)
        :raises ValueError: if a field's format spec is not recognised
        """
        # a mapping of a name as in {hello.world} to a regex-group compatible
        # name, like hello__world. It's used to prevent the transformation of
        # name-to-group and group to name to fail subtly, such as in:
        # hello_.world-> hello___world->hello._world
        self._group_to_name_map = {}
        # also store the original field name to group name mapping to allow
        # multiple instances of a name in the format string
        self._name_to_group_map = {}
        # and to sanity check the repeated instances store away the first
        # field type specification for the named field
        self._name_types = {}

        self._format = format
        if extra_types is None:
            extra_types = {}
        self._extra_types = extra_types
        if case_sensitive:
            self._re_flags = re.DOTALL
        else:
            self._re_flags = re.IGNORECASE | re.DOTALL
        self._fixed_fields = []
        self._named_fields = []
        self._group_index = 0
        self._type_conversions = {}
        self._expression = self._generate_expression()
        # compiled lazily by the _search_re / _match_re properties
        self.__search_re = None
        self.__match_re = None

        log.debug("format %r -> %r", format, self._expression)

    def __repr__(self):
        if len(self._format) > 20:
            return "<%s %r>" % (self.__class__.__name__, self._format[:17] + "...")
        return "<%s %r>" % (self.__class__.__name__, self._format)

    @property
    def _search_re(self):
        """Compiled regex used by search()/findall(), built on first use."""
        if self.__search_re is None:
            try:
                self.__search_re = re.compile(self._expression, self._re_flags)
            except AssertionError as exc:
                if str(exc).endswith("this version only supports 100 named groups"):
                    raise TooManyFields(
                        "sorry, you are attempting to parse too many complex fields"
                    )
                # Any other assertion must not be swallowed: doing so would
                # leave this property returning None.
                raise
        return self.__search_re

    @property
    def _match_re(self):
        """Compiled, fully anchored regex used by parse(), built on first use."""
        if self.__match_re is None:
            expression = r"\A%s\Z" % self._expression
            try:
                self.__match_re = re.compile(expression, self._re_flags)
            except AssertionError as exc:
                if str(exc).endswith("this version only supports 100 named groups"):
                    raise TooManyFields(
                        "sorry, you are attempting to parse too many complex fields"
                    )
                # see _search_re: never fall through with an uncompiled regex
                raise
            except re.error:
                raise NotImplementedError(
                    "Group names (e.g. (?P<name>) can "
                    "cause failure, as they are not escaped properly: '%s'" % expression
                )
        return self.__match_re

    @property
    def named_fields(self):
        """A copy of the list of regex group names of the named fields."""
        return self._named_fields[:]

    @property
    def fixed_fields(self):
        """A copy of the list of regex group indexes of the fixed fields."""
        return self._fixed_fields[:]

    @property
    def format(self):
        """The format string this parser was built from."""
        return self._format

    def parse(self, string, evaluate_result=True):
        """Match my format to the string exactly.

        Return a Result or Match instance or None if there's no match.
        """
        m = self._match_re.match(string)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def search(self, string, pos=0, endpos=None, evaluate_result=True):
        """Search the string for my format.

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        If the ``evaluate_result`` argument is set to ``False`` a
        Match instance is returned instead of the actual Result instance.

        Return either a Result instance or None if there's no match.
        """
        if endpos is None:
            endpos = len(string)
        m = self._search_re.search(string, pos, endpos)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def findall(
        self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True
    ):
        """Search "string" for all occurrences of "format".

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        ``extra_types`` is accepted for backward compatibility only and is
        ignored; the types given at construction are the ones used.

        Returns an iterator that holds Result or Match instances for each format match
        found.
        """
        if endpos is None:
            endpos = len(string)
        return ResultIterator(
            self, string, pos, endpos, evaluate_result=evaluate_result
        )

    def _expand_named_fields(self, named_fields):
        """Turn keys like ``aaa[bbb][ccc]`` into nested dictionaries."""
        result = {}
        for field, value in named_fields.items():
            # split 'aaa[bbb][ccc]...' into 'aaa' and '[bbb][ccc]...'
            n = field.find("[")
            if n == -1:
                basename, subkeys = field, ""
            else:
                basename, subkeys = field[:n], field[n:]

            # create nested dictionaries {'aaa': {'bbb': {'ccc': ...}}}
            d = result
            k = basename

            if subkeys:
                for subkey in re.findall(r"\[[^]]+]", subkeys):
                    d = d.setdefault(k, {})
                    k = subkey[1:-1]

            # assign the value to the last key
            d[k] = value

        return result

    def evaluate_result(self, m):
        """Generate a Result instance for the given regex match object"""
        # ok, figure the fixed fields we've pulled out and type convert them
        fixed_fields = list(m.groups())
        for n in self._fixed_fields:
            if n in self._type_conversions:
                fixed_fields[n] = self._type_conversions[n](fixed_fields[n], m)
        fixed_fields = tuple(fixed_fields[n] for n in self._fixed_fields)

        # grab the named fields, converting where requested
        groupdict = m.groupdict()
        named_fields = {}
        name_map = {}
        for k in self._named_fields:
            korig = self._group_to_name_map[k]
            name_map[korig] = k
            if k in self._type_conversions:
                value = self._type_conversions[k](groupdict[k], m)
            else:
                value = groupdict[k]

            named_fields[korig] = value

        # now figure the match spans
        spans = {n: m.span(name_map[n]) for n in named_fields}
        # fixed field indexes are 0-based into groups(); span() is 1-based
        spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields))

        # and that's our result
        return Result(fixed_fields, self._expand_named_fields(named_fields), spans)

    def _regex_replace(self, match):
        """Backslash-escape the single character captured by REGEX_SAFETY."""
        return "\\" + match.group(1)

    @classmethod
    def _escape_fill(cls, fill):
        """Return ``fill`` escaped for use as a literal in a regex."""
        if fill and fill in cls._SPECIAL_FILL_CHARS:
            return "\\" + fill
        return fill

    def _generate_expression(self):
        """Build the regex text for ``self._format``."""
        # turn my _format attribute into the _expression attribute
        e = []
        for part in PARSE_RE.split(self._format):
            if not part:
                continue
            elif part == "{{":
                e.append(r"\{")
            elif part == "}}":
                e.append(r"\}")
            elif part[0] == "{" and part[-1] == "}":
                # this will be a braces-delimited field to handle
                e.append(self._handle_field(part))
            else:
                # just some text to match
                e.append(REGEX_SAFETY.sub(self._regex_replace, part))
        return "".join(e)

    def _to_group_name(self, field):
        """Map a field name onto a unique, regex-safe group name."""
        # return a version of field which can be used as capture group, even
        # though it might contain '.'
        group = field.replace(".", "_").replace("[", "_").replace("]", "_").replace("-", "_")

        # make sure we don't collide ("a.b" colliding with "a_b")
        n = 1
        while group in self._group_to_name_map:
            n += 1
            if "." in field:
                group = field.replace(".", "_" * n)
            elif "_" in field:
                group = field.replace("_", "_" * n)
            elif "-" in field:
                group = field.replace("-", "_" * n)
            else:
                raise KeyError("duplicated group name %r" % (field,))

        # save off the mapping
        self._group_to_name_map[group] = field
        self._name_to_group_map[field] = group
        return group

    def _handle_field(self, field):
        """Return the regex for one ``{...}`` field and register its converter.

        Also advances ``self._group_index`` by the number of capturing
        groups the returned regex contains, so later fields know their
        group positions.
        """
        # first: lose the braces
        field = field[1:-1]

        # now figure whether this is an anonymous or named field, and whether
        # there's any format specification
        format = ""

        if ":" in field:
            name, format = field.split(":", 1)
        else:
            name = field

        # This *should* be more flexible, but parsing complicated structures
        # out of the string is hard (and not necessarily useful) ... and I'm
        # being lazy. So for now `identifier` is "anything starting with a
        # letter" and digit args don't get attribute or element stuff.
        if name and name[0].isalpha():
            if name in self._name_to_group_map:
                if self._name_types[name] != format:
                    raise RepeatedNameError(
                        'field type %r for field "%s" '
                        "does not match previous seen type %r"
                        % (format, name, self._name_types[name])
                    )
                group = self._name_to_group_map[name]
                # match previously-seen value
                return r"(?P=%s)" % group
            else:
                group = self._to_group_name(name)
                self._name_types[name] = format
            self._named_fields.append(group)
            # this will become a group, which must not contain dots
            wrap = r"(?P<%s>%%s)" % group
        else:
            self._fixed_fields.append(self._group_index)
            wrap = r"(%s)"
            group = self._group_index

        # simplest case: no type specifier ({} or {name})
        if not format:
            self._group_index += 1
            return wrap % r".+?"

        # decode the format specification
        format = extract_format(format, self._extra_types)

        # figure type conversions, if any
        type = format["type"]
        # Exact membership, not a substring test: otherwise a multi-character
        # type such as the strftime format "%f" would count as numeric.
        is_numeric = type in self._NUMERIC_TYPES
        conv = self._type_conversions
        if type in self._extra_types:
            type_converter = self._extra_types[type]
            s = getattr(type_converter, "pattern", r".+?")
            regex_group_count = getattr(type_converter, "regex_group_count", 0)
            if regex_group_count is None:
                regex_group_count = 0
            self._group_index += regex_group_count
            conv[group] = convert_first(type_converter)
        elif type == "n":
            s = r"\d{1,3}([,.]\d{3})*"
            self._group_index += 1
            conv[group] = int_convert(10)
        elif type == "b":
            s = r"(0[bB])?[01]+"
            conv[group] = int_convert(2)
            self._group_index += 1
        elif type == "o":
            s = r"(0[oO])?[0-7]+"
            conv[group] = int_convert(8)
            self._group_index += 1
        elif type == "x":
            s = r"(0[xX])?[0-9a-fA-F]+"
            conv[group] = int_convert(16)
            self._group_index += 1
        elif type == "%":
            s = r"\d+(\.\d+)?%"
            self._group_index += 1
            conv[group] = percentage
        elif type == "f":
            s = r"\d*\.\d+"
            conv[group] = convert_first(float)
        elif type == "F":
            s = r"\d*\.\d+"
            conv[group] = convert_first(Decimal)
        elif type == "e":
            s = r"\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF"
            conv[group] = convert_first(float)
        elif type == "g":
            s = r"\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF"
            self._group_index += 2
            conv[group] = convert_first(float)
        elif type == "d":
            if format.get("width"):
                width = r"{1,%s}" % int(format["width"])
            else:
                width = "+"
            s = r"\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}".format(
                w=width
            )
            conv[group] = int_convert()
            # do not specify number base, determine it automatically
        elif any(k in type for k in dt_format_to_regex):
            s = get_regex_for_datetime_format(type)
            conv[group] = partial(strf_date_convert, type=type)
        elif type == "ti":
            s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT
            n = self._group_index
            conv[group] = partial(date_convert, ymd=n + 1, hms=n + 4, tz=n + 7)
            self._group_index += 7
        elif type == "tg":
            s = r"(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?"
            s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(
                date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == "ta":
            s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?"
            s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(
                date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif type == "te":
            # this will allow microseconds through if they're present, but meh
            s = r"(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s"
            s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, dmy=n + 3, hms=n + 5, tz=n + 8)
            self._group_index += 8
        elif type == "th":
            # slight flexibility here from the stock Apache format
            s = r"(\d{1,2}[-/]%s[-/]\d{4}):%s%s" % (MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6)
            self._group_index += 6
        elif type == "tc":
            s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})"
            s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5)
            self._group_index += 8
        elif type == "tt":
            s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, hms=n + 1, am=n + 4, tz=n + 5)
            self._group_index += 5
        elif type == "ts":
            s = r"%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?" % MONTHS_PAT
            n = self._group_index
            conv[group] = partial(date_convert, mm=n + 1, dd=n + 3, hms=n + 5)
            self._group_index += 5
        elif type == "l":
            s = r"[A-Za-z]+"
        elif type:
            # remaining one-letter types map straight onto a regex class
            # escape, e.g. "w" -> \w+
            s = r"\%s+" % type
        elif format.get("precision"):
            if format.get("width"):
                s = r".{%s,%s}?" % (format["width"], format["precision"])
            else:
                s = r".{1,%s}?" % format["precision"]
        elif format.get("width"):
            s = r".{%s,}?" % format["width"]
        else:
            s = r".+?"

        align = format["align"]
        fill = format["fill"]

        # handle some numeric-specific things like fill and sign
        if is_numeric:
            # prefix with something (align "=" trumps zero)
            if align == "=":
                # special case - align "=" acts like the zero above but with
                # configurable fill defaulting to "0"
                if not fill:
                    fill = "0"
                # escape so a fill such as "*" or "|" stays a literal
                s = r"%s*" % self._escape_fill(fill) + s

            # allow numbers to be prefixed with a sign
            s = r"[-+ ]?" + s

        if not fill:
            fill = " "

        # Place into a group now - this captures the value we want to keep.
        # Everything else from now is just padding to be stripped off
        if wrap:
            s = wrap % s
            self._group_index += 1

        if format["width"]:
            # all we really care about is that if the format originally
            # specified a width then there will probably be padding - without
            # an explicit alignment that'll mean right alignment with spaces
            # padding
            if not align:
                align = ">"

        fill = self._escape_fill(fill)

        # align "=" has been handled
        if align == "<":
            s = "%s%s*" % (s, fill)
        elif align == ">":
            s = "%s*%s" % (fill, s)
        elif align == "^":
            s = "%s*%s%s*" % (fill, s, fill)

        return s

856 

857 

class Result(object):
    """Values extracted by a successful parse() or search().

    * ``result[index]`` / ``result[start:stop]`` read the fixed
      (positional) values.
    * ``result['name']`` reads a named value.
    * ``'name' in result`` tests whether a named value exists.

    The ``spans`` attribute holds whatever span mapping was supplied at
    construction.
    """

    def __init__(self, fixed, named, spans):
        self.fixed, self.named, self.spans = fixed, named, spans

    def __getitem__(self, item):
        # integers and slices address positional values, anything else is a name
        source = self.fixed if isinstance(item, (int, slice)) else self.named
        return source[item]

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "<%s %r %r>" % (cls_name, self.fixed, self.named)

    def __contains__(self, name):
        return name in self.named

884 

885 

class Match(object):
    """Unevaluated outcome of a parse() or search().

    Holds the parser and the raw regex match object so that the (possibly
    costly) conversion into a Result can be performed later, on demand,
    through :meth:`evaluate_result`.
    """

    def __init__(self, parser, match):
        self.parser, self.match = parser, match

    def evaluate_result(self):
        """Ask the stored parser to build the Result for the stored match."""
        owner = self.parser
        return owner.evaluate_result(self.match)

900 

901 

class ResultIterator(object):
    """The result of a findall() operation.

    Each element is a Result instance (or a Match instance when
    ``evaluate_result`` is false).
    """

    def __init__(self, parser, string, pos, endpos, evaluate_result=True):
        """Prepare to scan ``string[pos:endpos]`` with ``parser``."""
        self.parser = parser
        self.string = string
        self.pos = pos
        self.endpos = endpos
        self.evaluate_result = evaluate_result

    def __iter__(self):
        return self

    def __next__(self):
        m = self.parser._search_re.search(self.string, self.pos, self.endpos)
        if m is None:
            raise StopIteration()
        # Resume after this match. A zero-width match would otherwise be
        # found again at the same position forever, so step one character
        # past it; once pos exceeds endpos the search returns None.
        self.pos = m.end() if m.end() > m.start() else m.end() + 1

        if self.evaluate_result:
            return self.parser.evaluate_result(m)
        else:
            return Match(self.parser, m)

    # pre-py3k compat
    next = __next__

931 

932 

def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False):
    """Pull values out of "string" according to "format".

    The whole of "string" has to fit the format. To locate the format
    somewhere inside a longer string, use search() instead.

    With ``evaluate_result`` true (the default) a Result instance is
    returned, carrying:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false a Match instance is returned; call its
    .evaluate_result() method to obtain the Result described above.

    Matching ignores case unless case_sensitive=True is given.

    A ValueError is raised if the format is invalid.

    See the module documentation for the use of "extra_types".

    None is returned when the string does not match.
    """
    return Parser(
        format, extra_types=extra_types, case_sensitive=case_sensitive
    ).parse(string, evaluate_result=evaluate_result)

961 

962 

def search(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Find the first occurrence of "format" anywhere in "string".

    If the format has to cover the entire string, use parse() instead.

    The search may be started at character index "pos" and limited to a
    maximum index of "endpos" - equivalent to search(string[:endpos]).

    With ``evaluate_result`` true (the default) a Result instance is
    returned, carrying:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false a Match instance is returned; call its
    .evaluate_result() method to obtain the Result described above.

    Matching ignores case unless case_sensitive=True is given.

    A ValueError is raised if the format is invalid.

    See the module documentation for the use of "extra_types".

    None is returned when the format is not found.
    """
    return Parser(
        format, extra_types=extra_types, case_sensitive=case_sensitive
    ).search(string, pos, endpos, evaluate_result=evaluate_result)

1002 

1003 

def findall(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Find every occurrence of "format" in "string".

    An iterator is returned which yields one item per match found.

    The search may be started at character index "pos" and limited to a
    maximum index of "endpos" - equivalent to search(string[:endpos]).

    With ``evaluate_result`` true (the default) each item is a Result
    instance, carrying:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` false each item is a Match instance; call its
    .evaluate_result() method to obtain the Result described above.

    Matching ignores case unless case_sensitive=True is given.

    A ValueError is raised if the format is invalid.

    See the module documentation for the use of "extra_types".
    """
    return Parser(
        format, extra_types=extra_types, case_sensitive=case_sensitive
    ).findall(string, pos, endpos, evaluate_result=evaluate_result)

1040 

1041 

def compile(format, extra_types=None, case_sensitive=False):
    """Build a reusable Parser for "format".

    Prefer this over the module-level functions when many strings are to
    be parsed with the same format: the returned Parser's .parse(string)
    method behaves like parse(format, string).

    Matching ignores case unless case_sensitive=True is given.

    See the module documentation for the use of "extra_types".

    Returns a Parser instance.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    return parser

1059 

1060 

1061# Copyright (c) 2012-2020 Richard Jones <richard@python.org> 

1062# 

1063# Permission is hereby granted, free of charge, to any person obtaining a copy 

1064# of this software and associated documentation files (the "Software"), to deal 

1065# in the Software without restriction, including without limitation the rights 

1066# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 

1067# copies of the Software, and to permit persons to whom the Software is 

1068# furnished to do so, subject to the following conditions: 

1069# 

1070# The above copyright notice and this permission notice shall be included in 

1071# all copies or substantial portions of the Software. 

1072# 

1073# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 

1074# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 

1075# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 

1076# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 

1077# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 

1078# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 

1079# SOFTWARE. 

1080 

1081# vim: set filetype=python ts=4 sw=4 et si tw=75