Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/parse.py: 79%

1from __future__ import absolute_import

3import logging

4import re

5import sys

6from datetime import datetime

7from datetime import time

8from datetime import timedelta

9from datetime import tzinfo

10from decimal import Decimal

11from functools import partial

# Version of this module.
__version__ = "1.21.0"

# Names exported by a star-import of this module.
__all__ = [
    "parse",
    "search",
    "findall",
    "with_pattern",
]

# Module-level logger; Parser logs the generated expression at DEBUG level.
log = logging.getLogger(__name__)

def with_pattern(pattern, regex_group_count=None):
    r"""Build a decorator that tags a type converter with its regex pattern.

    The decorated function is returned as-is, with two attributes set on it:
    :attr:`pattern` and :attr:`regex_group_count`.

    EXAMPLE:
        >>> import parse
        >>> @parse.with_pattern(r"\d+")
        ... def parse_number(text):
        ...     return int(text)

    is equivalent to:

        >>> def parse_number(text):
        ...     return int(text)
        >>> parse_number.pattern = r"\d+"

    :param pattern: regular expression pattern (as text)
    :param regex_group_count: Indicates how many regex-groups are in pattern.
    :return: wrapped function
    """
    annotations = (
        ("pattern", pattern),
        ("regex_group_count", regex_group_count),
    )

    def decorator(func):
        # Attach the matcher metadata directly onto the converter.
        for attribute, value in annotations:
            setattr(func, attribute, value)
        return func

    return decorator

class int_convert:
    """Convert a string to an integer.

    The string may start with a sign ("-", "+", or a single space, which the
    numeric field patterns accept in the sign position).

    It may be of a base other than 2, 8, 10 or 16.

    If base isn't specified, it will be detected automatically based
    on a string format. When string starts with a base indicator, 0#nnnn,
    it overrides the default base of 10.

    It may also have other non-numeric characters that we can ignore.
    """

    # Digit alphabet; the first ``base`` characters are the valid digits.
    CHARS = "0123456789abcdefghijklmnopqrstuvwxyz"

    def __init__(self, base=None):
        """Remember the numeric base; ``None`` means auto-detect per string."""
        self.base = base

    def __call__(self, string, match):
        """Return the integer value of ``string``.

        :param string: captured text; must be non-empty.
        :param match: regex match object (unused, part of the converter
            calling convention).
        :raises ValueError: if no valid digits remain after stripping.
        """
        if string[0] == "-":
            sign = -1
            number_start = 1
        elif string[0] in "+ ":
            # A leading space occupies the sign slot; skip it so that the
            # base prefix check below looks at the real first digit
            # (otherwise " 0x1F" would be read as decimal "01").
            sign = 1
            number_start = 1
        else:
            sign = 1
            number_start = 0

        base = self.base
        # If base wasn't specified, detect it automatically
        if base is None:
            # Assume decimal number, unless different base is detected
            base = 10

            # For number formats starting with 0b, 0o, 0x, use corresponding base ...
            if string[number_start] == "0" and len(string) - number_start > 2:
                marker = string[number_start + 1]
                if marker in "bB":
                    base = 2
                elif marker in "oO":
                    base = 8
                elif marker in "xX":
                    base = 16

        # Drop everything that is not a digit of the chosen base (sign,
        # base marker, grouping characters, padding).
        chars = int_convert.CHARS[:base]
        string = re.sub("[^%s]" % chars, "", string.lower())
        return sign * int(string, base)

100

class convert_first:
    """Adapt a one-argument converter to the ``(string, match)`` call shape.

    Behaves like ``lambda s, m: converter(s)`` but, being a regular class
    instance rather than a lambda, it can be pickled.
    """

    def __init__(self, converter):
        # Callable applied to the captured string only.
        self.converter = converter

    def __call__(self, string, match):
        # The match object is intentionally ignored.
        convert = self.converter
        return convert(string)

111

112

def percentage(string, match):
    """Convert text such as ``"50%"`` into the fraction ``0.5``.

    The final character (the percent sign) is dropped before conversion;
    ``match`` is unused.
    """
    number_text = string[:-1]
    return float(number_text) / 100.0

115

116

class FixedTzOffset(tzinfo):
    """Fixed offset in minutes east from UTC."""

    # Shared zero delta returned by dst(): fixed offsets have no DST.
    ZERO = timedelta(0)

    def __init__(self, offset, name):
        """Create a zone ``offset`` minutes east of UTC, labelled ``name``."""
        self._offset = timedelta(minutes=offset)
        self._name = name

    def __repr__(self):
        return "<%s %s %s>" % (self.__class__.__name__, self._name, self._offset)

    def utcoffset(self, dt):
        """Return the fixed offset, independent of ``dt``."""
        return self._offset

    def tzname(self, dt):
        """Return the name given at construction, independent of ``dt``."""
        return self._name

    def dst(self, dt):
        """Return a zero delta: no daylight-saving adjustment is applied."""
        return self.ZERO

    def __eq__(self, other):
        if not isinstance(other, FixedTzOffset):
            return NotImplemented
        return self._name == other._name and self._offset == other._offset

    def __hash__(self):
        # Defining __eq__ alone sets __hash__ to None and makes instances
        # unhashable; hash the same fields that __eq__ compares.
        return hash((self._name, self._offset))

142

143

# Month name -> month number, for both the three-letter abbreviation and the
# full English name.  Insertion order (abbreviation, then full name, month by
# month) is significant because ALL_MONTHS_PAT is built from the key order.
MONTHS_MAP = {
    label: number
    for number, full_name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        1,
    )
    for label in (full_name[:3], full_name)
}

# Regex fragments shared by the date/time field types.  Each one is a single
# capturing group (TIME_PAT nests two more inside it).
DAYS_PAT = r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
MONTHS_PAT = r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
ALL_MONTHS_PAT = r"(%s)" % "|".join(MONTHS_MAP)
TIME_PAT = r"(\d{1,2}:\d{1,2}(:\d{1,2}(\.\d+)?)?)"
AM_PAT = r"(\s+[AP]M)"
TZ_PAT = r"(\s+[-+]\d\d?:?\d\d)"

175

176

def date_convert(
    string,
    match,
    ymd=None,
    mdy=None,
    dmy=None,
    d_m_y=None,
    hms=None,
    am=None,
    tz=None,
    mm=None,
    dd=None,
):
    """Convert the incoming string containing some date / time info into a
    datetime instance.

    All keyword arguments are indexes into ``match.groups()`` telling the
    converter where each piece was captured:

    :param string: the captured text (unused; values come from ``match``).
    :param match: regex match object whose groups hold the date parts.
    :param ymd: group holding "year<sep>month<sep>day".
    :param mdy: group holding "month<sep>day<sep>year".
    :param dmy: group holding "day<sep>month<sep>year".
    :param d_m_y: 3-tuple of group indexes ``(day, month, year)``.
    :param hms: group holding "H:M" or "H:M:S[.fraction]".
    :param am: group holding the AM/PM marker.
    :param tz: group holding "Z" or a numeric "+HH:MM"-style offset.
    :param mm: group holding the month when no year is present.
    :param dd: group holding the day when no year is present.
    :return: ``datetime.time`` when no date part was requested, otherwise
        ``datetime.datetime``.

    Markers and month names are compared case-insensitively because parsers
    match with ``re.IGNORECASE`` by default.
    """
    groups = match.groups()
    time_only = False
    if mm and dd:
        # No year in the text: assume the current one.
        y = datetime.today().year
        m = groups[mm]
        d = groups[dd]
    elif ymd is not None:
        y, m, d = re.split(r"[-/\s]", groups[ymd])
    elif mdy is not None:
        m, d, y = re.split(r"[-/\s]", groups[mdy])
    elif dmy is not None:
        d, m, y = re.split(r"[-/\s]", groups[dmy])
    elif d_m_y is not None:
        d, m, y = d_m_y
        d = groups[d]
        m = groups[m]
        y = groups[y]
    else:
        time_only = True

    H = M = S = u = 0
    if hms is not None and groups[hms]:
        t = groups[hms].split(":")
        if len(t) == 2:
            H, M = t
        else:
            H, M, S = t
            if "." in S:
                S, u = S.split(".")
                # Take the first six fractional digits as microseconds using
                # integer arithmetic; float multiplication followed by int()
                # truncation can lose a microsecond to rounding error.
                u = int(u[:6].ljust(6, "0"))
            S = int(S)
        H = int(H)
        M = int(M)

    if am is not None:
        am = groups[am]
        if am:
            # The pattern may have matched "am"/"pm" in any letter case.
            am = am.strip().upper()
        if am == "AM" and H == 12:
            # correction for "12" hour functioning as "0" hour: 12:15 AM = 00:15 by 24 hr clock
            H -= 12
        elif am == "PM" and H == 12:
            # no correction needed: 12PM is midday, 12:00 by 24 hour clock
            pass
        elif am == "PM":
            H += 12

    if tz is not None:
        tz = groups[tz]
        if tz in ("Z", "z"):
            # "z" is reachable through case-insensitive matching.
            tz = FixedTzOffset(0, "UTC")
        elif tz:
            tz = tz.strip()
            if tz.isupper():
                # TODO use the awesome python TZ module?
                # NOTE(review): tz stays a str here, which time()/datetime()
                # would reject; the patterns in this file never capture a
                # named zone, so this branch looks unreachable from Parser.
                pass
            else:
                sign = tz[0]
                if ":" in tz:
                    tzh, tzm = tz[1:].split(":")
                elif len(tz) == 4:  # 'snnn'
                    tzh, tzm = tz[1], tz[2:4]
                else:
                    tzh, tzm = tz[1:3], tz[3:5]
                offset = int(tzm) + int(tzh) * 60
                if sign == "-":
                    offset = -offset
                tz = FixedTzOffset(offset, tz)

    if time_only:
        d = time(H, M, S, u, tzinfo=tz)
    else:
        y = int(y)
        if m.isdigit():
            m = int(m)
        else:
            # MONTHS_MAP keys are capitalised ("Jan", "January").
            m = MONTHS_MAP[m.capitalize()]
        d = int(d)
        d = datetime(y, m, d, H, M, S, u, tzinfo=tz)

    return d

274

275

def strf_date_convert(x, _, type):
    """Parse ``x`` with the strptime format ``type``.

    :param x: the captured text.
    :param _: regex match object (unused).
    :param type: strptime format string, e.g. ``"%Y-%m-%d"``.
    :return: a ``datetime`` when the format has both date and time codes, a
        ``date`` when it has only date codes, and a ``time`` (keeping any
        ``%z`` offset) when it has only time codes.
    :raises ValueError: if the text does not match the format, or the format
        contains neither date nor time codes.
    """
    is_date = any("%" + code in type for code in "aAwdbBmyYjUW")
    is_time = any("%" + code in type for code in "HIpMSfz")

    dt = datetime.strptime(x, type)
    if "%y" not in type and "%Y" not in type:  # year not specified
        dt = dt.replace(year=datetime.today().year)

    if is_date and is_time:
        return dt
    elif is_date:
        return dt.date()
    elif is_time:
        # timetz() rather than time(): time() would silently discard an
        # offset parsed from %z.  Without %z the two are identical.
        return dt.timetz()
    else:
        raise ValueError("Datetime not a date nor a time?")

292

293

# ref: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes
#
# Maps each supported strftime/strptime code to a regex matching its text.
# The fragments must not contain capturing groups: the parser does not
# advance its group counter for datetime formats, so a capturing group here
# would shift the indexes of every later field.
dt_format_to_regex = {
    "%a": "(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)",
    "%A": "(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)",
    "%w": "[0-6]",
    "%d": "[0-9]{1,2}",
    "%b": "(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)",
    "%B": "(?:January|February|March|April|May|June|July|August|September|October|November|December)",
    "%m": "[0-9]{1,2}",
    "%y": "[0-9]{2}",
    "%Y": "[0-9]{4}",
    "%H": "[0-9]{1,2}",
    "%I": "[0-9]{1,2}",
    "%p": "(?:AM|PM)",
    "%M": "[0-9]{2}",
    "%S": "[0-9]{2}",
    "%f": "[0-9]{1,6}",
    # Sign, two-digit hours, then optional minutes and seconds, each with an
    # optional ":" separator.
    "%z": "[+-][0-9]{2}(?::?[0-9]{2})?(?::?[0-9]{2})?",
    # "%Z": punt
    "%j": "[0-9]{1,3}",
    "%U": "[0-9]{1,2}",
    "%W": "[0-9]{1,2}",
}

# Compile a regular expression pattern that matches any date/time format symbol.
dt_format_symbols_re = re.compile("|".join(dt_format_to_regex))

320

321

def get_regex_for_datetime_format(format_):
    """
    Translate a datetime format string into a matching regex pattern.

    Parameters:
        format_ (str): The datetime format string.

    Returns:
        str: ``format_`` with every known format symbol swapped for its regex
        fragment; all other text is left as it is.
    """

    def _symbol_to_regex(symbol_match):
        # Look up the regex fragment registered for the matched symbol.
        return dt_format_to_regex[symbol_match.group(0)]

    return dt_format_symbols_re.sub(_symbol_to_regex, format_)

334

335

class TooManyFields(ValueError):
    """Raised when a format needs more regex groups than ``re`` supports."""

338

339

class RepeatedNameError(ValueError):
    """Raised when a repeated field name is given a different type."""

342

343

# Matches each regex metacharacter that must be escaped in literal text.
# note: {} are handled separately
REGEX_SAFETY = re.compile(r"([?\\.[\]()*+^$!|])")

# allowed field types: the single-letter codes plus the "t?" date/time codes
ALLOWED_TYPES = set("nbox%fFegwWdDsSl") | {"t" + suffix for suffix in "ieahgcts"}

349

350

def extract_format(format, extra_types):
    """Pull apart the format [[fill]align][sign][0][width][grouping][.precision][type]

    Returns a dict with the keys ``fill``, ``align``, ``zero``, ``width``,
    ``type``, ``format`` (same text as ``type``) and ``extra_types``; the
    keys ``grouping`` and ``precision`` are present only when the format
    contains them.  Raises ValueError for an unrecognised type.
    """
    spec = {"extra_types": extra_types, "fill": None, "align": None}
    rest = format

    # [[fill]align]
    if rest[0] in "<>=^":
        spec["align"] = rest[0]
        rest = rest[1:]
    elif len(rest) > 1 and rest[1] in "<>=^":
        spec["fill"], spec["align"] = rest[0], rest[1]
        rest = rest[2:]

    # [sign] is accepted but not recorded
    if rest.startswith(("+", "-", " ")):
        rest = rest[1:]

    # [0]
    spec["zero"] = bool(rest) and rest[0] == "0"
    if spec["zero"]:
        rest = rest[1:]

    # [width]: the leading run of digits
    end = 0
    while end < len(rest) and rest[end].isdigit():
        end += 1
    spec["width"], rest = rest[:end], rest[end:]

    # Extract grouping option
    for separator in (",", "_"):
        if rest.startswith(separator):
            spec["grouping"] = separator
            rest = rest[1:]
            break

    if rest.startswith("."):
        # Precision isn't needed but we need to capture it so that
        # the ValueError isn't raised.
        rest = rest[1:]  # drop the '.'
        end = 0
        while end < len(rest) and rest[end].isdigit():
            end += 1
        spec["precision"], rest = rest[:end], rest[end:]

    # the rest is the type, if present
    if rest:
        recognised = (
            rest in ALLOWED_TYPES
            or rest in extra_types
            or any(k in rest for k in dt_format_to_regex)
        )
        if not recognised:
            raise ValueError("format spec %r not recognised" % rest)

    spec["type"] = rest
    spec["format"] = rest
    return spec

407

408

# Splits a format string into literal text, escaped braces and fields.
PARSE_RE = re.compile(
    r"("
    r"{{|}}"  # an escaped brace
    r"|{[\w-]*(?:\.[\w-]+|\[[^]]+])*(?::[^}]+)?}"  # {name.attr[key]:spec}
    r")"
)

410

411

class Parser(object):
    """Encapsulate a format string that may be used to parse other strings."""

    def __init__(self, format, extra_types=None, case_sensitive=False):
        """Translate ``format`` into a regular expression.

        :param format: the format string, using ``{}`` style fields.
        :param extra_types: optional mapping of type name to converter.
        :param case_sensitive: match with case when True; the default
            matches case-insensitively.
        """
        # a mapping of a name as in {hello.world} to a regex-group compatible
        # name, like hello__world. It's used to prevent the transformation of
        # name-to-group and group to name to fail subtly, such as in:
        # hello_.world-> hello___world->hello._world
        self._group_to_name_map = {}
        # also store the original field name to group name mapping to allow
        # multiple instances of a name in the format string
        self._name_to_group_map = {}
        # and to sanity check the repeated instances store away the first
        # field type specification for the named field
        self._name_types = {}

        self._format = format
        if extra_types is None:
            extra_types = {}
        self._extra_types = extra_types
        if case_sensitive:
            self._re_flags = re.DOTALL
        else:
            self._re_flags = re.IGNORECASE | re.DOTALL
        self._fixed_fields = []
        self._named_fields = []
        self._group_index = 0
        self._type_conversions = {}
        self._expression = self._generate_expression()
        # Compiled lazily by the _search_re / _match_re properties.
        self.__search_re = None
        self.__match_re = None

        log.debug("format %r -> %r", format, self._expression)

    def __repr__(self):
        if len(self._format) > 20:
            return "<%s %r>" % (self.__class__.__name__, self._format[:17] + "...")
        return "<%s %r>" % (self.__class__.__name__, self._format)

    @property
    def _search_re(self):
        """Compiled, unanchored pattern (built on first use).

        :raises TooManyFields: if ``re`` reports its named-group limit.
        """
        if self.__search_re is None:
            try:
                self.__search_re = re.compile(self._expression, self._re_flags)
            except AssertionError as exc:
                if str(exc).endswith("this version only supports 100 named groups"):
                    raise TooManyFields(
                        "sorry, you are attempting to parse too many complex fields"
                    )
                # Any other assertion is unexpected: propagate it instead of
                # silently returning None.
                raise
        return self.__search_re

    @property
    def _match_re(self):
        """Compiled pattern anchored at both ends (built on first use).

        :raises TooManyFields: if ``re`` reports its named-group limit.
        :raises NotImplementedError: if the expression is not a valid regex.
        """
        if self.__match_re is None:
            expression = r"\A%s\Z" % self._expression
            try:
                self.__match_re = re.compile(expression, self._re_flags)
            except AssertionError as exc:
                if str(exc).endswith("this version only supports 100 named groups"):
                    raise TooManyFields(
                        "sorry, you are attempting to parse too many complex fields"
                    )
                # Any other assertion is unexpected: propagate it instead of
                # silently returning None.
                raise
            except re.error:
                raise NotImplementedError(
                    "Group names (e.g. (?P<name>) can "
                    "cause failure, as they are not escaped properly: '%s'" % expression
                )
        return self.__match_re

    @property
    def named_fields(self):
        """A copy of the list of regex group names of the named fields."""
        return self._named_fields[:]

    @property
    def fixed_fields(self):
        """A copy of the list of group indexes of the positional fields."""
        return self._fixed_fields[:]

    @property
    def format(self):
        """The format string this parser was built from."""
        return self._format

    def parse(self, string, evaluate_result=True):
        """Match my format to the string exactly.

        Return a Result or Match instance or None if there's no match.
        """
        m = self._match_re.match(string)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def search(self, string, pos=0, endpos=None, evaluate_result=True):
        """Search the string for my format.

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        If the ``evaluate_result`` argument is set to ``False`` a
        Match instance is returned instead of the actual Result instance.

        Return either a Result instance or None if there's no match.
        """
        if endpos is None:
            endpos = len(string)
        m = self._search_re.search(string, pos, endpos)
        if m is None:
            return None

        if evaluate_result:
            return self.evaluate_result(m)
        else:
            return Match(self, m)

    def findall(
        self, string, pos=0, endpos=None, extra_types=None, evaluate_result=True
    ):
        """Search "string" for all occurrences of "format".

        Optionally start the search at "pos" character index and limit the
        search to a maximum index of endpos - equivalent to
        search(string[:endpos]).

        The ``extra_types`` argument is accepted but not used here; the
        types given to the constructor apply.

        Returns an iterator that holds Result or Match instances for each format match
        found.
        """
        if endpos is None:
            endpos = len(string)
        return ResultIterator(
            self, string, pos, endpos, evaluate_result=evaluate_result
        )

    def _expand_named_fields(self, named_fields):
        """Turn flat ``'a[b][c]'`` style keys into nested dictionaries."""
        result = {}
        for field, value in named_fields.items():
            # split 'aaa[bbb][ccc]...' into 'aaa' and '[bbb][ccc]...'
            n = field.find("[")
            if n == -1:
                basename, subkeys = field, ""
            else:
                basename, subkeys = field[:n], field[n:]

            # create nested dictionaries {'aaa': {'bbb': {'ccc': ...}}}
            d = result
            k = basename

            if subkeys:
                for subkey in re.findall(r"\[[^]]+]", subkeys):
                    d = d.setdefault(k, {})
                    k = subkey[1:-1]

            # assign the value to the last key
            d[k] = value

        return result

    def evaluate_result(self, m):
        """Generate a Result instance for the given regex match object"""
        # ok, figure the fixed fields we've pulled out and type convert them
        fixed_fields = list(m.groups())
        for n in self._fixed_fields:
            if n in self._type_conversions:
                fixed_fields[n] = self._type_conversions[n](fixed_fields[n], m)
        fixed_fields = tuple(fixed_fields[n] for n in self._fixed_fields)

        # grab the named fields, converting where requested
        groupdict = m.groupdict()
        named_fields = {}
        name_map = {}
        for k in self._named_fields:
            korig = self._group_to_name_map[k]
            name_map[korig] = k
            if k in self._type_conversions:
                value = self._type_conversions[k](groupdict[k], m)
            else:
                value = groupdict[k]

            named_fields[korig] = value

        # now figure the match spans
        spans = {n: m.span(name_map[n]) for n in named_fields}
        # fixed field indexes are 0-based into groups(); span() is 1-based
        spans.update((i, m.span(n + 1)) for i, n in enumerate(self._fixed_fields))

        # and that's our result
        return Result(fixed_fields, self._expand_named_fields(named_fields), spans)

    def _regex_replace(self, match):
        """Backslash-escape the regex metacharacter captured by REGEX_SAFETY."""
        return "\\" + match.group(1)

    def _generate_expression(self):
        """Turn my _format attribute into the regex expression text."""
        e = []
        for part in PARSE_RE.split(self._format):
            if not part:
                continue
            elif part == "{{":
                e.append(r"\{")
            elif part == "}}":
                e.append(r"\}")
            elif part[0] == "{" and part[-1] == "}":
                # this will be a braces-delimited field to handle
                e.append(self._handle_field(part))
            else:
                # just some text to match
                e.append(REGEX_SAFETY.sub(self._regex_replace, part))
        return "".join(e)

    def _to_group_name(self, field):
        """Return a regex-safe, unique group name for ``field`` and record it.

        :raises KeyError: if the name collides and cannot be disambiguated.
        """
        # return a version of field which can be used as capture group, even
        # though it might contain '.'
        group = field.replace(".", "_").replace("[", "_").replace("]", "_").replace("-", "_")

        # make sure we don't collide ("a.b" colliding with "a_b")
        n = 1
        while group in self._group_to_name_map:
            n += 1
            if "." in field:
                group = field.replace(".", "_" * n)
            elif "_" in field:
                group = field.replace("_", "_" * n)
            elif "-" in field:
                group = field.replace("-", "_" * n)
            else:
                raise KeyError("duplicated group name %r" % (field,))
            # Only one kind of separator was widened above; map any other
            # character that is illegal in a group name to "_" so the result
            # is always a valid group name (e.g. "a.b-c" -> "a__b_c").
            group = group.replace("[", "_").replace("]", "_").replace("-", "_")

        # save off the mapping
        self._group_to_name_map[group] = field
        self._name_to_group_map[field] = group
        return group

    def _handle_field(self, field):
        """Return the regex text for one ``{...}`` field.

        Registers the field as fixed or named, records its type converter
        and advances the running group index by the number of capturing
        groups that the emitted regex contains.

        :raises RepeatedNameError: if a repeated name has a different type.
        """
        # first: lose the braces
        field = field[1:-1]

        # now figure whether this is an anonymous or named field, and whether
        # there's any format specification
        format = ""

        if ":" in field:
            name, format = field.split(":", 1)
        else:
            name = field

        # This *should* be more flexible, but parsing complicated structures
        # out of the string is hard (and not necessarily useful) ... and I'm
        # being lazy. So for now `identifier` is "anything starting with a
        # letter" and digit args don't get attribute or element stuff.
        if name and name[0].isalpha():
            if name in self._name_to_group_map:
                if self._name_types[name] != format:
                    raise RepeatedNameError(
                        'field type %r for field "%s" '
                        "does not match previous seen type %r"
                        % (format, name, self._name_types[name])
                    )
                group = self._name_to_group_map[name]
                # match previously-seen value
                return r"(?P=%s)" % group
            else:
                group = self._to_group_name(name)
                self._name_types[name] = format
            self._named_fields.append(group)
            # this will become a group, which must not contain dots
            wrap = r"(?P<%s>%%s)" % group
        else:
            self._fixed_fields.append(self._group_index)
            wrap = r"(%s)"
            group = self._group_index

        # simplest case: no type specifier ({} or {name})
        if not format:
            self._group_index += 1
            return wrap % r".+?"

        # decode the format specification
        spec = extract_format(format, self._extra_types)

        # figure type conversions, if any
        kind = spec["type"]
        is_numeric = kind and kind in "n%fegdobx"
        conv = self._type_conversions
        if kind in self._extra_types:
            type_converter = self._extra_types[kind]
            s = getattr(type_converter, "pattern", r".+?")
            regex_group_count = getattr(type_converter, "regex_group_count", 0)
            if regex_group_count is None:
                regex_group_count = 0
            self._group_index += regex_group_count
            conv[group] = convert_first(type_converter)
        elif kind == "n":
            s = r"\d{1,3}([,.]\d{3})*"
            self._group_index += 1
            conv[group] = int_convert(10)
        elif kind == "b":
            s = r"(0[bB])?[01]+"
            conv[group] = int_convert(2)
            self._group_index += 1
        elif kind == "o":
            s = r"(0[oO])?[0-7]+"
            conv[group] = int_convert(8)
            self._group_index += 1
        elif kind == "x":
            s = r"(0[xX])?[0-9a-fA-F]+"
            conv[group] = int_convert(16)
            self._group_index += 1
        elif kind == "%":
            s = r"\d+(\.\d+)?%"
            self._group_index += 1
            conv[group] = percentage
        elif kind == "f":
            s = r"\d*\.\d+"
            conv[group] = convert_first(float)
        elif kind == "F":
            s = r"\d*\.\d+"
            conv[group] = convert_first(Decimal)
        elif kind == "e":
            s = r"\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF"
            conv[group] = convert_first(float)
        elif kind == "g":
            s = r"\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF"
            self._group_index += 2
            conv[group] = convert_first(float)
        elif kind == "d":
            if spec.get("width"):
                width = r"{1,%s}" % int(spec["width"])
            else:
                width = "+"
            s = r"[-+ ]?[0-9{g}]{w}|[-+ ]?0[xX][0-9a-fA-F{g}]{w}|[-+ ]?0[bB][01{g}]{w}|[-+ ]?0[oO][0-7{g}]{w}".format(
                w=width,
                g=spec.get("grouping", ""),
            )
            conv[group] = int_convert()
            # do not specify number base, determine it automatically
        elif any(k in kind for k in dt_format_to_regex):
            s = get_regex_for_datetime_format(kind)
            conv[group] = partial(strf_date_convert, type=kind)
        elif kind == "ti":
            s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT
            n = self._group_index
            conv[group] = partial(date_convert, ymd=n + 1, hms=n + 4, tz=n + 7)
            self._group_index += 7
        elif kind == "tg":
            s = r"(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?"
            s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(
                date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif kind == "ta":
            s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?"
            s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(
                date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
            )
            self._group_index += 9
        elif kind == "te":
            # this will allow microseconds through if they're present, but meh
            s = r"(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s"
            s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, dmy=n + 3, hms=n + 5, tz=n + 8)
            self._group_index += 8
        elif kind == "th":
            # slight flexibility here from the stock Apache format
            s = r"(\d{1,2}[-/]%s[-/]\d{4}):%s%s" % (MONTHS_PAT, TIME_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6)
            self._group_index += 6
        elif kind == "tc":
            s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})"
            s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5)
            self._group_index += 8
        elif kind == "tt":
            s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT)
            n = self._group_index
            conv[group] = partial(date_convert, hms=n + 1, am=n + 4, tz=n + 5)
            self._group_index += 5
        elif kind == "ts":
            s = r"%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?" % MONTHS_PAT
            n = self._group_index
            conv[group] = partial(date_convert, mm=n + 1, dd=n + 3, hms=n + 5)
            self._group_index += 5
        elif kind == "l":
            s = r"[A-Za-z]+"
        elif kind:
            # remaining single-letter types map onto the regex class of the
            # same letter (e.g. "w" -> \w+)
            s = r"\%s+" % kind
        elif spec.get("precision"):
            if spec.get("width"):
                s = r".{%s,%s}?" % (spec["width"], spec["precision"])
            else:
                s = r".{1,%s}?" % spec["precision"]
        elif spec.get("width"):
            s = r".{%s,}?" % spec["width"]
        else:
            s = r".+?"

        align = spec["align"]
        fill = spec["fill"]

        # handle some numeric-specific things like fill and sign
        if is_numeric:
            # prefix with something (align "=" trumps zero)
            if align == "=":
                # special case - align "=" acts like the zero above but with
                # configurable fill defaulting to "0"
                if not fill:
                    fill = "0"
                s = r"%s*" % fill + s

            # allow numbers to be prefixed with a sign
            s = r"[-+ ]?" + s

        if not fill:
            fill = " "

        # Place into a group now - this captures the value we want to keep.
        # Everything else from now is just padding to be stripped off
        if wrap:
            s = wrap % s
            self._group_index += 1

        if spec["width"]:
            # all we really care about is that if the format originally
            # specified a width then there will probably be padding - without
            # an explicit alignment that'll mean right alignment with spaces
            # padding
            if not align:
                align = ">"

        if fill in r".\+?*[](){}^$":
            fill = "\\" + fill

        # align "=" has been handled
        if align == "<":
            s = "%s%s*" % (s, fill)
        elif align == ">":
            s = "%s*%s" % (fill, s)
        elif align == "^":
            s = "%s*%s%s*" % (fill, s, fill)

        return s

865

866

class Result(object):
    """Holds the values extracted by a parse() or search().

    * ``result[index]`` or ``result[start:stop]`` reads fixed (positional)
      values.
    * ``result['name']`` reads a named value.
    * ``'name' in result`` tests whether a named value exists.
    """

    def __init__(self, fixed, named, spans):
        self.fixed, self.named, self.spans = fixed, named, spans

    def __getitem__(self, item):
        # Integers and slices address the fixed values; anything else is
        # treated as a key into the named values.
        source = self.fixed if isinstance(item, (int, slice)) else self.named
        return source[item]

    def __repr__(self):
        details = (self.__class__.__name__, self.fixed, self.named)
        return "<%s %r %r>" % details

    def __contains__(self, name):
        return name in self.named

893

894

class Match(object):
    """Returned by parse() or search() when result evaluation is deferred.

    It wraps the parser together with the regex match object it produced,
    so that the Result can be generated later through the parser's
    ``evaluate_result``.
    """

    def __init__(self, parser, match):
        self.parser, self.match = parser, match

    def evaluate_result(self):
        """Generate results for this Match"""
        owner = self.parser
        return owner.evaluate_result(self.match)

909

910

class ResultIterator(object):
    """The result of a findall() operation.

    Each element is a Result instance, or a Match instance when
    ``evaluate_result`` is False.
    """

    def __init__(self, parser, string, pos, endpos, evaluate_result=True):
        """Iterate matches of ``parser`` over ``string[pos:endpos]``."""
        self.parser = parser
        self.string = string
        self.pos = pos
        self.endpos = endpos
        self.evaluate_result = evaluate_result

    def __iter__(self):
        return self

    def __next__(self):
        m = self.parser._search_re.search(self.string, self.pos, self.endpos)
        if m is None:
            raise StopIteration()
        # Resume after this match.  A zero-length match ends where it
        # started, so step one character further; otherwise the same empty
        # match would be returned forever.
        if m.end() == m.start():
            self.pos = m.end() + 1
        else:
            self.pos = m.end()

        if self.evaluate_result:
            return self.parser.evaluate_result(m)
        else:
            return Match(self.parser, m)

    # pre-py3k compat
    next = __next__

940

941

def parse(format, string, extra_types=None, evaluate_result=True, case_sensitive=False):
    """Pull values out of "string" according to "format".

    The whole string has to match the format. When the value you want is
    only a part of the string, use search() instead.

    With ``evaluate_result`` True the return value is a Result instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` False the return value is a Match instance with one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    A ValueError is raised if the format is invalid.

    See the module documentation for the use of "extra_types".

    When there is no match parse() returns None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.parse(string, evaluate_result=evaluate_result)
    return outcome

970

971

def search(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Find the first occurrence of "format" inside "string".

    The format may occur anywhere within the string. To require the format
    to match the string exactly, use parse() instead.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` True the return value is a Result instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` False the return value is a Match instance with one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    A ValueError is raised if the format is invalid.

    See the module documentation for the use of "extra_types".

    When there is no match search() returns None.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    outcome = parser.search(string, pos, endpos, evaluate_result=evaluate_result)
    return outcome

1011

1012

def findall(
    format,
    string,
    pos=0,
    endpos=None,
    extra_types=None,
    evaluate_result=True,
    case_sensitive=False,
):
    """Find every occurrence of "format" inside "string".

    The return value is an iterator yielding one item per match found.

    Optionally start the search at "pos" character index and limit the search
    to a maximum index of endpos - equivalent to search(string[:endpos]).

    With ``evaluate_result`` True each item is a Result instance with two attributes:

     .fixed - tuple of fixed-position values from the string
     .named - dict of named values from the string

    With ``evaluate_result`` False each item is a Match instance with one method:

     .evaluate_result() - This will return a Result instance like you would get
                          with ``evaluate_result`` set to True

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    A ValueError is raised if the format is invalid.

    See the module documentation for the use of "extra_types".
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    matches = parser.findall(string, pos, endpos, evaluate_result=evaluate_result)
    return matches

1049

1050

def compile(format, extra_types=None, case_sensitive=False):
    """Build a reusable Parser instance for "format".

    The Parser's .parse(string) method behaves in the same manner as
    parse(format, string).

    Matching is case insensitive by default; pass case_sensitive=True to
    match with case.

    Use this function if you intend to parse many strings
    with the same format.

    See the module documentation for the use of "extra_types".

    Returns a Parser instance.
    """
    parser = Parser(format, extra_types=extra_types, case_sensitive=case_sensitive)
    return parser

1068

1069

1071#

1072# Permission is hereby granted, free of charge, to any person obtaining a copy

1073# of this software and associated documentation files (the "Software"), to deal

1074# in the Software without restriction, including without limitation the rights

1075# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell

1076# copies of the Software, and to permit persons to whom the Software is

1077# furnished to do so, subject to the following conditions:

1078#

1079# The above copyright notice and this permission notice shall be included in

1080# all copies or substantial portions of the Software.

1081#

1082# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

1083# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

1084# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

1085# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

1086# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

1087# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

1088# SOFTWARE.

1089

1090# vim: set filetype=python ts=4 sw=4 et si tw=75