Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dateutil/parser/

1# -*- coding: utf-8 -*-

2"""

3This module offers a generic date/time string parser which is able to parse

4most known formats to represent a date and/or time.

6This module attempts to be forgiving with regards to unlikely input formats,

7returning a datetime object even for dates which are ambiguous. If an element

8of a date/time stamp is omitted, the following rules are applied:

10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour

11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is

12 specified.

13- If a time zone is omitted, a timezone-naive datetime is returned.

15If any other elements are missing, they are taken from the

16:class:`datetime.datetime` object passed to the parameter ``default``. If this

17results in a day number exceeding the valid number of days per month, the

18value falls back to the end of the month.

20Additional resources about date/time string formats can be found below:

22- `A summary of the international standard date and time notation

23 <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_

24- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_

25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_

26- `CPAN ParseDate module

27 <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_

28- `Java SimpleDateFormat Class

29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_

30"""

31from __future__ import unicode_literals

33import datetime

34import re

35import string

36import time

37import warnings

39from calendar import monthrange

40from io import StringIO

42import six

43from six import integer_types, text_type

45from decimal import Decimal

47from warnings import warn

49from .. import relativedelta

50from .. import tz

52__all__ = ["parse", "parserinfo", "ParserError"]

55# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth

56# making public and/or figuring out if there is something we can

57# take off their plate.

58class _timelex(object):

59 # Fractional seconds are sometimes split by a comma

60 _split_decimal = re.compile("([.,])")

62 def __init__(self, instream):

63 if isinstance(instream, (bytes, bytearray)):

64 instream = instream.decode()

66 if isinstance(instream, text_type):

67 instream = StringIO(instream)

68 elif getattr(instream, 'read', None) is None:

69 raise TypeError('Parser must be a string or character stream, not '

70 '{itype}'.format(itype=instream.__class__.__name__))

72 self.instream = instream

73 self.charstack = []

74 self.tokenstack = []

75 self.eof = False

77 def get_token(self):

78 """

79 This function breaks the time string into lexical units (tokens), which

80 can be parsed by the parser. Lexical units are demarcated by changes in

81 the character set, so any continuous string of letters is considered

82 one unit, any continuous string of numbers is considered one unit.

84 The main complication arises from the fact that dots ('.') can be used

85 both as separators (e.g. "Sep.20.2009") or decimal points (e.g.

86 "4:30:21.447"). As such, it is necessary to read the full context of

87 any dot-separated strings before breaking it into tokens; as such, this

88 function maintains a "token stack", for when the ambiguous context

89 demands that multiple tokens be parsed at once.

90 """

91 if self.tokenstack:

92 return self.tokenstack.pop(0)

94 seenletters = False

95 token = None

96 state = None

98 while not self.eof:

99 # We only realize that we've reached the end of a token when we

100 # find a character that's not part of the current token - since

101 # that character may be part of the next token, it's stored in the

102 # charstack.

103 if self.charstack:

104 nextchar = self.charstack.pop(0)

105 else:

106 nextchar = self.instream.read(1)

107 while nextchar == '\x00':

108 nextchar = self.instream.read(1)

109

110 if not nextchar:

111 self.eof = True

112 break

113 elif not state:

114 # First character of the token - determines if we're starting

115 # to parse a word, a number or something else.

116 token = nextchar

117 if self.isword(nextchar):

118 state = 'a'

119 elif self.isnum(nextchar):

120 state = '0'

121 elif self.isspace(nextchar):

122 token = ' '

123 break # emit token

124 else:

125 break # emit token

126 elif state == 'a':

127 # If we've already started reading a word, we keep reading

128 # letters until we find something that's not part of a word.

129 seenletters = True

130 if self.isword(nextchar):

131 token += nextchar

132 elif nextchar == '.':

133 token += nextchar

134 state = 'a.'

135 else:

136 self.charstack.append(nextchar)

137 break # emit token

138 elif state == '0':

139 # If we've already started reading a number, we keep reading

140 # numbers until we find something that doesn't fit.

141 if self.isnum(nextchar):

142 token += nextchar

143 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):

144 token += nextchar

145 state = '0.'

146 else:

147 self.charstack.append(nextchar)

148 break # emit token

149 elif state == 'a.':

150 # If we've seen some letters and a dot separator, continue

151 # parsing, and the tokens will be broken up later.

152 seenletters = True

153 if nextchar == '.' or self.isword(nextchar):

154 token += nextchar

155 elif self.isnum(nextchar) and token[-1] == '.':

156 token += nextchar

157 state = '0.'

158 else:

159 self.charstack.append(nextchar)

160 break # emit token

161 elif state == '0.':

162 # If we've seen at least one dot separator, keep going, we'll

163 # break up the tokens later.

164 if nextchar == '.' or self.isnum(nextchar):

165 token += nextchar

166 elif self.isword(nextchar) and token[-1] == '.':

167 token += nextchar

168 state = 'a.'

169 else:

170 self.charstack.append(nextchar)

171 break # emit token

172

173 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or

174 token[-1] in '.,')):

175 l = self._split_decimal.split(token)

176 token = l[0]

177 for tok in l[1:]:

178 if tok:

179 self.tokenstack.append(tok)

180

181 if state == '0.' and token.count('.') == 0:

182 token = token.replace(',', '.')

183

184 return token

185

186 def __iter__(self):

187 return self

188

189 def __next__(self):

190 token = self.get_token()

191 if token is None:

192 raise StopIteration

193

194 return token

195

196 def next(self):

197 return self.__next__() # Python 2.x support

198

199 @classmethod

200 def split(cls, s):

201 return list(cls(s))

202

203 @classmethod

204 def isword(cls, nextchar):

205 """ Whether or not the next character is part of a word """

206 return nextchar.isalpha()

207

208 @classmethod

209 def isnum(cls, nextchar):

210 """ Whether the next character is part of a number """

211 return nextchar.isdigit()

212

213 @classmethod

214 def isspace(cls, nextchar):

215 """ Whether the next character is whitespace """

216 return nextchar.isspace()

217

218

219class _resultbase(object):

220

221 def __init__(self):

222 for attr in self.__slots__:

223 setattr(self, attr, None)

224

225 def _repr(self, classname):

226 l = []

227 for attr in self.__slots__:

228 value = getattr(self, attr)

229 if value is not None:

230 l.append("%s=%s" % (attr, repr(value)))

231 return "%s(%s)" % (classname, ", ".join(l))

232

233 def __len__(self):

234 return (sum(getattr(self, attr) is not None

235 for attr in self.__slots__))

236

237 def __repr__(self):

238 return self._repr(self.__class__.__name__)

239

240

class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lower-cased lookup tables built from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference point used by convertyear() for two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lowercased name: index}`` dict from ``lst``. An entry may
        be a single string or a tuple of aliases that share one index.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """ Whether ``name`` is a word/character the parser may skip """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Index of ``name`` in ``WEEKDAYS``, or ``None`` if unknown """
        return self._weekdays.get(name.lower())

    def month(self, name):
        """ 1-based month number for ``name``, or ``None`` if unknown """
        index = self._months.get(name.lower())
        if index is None:
            return None
        return index + 1

    def hms(self, name):
        """ Index of ``name`` in ``HMS``, or ``None`` if unknown """
        return self._hms.get(name.lower())

    def ampm(self, name):
        """ Index of ``name`` in ``AMPM``, or ``None`` if unknown """
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """ Whether ``name`` is listed in ``PERTAIN`` (case-insensitive) """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` is listed in ``UTCZONE`` (case-insensitive) """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset configured for the zone ``name``.

        :return:
            ``0`` for any name listed in ``UTCZONE``, otherwise whatever
            ``TZOFFSET`` holds for ``name`` (``None`` if absent).
        """
        # self._utczone is keyed by lower-cased names (see _convert), so the
        # lookup has to be lower-cased as well; comparing the raw name made
        # "UTC", "GMT" and "Z" miss. Objects without ``lower`` (e.g. None)
        # are looked up unchanged.
        key = name.lower() if hasattr(name, 'lower') else name
        if key in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise the parse result ``res`` in place and report validity.

        Expands the year via :meth:`convertyear` and makes the UTC
        name/offset pair consistent. Always returns ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True

392

393

394class _ymd(list):

395 def __init__(self, *args, **kwargs):

396 super(self.__class__, self).__init__(*args, **kwargs)

397 self.century_specified = False

398 self.dstridx = None

399 self.mstridx = None

400 self.ystridx = None

401

402 @property

403 def has_year(self):

404 return self.ystridx is not None

405

406 @property

407 def has_month(self):

408 return self.mstridx is not None

409

410 @property

411 def has_day(self):

412 return self.dstridx is not None

413

414 def could_be_day(self, value):

415 if self.has_day:

416 return False

417 elif not self.has_month:

418 return 1 <= value <= 31

419 elif not self.has_year:

420 # Be permissive, assume leap year

421 month = self[self.mstridx]

422 return 1 <= value <= monthrange(2000, month)[1]

423 else:

424 month = self[self.mstridx]

425 year = self[self.ystridx]

426 return 1 <= value <= monthrange(year, month)[1]

427

428 def append(self, val, label=None):

429 if hasattr(val, '__len__'):

430 if val.isdigit() and len(val) > 2:

431 self.century_specified = True

432 if label not in [None, 'Y']: # pragma: no cover

433 raise ValueError(label)

434 label = 'Y'

435 elif val > 100:

436 self.century_specified = True

437 if label not in [None, 'Y']: # pragma: no cover

438 raise ValueError(label)

439 label = 'Y'

440

441 super(self.__class__, self).append(int(val))

442

443 if label == 'M':

444 if self.has_month:

445 raise ValueError('Month is already set')

446 self.mstridx = len(self) - 1

447 elif label == 'D':

448 if self.has_day:

449 raise ValueError('Day is already set')

450 self.dstridx = len(self) - 1

451 elif label == 'Y':

452 if self.has_year:

453 raise ValueError('Year is already set')

454 self.ystridx = len(self) - 1

455

456 def _resolve_from_stridxs(self, strids):

457 """

458 Try to resolve the identities of year/month/day elements using

459 ystridx, mstridx, and dstridx, if enough of these are specified.

460 """

461 if len(self) == 3 and len(strids) == 2:

462 # we can back out the remaining stridx value

463 missing = [x for x in range(3) if x not in strids.values()]

464 key = [x for x in ['y', 'm', 'd'] if x not in strids]

465 assert len(missing) == len(key) == 1

466 key = key[0]

467 val = missing[0]

468 strids[key] = val

469

470 assert len(self) == len(strids) # otherwise this should not be called

471 out = {key: self[strids[key]] for key in strids}

472 return (out.get('y'), out.get('m'), out.get('d'))

473

474 def resolve_ymd(self, yearfirst, dayfirst):

475 len_ymd = len(self)

476 year, month, day = (None, None, None)

477

478 strids = (('y', self.ystridx),

479 ('m', self.mstridx),

480 ('d', self.dstridx))

481

482 strids = {key: val for key, val in strids if val is not None}

483 if (len(self) == len(strids) > 0 or

484 (len(self) == 3 and len(strids) == 2)):

485 return self._resolve_from_stridxs(strids)

486

487 mstridx = self.mstridx

488

489 if len_ymd > 3:

490 raise ValueError("More than three YMD values")

491 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):

492 # One member, or two members with a month string

493 if mstridx is not None:

494 month = self[mstridx]

495 # since mstridx is 0 or 1, self[mstridx-1] always

496 # looks up the other element

497 other = self[mstridx - 1]

498 else:

499 other = self[0]

500

501 if len_ymd > 1 or mstridx is None:

502 if other > 31:

503 year = other

504 else:

505 day = other

506

507 elif len_ymd == 2:

508 # Two members with numbers

509 if self[0] > 31:

510 # 99-01

511 year, month = self

512 elif self[1] > 31:

513 # 01-99

514 month, year = self

515 elif dayfirst and self[1] <= 12:

516 # 13-01

517 day, month = self

518 else:

519 # 01-13

520 month, day = self

521

522 elif len_ymd == 3:

523 # Three members

524 if mstridx == 0:

525 if self[1] > 31:

526 # Apr-2003-25

527 month, year, day = self

528 else:

529 month, day, year = self

530 elif mstridx == 1:

531 if self[0] > 31 or (yearfirst and self[2] <= 31):

532 # 99-Jan-01

533 year, month, day = self

534 else:

535 # 01-Jan-01

536 # Give precedence to day-first, since

537 # two-digit years is usually hand-written.

538 day, month, year = self

539

540 elif mstridx == 2:

541 # WTF!?

542 if self[1] > 31:

543 # 01-99-Jan

544 day, year, month = self

545 else:

546 # 99-01-Jan

547 year, day, month = self

548

549 else:

550 if (self[0] > 31 or

551 self.ystridx == 0 or

552 (yearfirst and self[1] <= 12 and self[2] <= 31)):

553 # 99-01-01

554 if dayfirst and self[2] <= 12:

555 year, day, month = self

556 else:

557 year, month, day = self

558 elif self[0] > 12 or (dayfirst and self[1] <= 12):

559 # 13-01-01

560 day, month, year = self

561 else:

562 # 01-13-01

563 month, day, year = self

564

565 return year, month, day

566

567

568class parser(object):

569 def __init__(self, info=None):

570 self.info = info or parserinfo()

571

def parse(self, timestr, default=None,
          ignoretz=False, tzinfos=None, **kwargs):
    """
    Parse the date/time string into a :class:`datetime.datetime` object.

    :param timestr:
        Any date/time string using the supported formats.

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object. When ``None``, today's date at midnight is used.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime.datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param \\*\\*kwargs:
        Keyword arguments as passed to ``_parse()``.

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises TypeError:
        Raised for non-string or character stream input.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    if default is None:
        midnight = dict(hour=0, minute=0, second=0, microsecond=0)
        default = datetime.datetime.now().replace(**midnight)

    res, skipped_tokens = self._parse(timestr, **kwargs)

    if res is None:
        raise ParserError("Unknown string format: %s", timestr)

    if len(res) == 0:
        # Tokenizing succeeded but no field was populated.
        raise ParserError("String does not contain a date: %s", timestr)

    try:
        parsed = self._build_naive(res, default)
    except ValueError as err:
        six.raise_from(ParserError(str(err) + ": %s", timestr), err)

    if not ignoretz:
        parsed = self._build_tzaware(parsed, res, tzinfos)

    if kwargs.get('fuzzy_with_tokens', False):
        return parsed, skipped_tokens
    return parsed

660

class _result(_resultbase):
    # Fields filled in by _parse(); each one stays None until the
    # corresponding component is found in the input.
    __slots__ = [
        "year", "month", "day", "weekday",
        "hour", "minute", "second", "microsecond",
        "tzname", "tzoffset", "ampm", "any_unused_tokens",
    ]

665

def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
           fuzzy_with_tokens=False):
    """
    Private method which performs the heavy lifting of parsing, called from
    ``parse()``, which passes on its ``kwargs`` to this function.

    :param timestr:
        The string to parse.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        ``(result, skipped_tokens)``. Both are ``None`` when the string
        cannot be parsed; ``skipped_tokens`` is also ``None`` unless
        ``fuzzy_with_tokens`` is set.
    """
    if fuzzy_with_tokens:
        fuzzy = True

    info = self.info

    if dayfirst is None:
        dayfirst = info.dayfirst

    if yearfirst is None:
        yearfirst = info.yearfirst

    res = self._result()
    l = _timelex.split(timestr)         # Splits the timestr into tokens

    skipped_idxs = []

    # year/month/day list
    ymd = _ymd()

    len_l = len(l)
    i = 0
    try:
        while i < len_l:
            token = l[i]

            # Check if it's a number
            try:
                float(token)
            except ValueError:
                is_number = False
            else:
                is_number = True

            if is_number:
                # Numeric token; the helper returns the index of the last
                # token it consumed.
                i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
                i += 1
                continue

            # Check weekday
            weekday = info.weekday(token)
            if weekday is not None:
                res.weekday = weekday
                i += 1
                continue

            # Check month name
            month_number = info.month(token)
            if month_number is not None:
                ymd.append(month_number, 'M')

                if i + 1 < len_l:
                    after = l[i + 1]
                    if after in ('-', '/'):
                        # Jan-01[-99]
                        ymd.append(l[i + 2])

                        if i + 3 < len_l and l[i + 3] == after:
                            # Jan-01-99
                            ymd.append(l[i + 4])
                            i += 2

                        i += 2

                    elif (i + 4 < len_l and after == l[i + 3] == ' ' and
                          info.pertain(l[i + 2])):
                        # Jan of 01
                        # In this case, 01 is clearly year
                        candidate = l[i + 4]
                        if candidate.isdigit():
                            # Convert it here to become unambiguous
                            expanded = info.convertyear(int(candidate))
                            ymd.append(str(expanded), 'Y')
                        # else: wrong guess, nothing to record
                        # TODO: not hit in tests
                        i += 4

                i += 1
                continue

            # Check am/pm
            ampm = info.ampm(token)
            if ampm is not None:
                if self._ampm_valid(res.hour, res.ampm, fuzzy):
                    res.hour = self._adjust_ampm(res.hour, ampm)
                    res.ampm = ampm
                elif fuzzy:
                    skipped_idxs.append(i)

                i += 1
                continue

            # Check for a timezone name
            if self._could_be_tzname(res.hour, res.tzname,
                                     res.tzoffset, token):
                res.tzname = token
                res.tzoffset = info.tzoffset(res.tzname)

                # Check for something like GMT+3, or BRST+3. Notice
                # that it doesn't mean "I am 3 hours after GMT", but
                # "my time +3 is GMT". If found, we reverse the
                # logic so that timezone parsing code will get it
                # right.
                if i + 1 < len_l and l[i + 1] in ('+', '-'):
                    l[i + 1] = '-' if l[i + 1] == '+' else '+'
                    res.tzoffset = None
                    if info.utczone(res.tzname):
                        # With something like GMT+3, the timezone
                        # is *not* GMT.
                        res.tzname = None

                i += 1
                continue

            # Check for a numbered timezone
            if res.hour is not None and token in ('+', '-'):
                sign = 1 if token == '+' else -1
                offset_repr = l[i + 1]
                offset_len = len(offset_repr)

                # TODO: check that l[i + 1] is integer?
                if offset_len == 4:
                    # -0300
                    hour_offset = int(offset_repr[:2])
                    min_offset = int(offset_repr[2:])
                elif i + 2 < len_l and l[i + 2] == ':':
                    # -03:00
                    hour_offset = int(offset_repr)
                    # TODO: Check that l[i+3] is minute-like?
                    min_offset = int(l[i + 3])
                    i += 2
                elif offset_len <= 2:
                    # -[0]3
                    hour_offset = int(offset_repr[:2])
                    min_offset = 0
                else:
                    raise ValueError(timestr)

                res.tzoffset = sign * (hour_offset * 3600 + min_offset * 60)

                # Look for a timezone name between parenthesis
                if (i + 5 < len_l and
                        info.jump(l[i + 2]) and l[i + 3] == '(' and
                        l[i + 5] == ')' and
                        3 <= len(l[i + 4]) and
                        self._could_be_tzname(res.hour, res.tzname,
                                              None, l[i + 4])):
                    # -0300 (BRST)
                    res.tzname = l[i + 4]
                    i += 4

                # Step over the sign and over the offset digits.
                i += 2
                continue

            # Check jumps
            if not (info.jump(token) or fuzzy):
                raise ValueError(timestr)

            skipped_idxs.append(i)
            i += 1

        # Process year/month/day
        year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

        res.century_specified = ymd.century_specified
        res.year = year
        res.month = month
        res.day = day

    except (IndexError, ValueError):
        # Running off the token list or hitting an unparseable piece both
        # mean the string is not in a recognised format.
        return None, None

    if not info.validate(res):
        return None, None

    if fuzzy_with_tokens:
        skipped_tokens = self._recombine_skipped(l, skipped_idxs)
        return res, tuple(skipped_tokens)

    return res, None

874

def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
    """
    Interpret the numeric token at ``tokens[idx]`` and store what it means
    in ``ymd`` and/or ``res``.

    :return:
        The index of the last token consumed; the caller resumes at the
        index after it.

    :raises ValueError:
        If the token cannot be converted, or cannot be interpreted while
        ``fuzzy`` is false.
    """
    # Token is a number
    text = tokens[idx]
    try:
        value = self._to_decimal(text)
    except Exception as e:
        six.raise_from(ValueError('Unknown numeric token'), e)

    n_chars = len(text)
    n_tokens = len(tokens)

    if (len(ymd) == 3 and n_chars in (2, 4) and
            res.hour is None and
            (idx + 1 >= n_tokens or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
        # 19990101T23[59]
        res.hour = int(text[:2])

        if n_chars == 4:
            res.minute = int(text[2:])

    elif n_chars == 6 or (n_chars > 6 and text.find('.') == 6):
        # YYMMDD or HHMMSS[.ss]
        if not ymd and '.' not in text:
            ymd.append(text[:2])
            ymd.append(text[2:4])
            ymd.append(text[4:])
        else:
            # 19990101T235959[.59]

            # TODO: Check if res attributes already set.
            res.hour = int(text[:2])
            res.minute = int(text[2:4])
            res.second, res.microsecond = self._parsems(text[4:])

    elif n_chars in (8, 12, 14):
        # YYYYMMDD[hhmm[ss]]
        ymd.append(text[:4], 'Y')
        ymd.append(text[4:6])
        ymd.append(text[6:8])

        if n_chars > 8:
            res.hour = int(text[8:10])
            res.minute = int(text[10:12])

            if n_chars > 12:
                res.second = int(text[12:])

    else:
        # Look the h/m/s label up once; it is tested before every
        # remaining interpretation.
        hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)

        if hms_idx is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, text, hms)

        elif idx + 2 < n_tokens and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            # TODO: try/except for this?
            minutes = self._to_decimal(tokens[idx + 2])
            (res.minute, res.second) = self._parse_min_sec(minutes)

            if idx + 4 < n_tokens and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                idx += 2

            idx += 2

        elif idx + 1 < n_tokens and tokens[idx + 1] in ('-', '/', '.'):
            sep = tokens[idx + 1]
            ymd.append(text)

            if idx + 2 < n_tokens and not info.jump(tokens[idx + 2]):
                second_member = tokens[idx + 2]
                if second_member.isdigit():
                    # 01-01[-01]
                    ymd.append(second_member)
                else:
                    # 01-Jan[-01]
                    month = info.month(second_member)

                    if month is None:
                        raise ValueError()
                    ymd.append(month, 'M')

                if idx + 3 < n_tokens and tokens[idx + 3] == sep:
                    # We have three members
                    month = info.month(tokens[idx + 4])

                    if month is not None:
                        ymd.append(month, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    idx += 2

                idx += 1
            idx += 1

        elif idx + 1 >= n_tokens or info.jump(tokens[idx + 1]):
            if (idx + 2 < n_tokens and
                    info.ampm(tokens[idx + 2]) is not None):
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour,
                                             info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        elif not fuzzy:
            raise ValueError()

    return idx

1005

1006 def _find_hms_idx(self, idx, tokens, info, allow_jump):

1007 len_l = len(tokens)

1008

1009 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:

1010 # There is an "h", "m", or "s" label following this token. We take

1011 # assign the upcoming label to the current token.

1012 # e.g. the "12" in 12h"

1013 hms_idx = idx + 1

1014

1015 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and

1016 info.hms(tokens[idx+2]) is not None):

1017 # There is a space and then an "h", "m", or "s" label.

1018 # e.g. the "12" in "12 h"

1019 hms_idx = idx + 2

1020

1021 elif idx > 0 and info.hms(tokens[idx-1]) is not None:

1022 # There is a "h", "m", or "s" preceding this token. Since neither

1023 # of the previous cases was hit, there is no label following this

1024 # token, so we use the previous label.

1025 # e.g. the "04" in "12h04"

1026 hms_idx = idx-1

1027

1028 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and

1029 info.hms(tokens[idx-2]) is not None):

1030 # If we are looking at the final token, we allow for a

1031 # backward-looking check to skip over a space.

1032 # TODO: Are we sure this is the right condition here?

1033 hms_idx = idx - 2

1034

1035 else:

1036 hms_idx = None

1037

1038 return hms_idx

1039

def _assign_hms(self, res, value_repr, hms):
    """
    Store ``value_repr`` on ``res`` as an hour (``hms == 0``), a minute
    (``hms == 1``) or a second (``hms == 2``). A fractional part spills
    into the next smaller unit. Other ``hms`` values leave ``res`` alone.
    """
    # See GH issue #427, fixing float rounding
    amount = self._to_decimal(value_repr)

    if hms == 0:
        # Hour
        res.hour = int(amount)
        fraction = amount % 1
        if fraction:
            res.minute = int(60 * fraction)

    elif hms == 1:
        # Minute
        (res.minute, res.second) = self._parse_min_sec(amount)

    elif hms == 2:
        # Second: parsed from the original text, not the converted number
        (res.second, res.microsecond) = self._parsems(value_repr)

1055

1056 def _could_be_tzname(self, hour, tzname, tzoffset, token):

1057 return (hour is not None and

1058 tzname is None and

1059 tzoffset is None and

1060 len(token) <= 5 and

1061 (all(x in string.ascii_uppercase for x in token)

1062 or token in self.info.UTCZONE))

1063

1064 def _ampm_valid(self, hour, ampm, fuzzy):

1065 """

1066 For fuzzy parsing, 'a' or 'am' (both valid English words)

1067 may erroneously trigger the AM/PM flag. Deal with that

1068 here.

1069 """

1070 val_is_ampm = True

1071

1072 # If there's already an AM/PM flag, this one isn't one.

1073 if fuzzy and ampm is not None:

1074 val_is_ampm = False

1075

1076 # If AM/PM is found and hour is not, raise a ValueError

1077 if hour is None:

1078 if fuzzy:

1079 val_is_ampm = False

1080 else:

1081 raise ValueError('No hour specified with AM or PM flag.')

1082 elif not 0 <= hour <= 12:

1083 # If AM/PM is found, it's a 12 hour clock, so raise

1084 # an error for invalid range

1085 if fuzzy:

1086 val_is_ampm = False

1087 else:

1088 raise ValueError('Invalid hour specified for 12-hour clock.')

1089

1090 return val_is_ampm

1091

1092 def _adjust_ampm(self, hour, ampm):

1093 if hour < 12 and ampm == 1:

1094 hour += 12

1095 elif hour == 12 and ampm == 0:

1096 hour = 0

1097 return hour

1098

1099 def _parse_min_sec(self, value):

1100 # TODO: Every usage of this function sets res.second to the return

1101 # value. Are there any cases where second will be returned as None and

1102 # we *don't* want to set res.second = None?

1103 minute = int(value)

1104 second = None

1105

1106 sec_remainder = value % 1

1107 if sec_remainder:

1108 second = int(60 * sec_remainder)

1109 return (minute, second)

1110

1111 def _parse_hms(self, idx, tokens, info, hms_idx):

1112 # TODO: Is this going to admit a lot of false-positives for when we

1113 # just happen to have digits and "h", "m" or "s" characters in non-date

1114 # text? I guess hex hashes won't have that problem, but there's plenty

1115 # of random junk out there.

1116 if hms_idx is None:

1117 hms = None

1118 new_idx = idx

1119 elif hms_idx > idx:

1120 hms = info.hms(tokens[hms_idx])

1121 new_idx = hms_idx

1122 else:

1123 # Looking backwards, increment one.

1124 hms = info.hms(tokens[hms_idx]) + 1

1125 new_idx = idx

1126

1127 return (new_idx, hms)

1128

1129 # ------------------------------------------------------------------

1130 # Handling for individual tokens. These are kept as methods instead

1131 # of functions for the sake of customizability via subclassing.

1132

1133 def _parsems(self, value):

1134 """Parse a I[.F] seconds value into (seconds, microseconds)."""

1135 if "." not in value:

1136 return int(value), 0

1137 else:

1138 i, f = value.split(".")

1139 return int(i), int(f.ljust(6, "0")[:6])

1140

1141 def _to_decimal(self, val):

1142 try:

1143 decimal_value = Decimal(val)

1144 # See GH 662, edge case, infinite value should not be converted

1145 # via `_to_decimal`

1146 if not decimal_value.is_finite():

1147 raise ValueError("Converted decimal value is infinite or NaN")

1148 except Exception as e:

1149 msg = "Could not convert %s to decimal" % val

1150 six.raise_from(ValueError(msg), e)

1151 else:

1152 return decimal_value

1153

1154 # ------------------------------------------------------------------

1155 # Post-Parsing construction of datetime output. These are kept as

1156 # methods instead of functions for the sake of customizability via

1157 # subclassing.

1158

1159 def _build_tzinfo(self, tzinfos, tzname, tzoffset):

1160 if callable(tzinfos):

1161 tzdata = tzinfos(tzname, tzoffset)

1162 else:

1163 tzdata = tzinfos.get(tzname)

1164 # handle case where tzinfo is paased an options that returns None

1165 # eg tzinfos = {'BRST' : None}

1166 if isinstance(tzdata, datetime.tzinfo) or tzdata is None:

1167 tzinfo = tzdata

1168 elif isinstance(tzdata, text_type):

1169 tzinfo = tz.tzstr(tzdata)

1170 elif isinstance(tzdata, integer_types):

1171 tzinfo = tz.tzoffset(tzname, tzdata)

1172 else:

1173 raise TypeError("Offset must be tzinfo subclass, tz string, "

1174 "or int offset.")

1175 return tzinfo

1176

1177 def _build_tzaware(self, naive, res, tzinfos):

1178 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):

1179 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)

1180 aware = naive.replace(tzinfo=tzinfo)

1181 aware = self._assign_tzname(aware, res.tzname)

1182

1183 elif res.tzname and res.tzname in time.tzname:

1184 aware = naive.replace(tzinfo=tz.tzlocal())

1185

1186 # Handle ambiguous local datetime

1187 aware = self._assign_tzname(aware, res.tzname)

1188

1189 # This is mostly relevant for winter GMT zones parsed in the UK

1190 if (aware.tzname() != res.tzname and

1191 res.tzname in self.info.UTCZONE):

1192 aware = aware.replace(tzinfo=tz.UTC)

1193

1194 elif res.tzoffset == 0:

1195 aware = naive.replace(tzinfo=tz.UTC)

1196

1197 elif res.tzoffset:

1198 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))

1199

1200 elif not res.tzname and not res.tzoffset:

1201 # i.e. no timezone information was found.

1202 aware = naive

1203

1204 elif res.tzname:

1205 # tz-like string was parsed but we don't know what to do

1206 # with it

1207 warnings.warn("tzname {tzname} identified but not understood. "

1208 "Pass `tzinfos` argument in order to correctly "

1209 "return a timezone-aware datetime. In a future "

1210 "version, this will raise an "

1211 "exception.".format(tzname=res.tzname),

1212 category=UnknownTimezoneWarning)

1213 aware = naive

1214

1215 return aware

1216

1217 def _build_naive(self, res, default):

1218 repl = {}

1219 for attr in ("year", "month", "day", "hour",

1220 "minute", "second", "microsecond"):

1221 value = getattr(res, attr)

1222 if value is not None:

1223 repl[attr] = value

1224

1225 if 'day' not in repl:

1226 # If the default day exceeds the last day of the month, fall back

1227 # to the end of the month.

1228 cyear = default.year if res.year is None else res.year

1229 cmonth = default.month if res.month is None else res.month

1230 cday = default.day if res.day is None else res.day

1231

1232 if cday > monthrange(cyear, cmonth)[1]:

1233 repl['day'] = monthrange(cyear, cmonth)[1]

1234

1235 naive = default.replace(**repl)

1236

1237 if res.weekday is not None and not res.day:

1238 naive = naive + relativedelta.relativedelta(weekday=res.weekday)

1239

1240 return naive

1241

1242 def _assign_tzname(self, dt, tzname):

1243 if dt.tzname() != tzname:

1244 new_dt = tz.enfold(dt, fold=1)

1245 if new_dt.tzname() == tzname:

1246 return new_dt

1247

1248 return dt

1249

1250 def _recombine_skipped(self, tokens, skipped_idxs):

1251 """

1252 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]

1253 >>> skipped_idxs = [0, 1, 2, 5]

1254 >>> _recombine_skipped(tokens, skipped_idxs)

1255 ["foo bar", "baz"]

1256 """

1257 skipped_tokens = []

1258 for i, idx in enumerate(sorted(skipped_idxs)):

1259 if i > 0 and idx - 1 == skipped_idxs[i - 1]:

1260 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]

1261 else:

1262 skipped_tokens.append(tokens[idx])

1263

1264 return skipped_tokens

1265

1266

# Shared module-level parser instance; ``parse()`` uses it whenever no
# ``parserinfo`` argument is supplied.
DEFAULTPARSER = parser()

1268

1269

def parse(timestr, parserinfo=None, **kwargs):
    """

    Parse a string in one of the supported formats, using the
    ``parserinfo`` parameters.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a naive
        :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string formats, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date would
        be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A truthy ``parserinfo`` gets its own parser; otherwise the shared
    # module-level instance is reused.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)

1369

1370

class _tzparser(object):
    """
    Parser for TZ-variable style time zone strings.

    ``parse`` understands a standard abbreviation with an optional offset,
    optionally followed by a daylight abbreviation and offset (e.g.
    ``BRST+3[BRDT[+2]]``), and optionally followed by transition rules in
    either the ``J``/``M``/year-day form or the deprecated comma-separated
    form (e.g. ``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]``).
    """

    class _result(_resultbase):
        # Abbreviations and offsets (in seconds) of the standard and
        # daylight zones, plus the two transition rules.
        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            # Fields of one transition rule (used for ``start`` and ``end``).
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a ``_result`` object.

        :param tzstr:
            The time zone string to parse.

        :return:
            A ``_result`` whose ``any_unused_tokens`` attribute is ``True``
            if some token other than ``","`` or ``":"`` was not consumed, or
            ``None`` if the string could not be parsed.
        """
        res = self._result()
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
        used_idxs = []
        len_l = len(l)
        try:
            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    used_idxs.extend(range(j))
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check instead of ``assert`` so that validation
                # is not stripped when running under ``python -O``.
                if l[i] != ',':
                    return None

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    if res.stdoffset is None:
                        # The trailing value is a delta on top of the
                        # standard offset; without one the string cannot be
                        # interpreted (and ``None + int`` would raise an
                        # uncaught TypeError).
                        return None
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s"' % tzstr) +
                     ' is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            return None
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    # Each rule must be followed by a comma or end the input.
                    if i != len_l and l[i] != ',':
                        return None

                    i += 1

                # Nothing may be left over after the second rule.
                if i < len_l:
                    return None

        except (IndexError, ValueError):
            # Truncated input or a non-numeric field: not a valid string.
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
        return res

1580

1581

# Shared module-level ``_tzparser`` instance used by ``_parsetz()``.
DEFAULTTZPARSER = _tzparser()

1583

1584

def _parsetz(tzstr):
    """Parse ``tzstr`` using the shared ``DEFAULTTZPARSER`` instance."""
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed

1587

1588

class ParserError(ValueError):
    """Exception subclass used for any failure to parse a datetime string.

    This is a subclass of :py:exc:`ValueError`, and should be raised any time
    earlier versions of ``dateutil`` would have raised ``ValueError``.

    The first argument is treated as a ``%``-style format string that is
    filled in with the remaining arguments when the exception is rendered.

    .. versionadded:: 2.8.1
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # ValueError covers messages containing a stray "%" (e.g.
            # "100% wrong"); fall back to the plain rendering rather than
            # letting str() itself fail.
            return super(ParserError, self).__str__()

    def __repr__(self):
        # Wrap each argument in a 1-tuple so that tuple arguments are
        # formatted as a single value.
        args = ", ".join("'%s'" % (arg,) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)

1606

1607

class UnknownTimezoneWarning(RuntimeWarning):
    """Raised when the parser finds a timezone it cannot parse into a tzinfo.

    This is a warning category: it is passed to ``warnings.warn`` when a
    time zone name was identified in the input but could not be understood.

    .. versionadded:: 2.7.0
    """

1613# vim:ts=4:sw=4:et

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dateutil/parser/_parser.py: 15%

812 statements