Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dateutil/parser/_parser.py: 15%

812 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1# -*- coding: utf-8 -*- 

2""" 

3This module offers a generic date/time string parser which is able to parse 

4most known formats to represent a date and/or time. 

5 

6This module attempts to be forgiving with regards to unlikely input formats, 

7returning a datetime object even for dates which are ambiguous. If an element 

8of a date/time stamp is omitted, the following rules are applied: 

9 

10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour 

11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is 

12 specified. 

13- If a time zone is omitted, a timezone-naive datetime is returned. 

14 

15If any other elements are missing, they are taken from the 

16:class:`datetime.datetime` object passed to the parameter ``default``. If this 

17results in a day number exceeding the valid number of days per month, the 

18value falls back to the end of the month. 

19 

20Additional resources about date/time string formats can be found below: 

21 

22- `A summary of the international standard date and time notation 

23 <https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ 

24- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_ 

25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ 

26- `CPAN ParseDate module 

27 <https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ 

28- `Java SimpleDateFormat Class 

29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ 

30""" 

31from __future__ import unicode_literals 

32 

33import datetime 

34import re 

35import string 

36import time 

37import warnings 

38 

39from calendar import monthrange 

40from io import StringIO 

41 

42import six 

43from six import integer_types, text_type 

44 

45from decimal import Decimal 

46 

47from warnings import warn 

48 

49from .. import relativedelta 

50from .. import tz 

51 

52__all__ = ["parse", "parserinfo", "ParserError"] 

53 

54 

55# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth 

56# making public and/or figuring out if there is something we can 

57# take off their plate. 

58class _timelex(object): 

59 # Fractional seconds are sometimes split by a comma 

60 _split_decimal = re.compile("([.,])") 

61 

62 def __init__(self, instream): 

63 if isinstance(instream, (bytes, bytearray)): 

64 instream = instream.decode() 

65 

66 if isinstance(instream, text_type): 

67 instream = StringIO(instream) 

68 elif getattr(instream, 'read', None) is None: 

69 raise TypeError('Parser must be a string or character stream, not ' 

70 '{itype}'.format(itype=instream.__class__.__name__)) 

71 

72 self.instream = instream 

73 self.charstack = [] 

74 self.tokenstack = [] 

75 self.eof = False 

76 

77 def get_token(self): 

78 """ 

79 This function breaks the time string into lexical units (tokens), which 

80 can be parsed by the parser. Lexical units are demarcated by changes in 

81 the character set, so any continuous string of letters is considered 

82 one unit, any continuous string of numbers is considered one unit. 

83 

84 The main complication arises from the fact that dots ('.') can be used 

85 both as separators (e.g. "Sep.20.2009") or decimal points (e.g. 

86 "4:30:21.447"). As such, it is necessary to read the full context of 

87 any dot-separated strings before breaking it into tokens; as such, this 

88 function maintains a "token stack", for when the ambiguous context 

89 demands that multiple tokens be parsed at once. 

90 """ 

91 if self.tokenstack: 

92 return self.tokenstack.pop(0) 

93 

94 seenletters = False 

95 token = None 

96 state = None 

97 

98 while not self.eof: 

99 # We only realize that we've reached the end of a token when we 

100 # find a character that's not part of the current token - since 

101 # that character may be part of the next token, it's stored in the 

102 # charstack. 

103 if self.charstack: 

104 nextchar = self.charstack.pop(0) 

105 else: 

106 nextchar = self.instream.read(1) 

107 while nextchar == '\x00': 

108 nextchar = self.instream.read(1) 

109 

110 if not nextchar: 

111 self.eof = True 

112 break 

113 elif not state: 

114 # First character of the token - determines if we're starting 

115 # to parse a word, a number or something else. 

116 token = nextchar 

117 if self.isword(nextchar): 

118 state = 'a' 

119 elif self.isnum(nextchar): 

120 state = '0' 

121 elif self.isspace(nextchar): 

122 token = ' ' 

123 break # emit token 

124 else: 

125 break # emit token 

126 elif state == 'a': 

127 # If we've already started reading a word, we keep reading 

128 # letters until we find something that's not part of a word. 

129 seenletters = True 

130 if self.isword(nextchar): 

131 token += nextchar 

132 elif nextchar == '.': 

133 token += nextchar 

134 state = 'a.' 

135 else: 

136 self.charstack.append(nextchar) 

137 break # emit token 

138 elif state == '0': 

139 # If we've already started reading a number, we keep reading 

140 # numbers until we find something that doesn't fit. 

141 if self.isnum(nextchar): 

142 token += nextchar 

143 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): 

144 token += nextchar 

145 state = '0.' 

146 else: 

147 self.charstack.append(nextchar) 

148 break # emit token 

149 elif state == 'a.': 

150 # If we've seen some letters and a dot separator, continue 

151 # parsing, and the tokens will be broken up later. 

152 seenletters = True 

153 if nextchar == '.' or self.isword(nextchar): 

154 token += nextchar 

155 elif self.isnum(nextchar) and token[-1] == '.': 

156 token += nextchar 

157 state = '0.' 

158 else: 

159 self.charstack.append(nextchar) 

160 break # emit token 

161 elif state == '0.': 

162 # If we've seen at least one dot separator, keep going, we'll 

163 # break up the tokens later. 

164 if nextchar == '.' or self.isnum(nextchar): 

165 token += nextchar 

166 elif self.isword(nextchar) and token[-1] == '.': 

167 token += nextchar 

168 state = 'a.' 

169 else: 

170 self.charstack.append(nextchar) 

171 break # emit token 

172 

173 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or 

174 token[-1] in '.,')): 

175 l = self._split_decimal.split(token) 

176 token = l[0] 

177 for tok in l[1:]: 

178 if tok: 

179 self.tokenstack.append(tok) 

180 

181 if state == '0.' and token.count('.') == 0: 

182 token = token.replace(',', '.') 

183 

184 return token 

185 

186 def __iter__(self): 

187 return self 

188 

189 def __next__(self): 

190 token = self.get_token() 

191 if token is None: 

192 raise StopIteration 

193 

194 return token 

195 

196 def next(self): 

197 return self.__next__() # Python 2.x support 

198 

199 @classmethod 

200 def split(cls, s): 

201 return list(cls(s)) 

202 

203 @classmethod 

204 def isword(cls, nextchar): 

205 """ Whether or not the next character is part of a word """ 

206 return nextchar.isalpha() 

207 

208 @classmethod 

209 def isnum(cls, nextchar): 

210 """ Whether the next character is part of a number """ 

211 return nextchar.isdigit() 

212 

213 @classmethod 

214 def isspace(cls, nextchar): 

215 """ Whether the next character is whitespace """ 

216 return nextchar.isspace() 

217 

218 

219class _resultbase(object): 

220 

221 def __init__(self): 

222 for attr in self.__slots__: 

223 setattr(self, attr, None) 

224 

225 def _repr(self, classname): 

226 l = [] 

227 for attr in self.__slots__: 

228 value = getattr(self, attr) 

229 if value is not None: 

230 l.append("%s=%s" % (attr, repr(value))) 

231 return "%s(%s)" % (classname, ", ".join(l)) 

232 

233 def __len__(self): 

234 return (sum(getattr(self, attr) is not None 

235 for attr in self.__slots__)) 

236 

237 def __repr__(self): 

238 return self._repr(self.__class__.__name__) 

239 

240 

class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Build lower-cased lookup tables from the class-level word lists.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference point used by convertyear() for two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Map every name in ``lst`` (lower-cased) to the index of its entry.

        Entries may be single strings or tuples of aliases; all aliases of
        a tuple map to the same index.
        """
        dct = {}
        for i, entry in enumerate(lst):
            aliases = entry if isinstance(entry, tuple) else (entry,)
            for alias in aliases:
                dct[alias.lower()] = i
        return dct

    def jump(self, name):
        """Whether ``name`` (case-insensitive) is one of the JUMP words."""
        return name.lower() in self._jump

    def weekday(self, name):
        """Index of the WEEKDAYS entry matching ``name``, else ``None``."""
        return self._weekdays.get(name.lower())

    def month(self, name):
        """One-based index of the MONTHS entry matching ``name``, else ``None``."""
        idx = self._months.get(name.lower())
        if idx is None:
            return None
        return idx + 1

    def hms(self, name):
        """Index of the HMS entry matching ``name`` (0, 1 or 2), else ``None``."""
        return self._hms.get(name.lower())

    def ampm(self, name):
        """Index of the AMPM entry matching ``name`` (0 or 1), else ``None``."""
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """Whether ``name`` (case-insensitive) is one of the PERTAIN words."""
        return name.lower() in self._pertain

    def utczone(self, name):
        """Whether ``name`` (case-insensitive) is one of the UTCZONE names."""
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset configured for the zone ``name``.

        :return:
            ``0`` if ``name`` is one of the UTCZONE names, otherwise
            whatever ``TZOFFSET`` holds for ``name`` (``None`` if absent).
        """
        # self._utczone is keyed by lower-cased names (see _convert), so the
        # probe has to be lower-cased as well; comparing the raw name meant
        # "UTC", "GMT" and "Z" were never recognised here even though
        # utczone() accepts them.
        key = name.lower() if hasattr(name, 'lower') else name
        if key in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)

        Years of 100 or more, or any year when ``century_specified`` is
        true, are returned unchanged.
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise the year and time zone fields of ``res`` in place.

        Always returns ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
             (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            # A UTC-like name always means offset zero.
            res.tzoffset = 0
        return True

392 

393 

394class _ymd(list): 

395 def __init__(self, *args, **kwargs): 

396 super(self.__class__, self).__init__(*args, **kwargs) 

397 self.century_specified = False 

398 self.dstridx = None 

399 self.mstridx = None 

400 self.ystridx = None 

401 

402 @property 

403 def has_year(self): 

404 return self.ystridx is not None 

405 

406 @property 

407 def has_month(self): 

408 return self.mstridx is not None 

409 

410 @property 

411 def has_day(self): 

412 return self.dstridx is not None 

413 

414 def could_be_day(self, value): 

415 if self.has_day: 

416 return False 

417 elif not self.has_month: 

418 return 1 <= value <= 31 

419 elif not self.has_year: 

420 # Be permissive, assume leap year 

421 month = self[self.mstridx] 

422 return 1 <= value <= monthrange(2000, month)[1] 

423 else: 

424 month = self[self.mstridx] 

425 year = self[self.ystridx] 

426 return 1 <= value <= monthrange(year, month)[1] 

427 

428 def append(self, val, label=None): 

429 if hasattr(val, '__len__'): 

430 if val.isdigit() and len(val) > 2: 

431 self.century_specified = True 

432 if label not in [None, 'Y']: # pragma: no cover 

433 raise ValueError(label) 

434 label = 'Y' 

435 elif val > 100: 

436 self.century_specified = True 

437 if label not in [None, 'Y']: # pragma: no cover 

438 raise ValueError(label) 

439 label = 'Y' 

440 

441 super(self.__class__, self).append(int(val)) 

442 

443 if label == 'M': 

444 if self.has_month: 

445 raise ValueError('Month is already set') 

446 self.mstridx = len(self) - 1 

447 elif label == 'D': 

448 if self.has_day: 

449 raise ValueError('Day is already set') 

450 self.dstridx = len(self) - 1 

451 elif label == 'Y': 

452 if self.has_year: 

453 raise ValueError('Year is already set') 

454 self.ystridx = len(self) - 1 

455 

456 def _resolve_from_stridxs(self, strids): 

457 """ 

458 Try to resolve the identities of year/month/day elements using 

459 ystridx, mstridx, and dstridx, if enough of these are specified. 

460 """ 

461 if len(self) == 3 and len(strids) == 2: 

462 # we can back out the remaining stridx value 

463 missing = [x for x in range(3) if x not in strids.values()] 

464 key = [x for x in ['y', 'm', 'd'] if x not in strids] 

465 assert len(missing) == len(key) == 1 

466 key = key[0] 

467 val = missing[0] 

468 strids[key] = val 

469 

470 assert len(self) == len(strids) # otherwise this should not be called 

471 out = {key: self[strids[key]] for key in strids} 

472 return (out.get('y'), out.get('m'), out.get('d')) 

473 

474 def resolve_ymd(self, yearfirst, dayfirst): 

475 len_ymd = len(self) 

476 year, month, day = (None, None, None) 

477 

478 strids = (('y', self.ystridx), 

479 ('m', self.mstridx), 

480 ('d', self.dstridx)) 

481 

482 strids = {key: val for key, val in strids if val is not None} 

483 if (len(self) == len(strids) > 0 or 

484 (len(self) == 3 and len(strids) == 2)): 

485 return self._resolve_from_stridxs(strids) 

486 

487 mstridx = self.mstridx 

488 

489 if len_ymd > 3: 

490 raise ValueError("More than three YMD values") 

491 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): 

492 # One member, or two members with a month string 

493 if mstridx is not None: 

494 month = self[mstridx] 

495 # since mstridx is 0 or 1, self[mstridx-1] always 

496 # looks up the other element 

497 other = self[mstridx - 1] 

498 else: 

499 other = self[0] 

500 

501 if len_ymd > 1 or mstridx is None: 

502 if other > 31: 

503 year = other 

504 else: 

505 day = other 

506 

507 elif len_ymd == 2: 

508 # Two members with numbers 

509 if self[0] > 31: 

510 # 99-01 

511 year, month = self 

512 elif self[1] > 31: 

513 # 01-99 

514 month, year = self 

515 elif dayfirst and self[1] <= 12: 

516 # 13-01 

517 day, month = self 

518 else: 

519 # 01-13 

520 month, day = self 

521 

522 elif len_ymd == 3: 

523 # Three members 

524 if mstridx == 0: 

525 if self[1] > 31: 

526 # Apr-2003-25 

527 month, year, day = self 

528 else: 

529 month, day, year = self 

530 elif mstridx == 1: 

531 if self[0] > 31 or (yearfirst and self[2] <= 31): 

532 # 99-Jan-01 

533 year, month, day = self 

534 else: 

535 # 01-Jan-01 

536 # Give precedence to day-first, since 

537 # two-digit years is usually hand-written. 

538 day, month, year = self 

539 

540 elif mstridx == 2: 

541 # WTF!? 

542 if self[1] > 31: 

543 # 01-99-Jan 

544 day, year, month = self 

545 else: 

546 # 99-01-Jan 

547 year, day, month = self 

548 

549 else: 

550 if (self[0] > 31 or 

551 self.ystridx == 0 or 

552 (yearfirst and self[1] <= 12 and self[2] <= 31)): 

553 # 99-01-01 

554 if dayfirst and self[2] <= 12: 

555 year, day, month = self 

556 else: 

557 year, month, day = self 

558 elif self[0] > 12 or (dayfirst and self[1] <= 12): 

559 # 13-01-01 

560 day, month, year = self 

561 else: 

562 # 01-13-01 

563 month, day, year = self 

564 

565 return year, month, day 

566 

567 

568class parser(object): 

569 def __init__(self, info=None): 

570 self.info = info or parserinfo() 

571 

572 def parse(self, timestr, default=None, 

573 ignoretz=False, tzinfos=None, **kwargs): 

574 """ 

575 Parse the date/time string into a :class:`datetime.datetime` object. 

576 

577 :param timestr: 

578 Any date/time string using the supported formats. 

579 

580 :param default: 

581 The default datetime object, if this is a datetime object and not 

582 ``None``, elements specified in ``timestr`` replace elements in the 

583 default object. 

584 

585 :param ignoretz: 

586 If set ``True``, time zones in parsed strings are ignored and a 

587 naive :class:`datetime.datetime` object is returned. 

588 

589 :param tzinfos: 

590 Additional time zone names / aliases which may be present in the 

591 string. This argument maps time zone names (and optionally offsets 

592 from those time zones) to time zones. This parameter can be a 

593 dictionary with timezone aliases mapping time zone names to time 

594 zones or a function taking two parameters (``tzname`` and 

595 ``tzoffset``) and returning a time zone. 

596 

597 The timezones to which the names are mapped can be an integer 

598 offset from UTC in seconds or a :class:`tzinfo` object. 

599 

600 .. doctest:: 

601 :options: +NORMALIZE_WHITESPACE 

602 

603 >>> from dateutil.parser import parse 

604 >>> from dateutil.tz import gettz 

605 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")} 

606 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos) 

607 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200)) 

608 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos) 

609 datetime.datetime(2012, 1, 19, 17, 21, 

610 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago')) 

611 

612 This parameter is ignored if ``ignoretz`` is set. 

613 

614 :param \\*\\*kwargs: 

615 Keyword arguments as passed to ``_parse()``. 

616 

617 :return: 

618 Returns a :class:`datetime.datetime` object or, if the 

619 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the 

620 first element being a :class:`datetime.datetime` object, the second 

621 a tuple containing the fuzzy tokens. 

622 

623 :raises ParserError: 

624 Raised for invalid or unknown string format, if the provided 

625 :class:`tzinfo` is not in a valid format, or if an invalid date 

626 would be created. 

627 

628 :raises TypeError: 

629 Raised for non-string or character stream input. 

630 

631 :raises OverflowError: 

632 Raised if the parsed date exceeds the largest valid C integer on 

633 your system. 

634 """ 

635 

636 if default is None: 

637 default = datetime.datetime.now().replace(hour=0, minute=0, 

638 second=0, microsecond=0) 

639 

640 res, skipped_tokens = self._parse(timestr, **kwargs) 

641 

642 if res is None: 

643 raise ParserError("Unknown string format: %s", timestr) 

644 

645 if len(res) == 0: 

646 raise ParserError("String does not contain a date: %s", timestr) 

647 

648 try: 

649 ret = self._build_naive(res, default) 

650 except ValueError as e: 

651 six.raise_from(ParserError(str(e) + ": %s", timestr), e) 

652 

653 if not ignoretz: 

654 ret = self._build_tzaware(ret, res, tzinfos) 

655 

656 if kwargs.get('fuzzy_with_tokens', False): 

657 return ret, skipped_tokens 

658 else: 

659 return ret 

660 

661 class _result(_resultbase): 

662 __slots__ = ["year", "month", "day", "weekday", 

663 "hour", "minute", "second", "microsecond", 

664 "tzname", "tzoffset", "ampm","any_unused_tokens"] 

665 

    def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
               fuzzy_with_tokens=False):
        """
        Private method which performs the heavy lifting of parsing, called from
        ``parse()``, which passes on its ``kwargs`` to this function.

        :param timestr:
            The string to parse.

        :param dayfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the day (``True``) or month (``False``). If
            ``yearfirst`` is set to ``True``, this distinguishes between YDM
            and YMD. If set to ``None``, this value is retrieved from the
            current :class:`parserinfo` object (which itself defaults to
            ``False``).

        :param yearfirst:
            Whether to interpret the first value in an ambiguous 3-integer date
            (e.g. 01/05/09) as the year. If ``True``, the first number is taken
            to be the year, otherwise the last number is taken to be the year.
            If this is set to ``None``, the value is retrieved from the current
            :class:`parserinfo` object (which itself defaults to ``False``).

        :param fuzzy:
            Whether to allow fuzzy parsing, allowing for string like "Today is
            January 1, 2047 at 8:21:00AM".

        :param fuzzy_with_tokens:
            If ``True``, ``fuzzy`` is automatically set to True, and the parser
            will return a tuple where the first element is the parsed
            :class:`datetime.datetime` datetimestamp and the second element is
            a tuple containing the portions of the string which were ignored:

            .. doctest::

                >>> from dateutil.parser import parse
                >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
                (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')) 

        :return:
            A 2-tuple. On success it is ``(res, skipped)`` where ``res`` is
            the populated ``_result`` and ``skipped`` is a tuple of skipped
            tokens when ``fuzzy_with_tokens`` is set, else ``None``. It is
            ``(None, None)`` when an ``IndexError`` or ``ValueError`` is
            raised while walking the tokens, or when ``info.validate``
            returns a false value.
        """
        if fuzzy_with_tokens:
            fuzzy = True

        info = self.info

        if dayfirst is None:
            dayfirst = info.dayfirst

        if yearfirst is None:
            yearfirst = info.yearfirst

        res = self._result()
        l = _timelex.split(timestr)         # Splits the timestr into tokens

        # Indexes of tokens that were skipped rather than interpreted
        skipped_idxs = []

        # year/month/day list
        ymd = _ymd()

        len_l = len(l)
        i = 0
        # Look-aheads such as l[i + 2] below are not all bounds-checked; a
        # resulting IndexError is handled by the except clause at the end.
        try:
            while i < len_l:

                # Check if it's a number
                value_repr = l[i]
                try:
                    value = float(value_repr)
                except ValueError:
                    value = None

                if value is not None:
                    # Numeric token
                    # The helper returns the index of the last token it
                    # consumed; the shared ``i += 1`` below steps past it.
                    i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

                # Check weekday
                elif info.weekday(l[i]) is not None:
                    value = info.weekday(l[i])
                    res.weekday = value

                # Check month name
                elif info.month(l[i]) is not None:
                    value = info.month(l[i])
                    ymd.append(value, 'M')

                    if i + 1 < len_l:
                        if l[i + 1] in ('-', '/'):
                            # Jan-01[-99]
                            sep = l[i + 1]
                            ymd.append(l[i + 2])

                            if i + 3 < len_l and l[i + 3] == sep:
                                # Jan-01-99
                                ymd.append(l[i + 4])
                                i += 2

                            i += 2

                        elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                              info.pertain(l[i + 2])):
                            # Jan of 01
                            # In this case, 01 is clearly year
                            if l[i + 4].isdigit():
                                # Convert it here to become unambiguous
                                value = int(l[i + 4])
                                year = str(info.convertyear(value))
                                ymd.append(year, 'Y')
                            else:
                                # Wrong guess
                                pass
                                # TODO: not hit in tests
                            i += 4

                # Check am/pm
                elif info.ampm(l[i]) is not None:
                    value = info.ampm(l[i])
                    val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                    if val_is_ampm:
                        res.hour = self._adjust_ampm(res.hour, value)
                        res.ampm = value

                    elif fuzzy:
                        skipped_idxs.append(i)

                # Check for a timezone name
                elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                    res.tzname = l[i]
                    res.tzoffset = info.tzoffset(res.tzname)

                    # Check for something like GMT+3, or BRST+3. Notice
                    # that it doesn't mean "I am 3 hours after GMT", but
                    # "my time +3 is GMT". If found, we reverse the
                    # logic so that timezone parsing code will get it
                    # right.
                    if i + 1 < len_l and l[i + 1] in ('+', '-'):
                        # Flip the sign token in place; it is interpreted
                        # by the numbered-timezone branch on a later pass.
                        l[i + 1] = ('+', '-')[l[i + 1] == '+']
                        res.tzoffset = None
                        if info.utczone(res.tzname):
                            # With something like GMT+3, the timezone
                            # is *not* GMT.
                            res.tzname = None

                # Check for a numbered timezone
                elif res.hour is not None and l[i] in ('+', '-'):
                    signal = (-1, 1)[l[i] == '+']
                    len_li = len(l[i + 1])

                    # TODO: check that l[i + 1] is integer?
                    if len_li == 4:
                        # -0300
                        hour_offset = int(l[i + 1][:2])
                        min_offset = int(l[i + 1][2:])
                    elif i + 2 < len_l and l[i + 2] == ':':
                        # -03:00
                        hour_offset = int(l[i + 1])
                        min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        hour_offset = int(l[i + 1][:2])
                        min_offset = 0
                    else:
                        raise ValueError(timestr)

                    # Offset is stored in seconds
                    res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                    # Look for a timezone name between parenthesis
                    if (i + 5 < len_l and
                            info.jump(l[i + 2]) and l[i + 3] == '(' and
                            l[i + 5] == ')' and
                            3 <= len(l[i + 4]) and
                            self._could_be_tzname(res.hour, res.tzname,
                                                  None, l[i + 4])):
                        # -0300 (BRST)
                        res.tzname = l[i + 4]
                        i += 4

                    i += 1

                # Check jumps
                elif not (info.jump(l[i]) or fuzzy):
                    raise ValueError(timestr)

                else:
                    # A jump word, or (in fuzzy mode) an unrecognised token
                    skipped_idxs.append(i)
                i += 1

            # Process year/month/day
            year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

            res.century_specified = ymd.century_specified
            res.year = year
            res.month = month
            res.day = day

        except (IndexError, ValueError):
            return None, None

        if not info.validate(res):
            return None, None

        if fuzzy_with_tokens:
            skipped_tokens = self._recombine_skipped(l, skipped_idxs)
            return res, tuple(skipped_tokens)
        else:
            return res, None

874 

    def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
        """
        Interpret the numeric token at ``tokens[idx]``, together with any
        following tokens that belong to it, storing what is found in ``ymd``
        (date components) or ``res`` (time components).

        :param tokens:
            The full list of tokens being parsed.

        :param idx:
            Index of the numeric token to interpret.

        :param info:
            Object providing the ``hms``, ``jump``, ``ampm`` and ``month``
            lookups used below.

        :param ymd:
            Year/month/day accumulator; appended to in place.

        :param res:
            Result object; its hour/minute/second/microsecond fields are
            assigned in place.

        :param fuzzy:
            If true, a number that fits none of the recognised patterns is
            ignored instead of raising.

        :return:
            The index of the last token consumed (``idx`` itself when no
            following tokens were consumed).

        :raises ValueError:
            If the token cannot be converted by ``self._to_decimal``, if
            a non-month word follows a date separator, or if no pattern
            matches and ``fuzzy`` is false.
        """
        # Token is a number
        value_repr = tokens[idx]
        try:
            value = self._to_decimal(value_repr)
        except Exception as e:
            six.raise_from(ValueError('Unknown numeric token'), e)

        # Length of the token's text; several formats are recognised by it
        len_li = len(value_repr)

        len_l = len(tokens)

        if (len(ymd) == 3 and len_li in (2, 4) and
            res.hour is None and
            (idx + 1 >= len_l or
             (tokens[idx + 1] != ':' and
              info.hms(tokens[idx + 1]) is None))):
            # 19990101T23[59]
            s = tokens[idx]
            res.hour = int(s[:2])

            if len_li == 4:
                res.minute = int(s[2:])

        elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
            # YYMMDD or HHMMSS[.ss]
            s = tokens[idx]

            if not ymd and '.' not in tokens[idx]:
                # No date components collected yet: read as YYMMDD
                ymd.append(s[:2])
                ymd.append(s[2:4])
                ymd.append(s[4:])
            else:
                # 19990101T235959[.59]

                # TODO: Check if res attributes already set.
                res.hour = int(s[:2])
                res.minute = int(s[2:4])
                res.second, res.microsecond = self._parsems(s[4:])

        elif len_li in (8, 12, 14):
            # YYYYMMDD
            s = tokens[idx]
            ymd.append(s[:4], 'Y')
            ymd.append(s[4:6])
            ymd.append(s[6:8])

            if len_li > 8:
                # YYYYMMDDhhmm
                res.hour = int(s[8:10])
                res.minute = int(s[10:12])

                if len_li > 12:
                    # YYYYMMDDhhmmss
                    res.second = int(s[12:])

        elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
            # HH[ ]h or MM[ ]m or SS[.ss][ ]s
            hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
            (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
            if hms is not None:
                # TODO: checking that hour/minute/second are not
                # already set?
                self._assign_hms(res, value_repr, hms)

        elif idx + 2 < len_l and tokens[idx + 1] == ':':
            # HH:MM[:SS[.ss]]
            res.hour = int(value)
            value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
            (res.minute, res.second) = self._parse_min_sec(value)

            if idx + 4 < len_l and tokens[idx + 3] == ':':
                res.second, res.microsecond = self._parsems(tokens[idx + 4])

                # Step past ":SS"
                idx += 2

            # Step past ":MM"
            idx += 2

        elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
            # Separator-delimited date: remember the separator so a third
            # member is only accepted when it uses the same one.
            sep = tokens[idx + 1]
            ymd.append(value_repr)

            if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
                if tokens[idx + 2].isdigit():
                    # 01-01[-01]
                    ymd.append(tokens[idx + 2])
                else:
                    # 01-Jan[-01]
                    value = info.month(tokens[idx + 2])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        raise ValueError()

                if idx + 3 < len_l and tokens[idx + 3] == sep:
                    # We have three members
                    value = info.month(tokens[idx + 4])

                    if value is not None:
                        ymd.append(value, 'M')
                    else:
                        ymd.append(tokens[idx + 4])
                    idx += 2

                idx += 1
            idx += 1

        elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
            # Last token, or followed by a jump word
            if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
                # 12 am
                hour = int(value)
                res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
                idx += 1
            else:
                # Year, month or day
                ymd.append(value)
            idx += 1

        elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
            # 12am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
            idx += 1

        elif ymd.could_be_day(value):
            ymd.append(value)

        elif not fuzzy:
            raise ValueError()

        return idx

1005 

1006 def _find_hms_idx(self, idx, tokens, info, allow_jump): 

1007 len_l = len(tokens) 

1008 

1009 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: 

1010 # There is an "h", "m", or "s" label following this token. We take 

1011 # assign the upcoming label to the current token. 

1012 # e.g. the "12" in 12h" 

1013 hms_idx = idx + 1 

1014 

1015 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and 

1016 info.hms(tokens[idx+2]) is not None): 

1017 # There is a space and then an "h", "m", or "s" label. 

1018 # e.g. the "12" in "12 h" 

1019 hms_idx = idx + 2 

1020 

1021 elif idx > 0 and info.hms(tokens[idx-1]) is not None: 

1022 # There is a "h", "m", or "s" preceding this token. Since neither 

1023 # of the previous cases was hit, there is no label following this 

1024 # token, so we use the previous label. 

1025 # e.g. the "04" in "12h04" 

1026 hms_idx = idx-1 

1027 

1028 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and 

1029 info.hms(tokens[idx-2]) is not None): 

1030 # If we are looking at the final token, we allow for a 

1031 # backward-looking check to skip over a space. 

1032 # TODO: Are we sure this is the right condition here? 

1033 hms_idx = idx - 2 

1034 

1035 else: 

1036 hms_idx = None 

1037 

1038 return hms_idx 

1039 

1040 def _assign_hms(self, res, value_repr, hms): 

1041 # See GH issue #427, fixing float rounding 

1042 value = self._to_decimal(value_repr) 

1043 

1044 if hms == 0: 

1045 # Hour 

1046 res.hour = int(value) 

1047 if value % 1: 

1048 res.minute = int(60*(value % 1)) 

1049 

1050 elif hms == 1: 

1051 (res.minute, res.second) = self._parse_min_sec(value) 

1052 

1053 elif hms == 2: 

1054 (res.second, res.microsecond) = self._parsems(value_repr) 

1055 

1056 def _could_be_tzname(self, hour, tzname, tzoffset, token): 

1057 return (hour is not None and 

1058 tzname is None and 

1059 tzoffset is None and 

1060 len(token) <= 5 and 

1061 (all(x in string.ascii_uppercase for x in token) 

1062 or token in self.info.UTCZONE)) 

1063 

1064 def _ampm_valid(self, hour, ampm, fuzzy): 

1065 """ 

1066 For fuzzy parsing, 'a' or 'am' (both valid English words) 

1067 may erroneously trigger the AM/PM flag. Deal with that 

1068 here. 

1069 """ 

1070 val_is_ampm = True 

1071 

1072 # If there's already an AM/PM flag, this one isn't one. 

1073 if fuzzy and ampm is not None: 

1074 val_is_ampm = False 

1075 

1076 # If AM/PM is found and hour is not, raise a ValueError 

1077 if hour is None: 

1078 if fuzzy: 

1079 val_is_ampm = False 

1080 else: 

1081 raise ValueError('No hour specified with AM or PM flag.') 

1082 elif not 0 <= hour <= 12: 

1083 # If AM/PM is found, it's a 12 hour clock, so raise 

1084 # an error for invalid range 

1085 if fuzzy: 

1086 val_is_ampm = False 

1087 else: 

1088 raise ValueError('Invalid hour specified for 12-hour clock.') 

1089 

1090 return val_is_ampm 

1091 

1092 def _adjust_ampm(self, hour, ampm): 

1093 if hour < 12 and ampm == 1: 

1094 hour += 12 

1095 elif hour == 12 and ampm == 0: 

1096 hour = 0 

1097 return hour 

1098 

1099 def _parse_min_sec(self, value): 

1100 # TODO: Every usage of this function sets res.second to the return 

1101 # value. Are there any cases where second will be returned as None and 

1102 # we *don't* want to set res.second = None? 

1103 minute = int(value) 

1104 second = None 

1105 

1106 sec_remainder = value % 1 

1107 if sec_remainder: 

1108 second = int(60 * sec_remainder) 

1109 return (minute, second) 

1110 

1111 def _parse_hms(self, idx, tokens, info, hms_idx): 

1112 # TODO: Is this going to admit a lot of false-positives for when we 

1113 # just happen to have digits and "h", "m" or "s" characters in non-date 

1114 # text? I guess hex hashes won't have that problem, but there's plenty 

1115 # of random junk out there. 

1116 if hms_idx is None: 

1117 hms = None 

1118 new_idx = idx 

1119 elif hms_idx > idx: 

1120 hms = info.hms(tokens[hms_idx]) 

1121 new_idx = hms_idx 

1122 else: 

1123 # Looking backwards, increment one. 

1124 hms = info.hms(tokens[hms_idx]) + 1 

1125 new_idx = idx 

1126 

1127 return (new_idx, hms) 

1128 

1129 # ------------------------------------------------------------------ 

1130 # Handling for individual tokens. These are kept as methods instead 

1131 # of functions for the sake of customizability via subclassing. 

1132 

1133 def _parsems(self, value): 

1134 """Parse a I[.F] seconds value into (seconds, microseconds).""" 

1135 if "." not in value: 

1136 return int(value), 0 

1137 else: 

1138 i, f = value.split(".") 

1139 return int(i), int(f.ljust(6, "0")[:6]) 

1140 

1141 def _to_decimal(self, val): 

1142 try: 

1143 decimal_value = Decimal(val) 

1144 # See GH 662, edge case, infinite value should not be converted 

1145 # via `_to_decimal` 

1146 if not decimal_value.is_finite(): 

1147 raise ValueError("Converted decimal value is infinite or NaN") 

1148 except Exception as e: 

1149 msg = "Could not convert %s to decimal" % val 

1150 six.raise_from(ValueError(msg), e) 

1151 else: 

1152 return decimal_value 

1153 

1154 # ------------------------------------------------------------------ 

1155 # Post-Parsing construction of datetime output. These are kept as 

1156 # methods instead of functions for the sake of customizability via 

1157 # subclassing. 

1158 

1159 def _build_tzinfo(self, tzinfos, tzname, tzoffset): 

1160 if callable(tzinfos): 

1161 tzdata = tzinfos(tzname, tzoffset) 

1162 else: 

1163 tzdata = tzinfos.get(tzname) 

1164 # handle case where tzinfo is paased an options that returns None 

1165 # eg tzinfos = {'BRST' : None} 

1166 if isinstance(tzdata, datetime.tzinfo) or tzdata is None: 

1167 tzinfo = tzdata 

1168 elif isinstance(tzdata, text_type): 

1169 tzinfo = tz.tzstr(tzdata) 

1170 elif isinstance(tzdata, integer_types): 

1171 tzinfo = tz.tzoffset(tzname, tzdata) 

1172 else: 

1173 raise TypeError("Offset must be tzinfo subclass, tz string, " 

1174 "or int offset.") 

1175 return tzinfo 

1176 

1177 def _build_tzaware(self, naive, res, tzinfos): 

1178 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): 

1179 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) 

1180 aware = naive.replace(tzinfo=tzinfo) 

1181 aware = self._assign_tzname(aware, res.tzname) 

1182 

1183 elif res.tzname and res.tzname in time.tzname: 

1184 aware = naive.replace(tzinfo=tz.tzlocal()) 

1185 

1186 # Handle ambiguous local datetime 

1187 aware = self._assign_tzname(aware, res.tzname) 

1188 

1189 # This is mostly relevant for winter GMT zones parsed in the UK 

1190 if (aware.tzname() != res.tzname and 

1191 res.tzname in self.info.UTCZONE): 

1192 aware = aware.replace(tzinfo=tz.UTC) 

1193 

1194 elif res.tzoffset == 0: 

1195 aware = naive.replace(tzinfo=tz.UTC) 

1196 

1197 elif res.tzoffset: 

1198 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) 

1199 

1200 elif not res.tzname and not res.tzoffset: 

1201 # i.e. no timezone information was found. 

1202 aware = naive 

1203 

1204 elif res.tzname: 

1205 # tz-like string was parsed but we don't know what to do 

1206 # with it 

1207 warnings.warn("tzname {tzname} identified but not understood. " 

1208 "Pass `tzinfos` argument in order to correctly " 

1209 "return a timezone-aware datetime. In a future " 

1210 "version, this will raise an " 

1211 "exception.".format(tzname=res.tzname), 

1212 category=UnknownTimezoneWarning) 

1213 aware = naive 

1214 

1215 return aware 

1216 

1217 def _build_naive(self, res, default): 

1218 repl = {} 

1219 for attr in ("year", "month", "day", "hour", 

1220 "minute", "second", "microsecond"): 

1221 value = getattr(res, attr) 

1222 if value is not None: 

1223 repl[attr] = value 

1224 

1225 if 'day' not in repl: 

1226 # If the default day exceeds the last day of the month, fall back 

1227 # to the end of the month. 

1228 cyear = default.year if res.year is None else res.year 

1229 cmonth = default.month if res.month is None else res.month 

1230 cday = default.day if res.day is None else res.day 

1231 

1232 if cday > monthrange(cyear, cmonth)[1]: 

1233 repl['day'] = monthrange(cyear, cmonth)[1] 

1234 

1235 naive = default.replace(**repl) 

1236 

1237 if res.weekday is not None and not res.day: 

1238 naive = naive + relativedelta.relativedelta(weekday=res.weekday) 

1239 

1240 return naive 

1241 

1242 def _assign_tzname(self, dt, tzname): 

1243 if dt.tzname() != tzname: 

1244 new_dt = tz.enfold(dt, fold=1) 

1245 if new_dt.tzname() == tzname: 

1246 return new_dt 

1247 

1248 return dt 

1249 

1250 def _recombine_skipped(self, tokens, skipped_idxs): 

1251 """ 

1252 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] 

1253 >>> skipped_idxs = [0, 1, 2, 5] 

1254 >>> _recombine_skipped(tokens, skipped_idxs) 

1255 ["foo bar", "baz"] 

1256 """ 

1257 skipped_tokens = [] 

1258 for i, idx in enumerate(sorted(skipped_idxs)): 

1259 if i > 0 and idx - 1 == skipped_idxs[i - 1]: 

1260 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] 

1261 else: 

1262 skipped_tokens.append(tokens[idx]) 

1263 

1264 return skipped_tokens 

1265 

1266 

# Shared parser instance used by :func:`parse` when no ``parserinfo`` is given.
DEFAULTPARSER = parser()

1268 

1269 

def parse(timestr, parserinfo=None, **kwargs):
    """

    Parse a string in one of the supported formats, using the
    ``parserinfo`` parameters.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a naive
        :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string formats, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date would
        be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # A truthy ``parserinfo`` gets its own parser; otherwise the shared
    # module-level instance is reused.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)

1369 

1370 

class _tzparser(object):
    """Parser for ``TZ``-variable style time zone strings.

    :meth:`parse` understands an abbreviation/offset prefix such as
    ``BRST+3[BRDT[+2]]`` optionally followed by daylight-saving rules,
    either in the GNU ``TZ`` style (``J``/``M`` rules with ``/time``) or in
    a deprecated dateutil-specific comma-separated numeric form.
    """

    class _result(_resultbase):

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """Parse ``tzstr`` into a :class:`_result`.

        :param tzstr: The time zone string to parse.
        :return: A populated ``_result`` whose ``any_unused_tokens``
            attribute tells whether tokens other than ``","`` and ``":"``
            were left unconsumed, or ``None`` if the string is malformed.
        """
        res = self._result()
        tokens = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr) if x]
        used_idxs = list()
        # Name of the offset attribute that the next numeric offset sets;
        # stays None until an abbreviation has been seen.
        offattr = None
        try:

            len_l = len(tokens)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                while j < len_l and not [x for x in tokens[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(tokens[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(tokens[i:j])

                    used_idxs.extend(range(i, j))
                i = j
                if (i < len_l and (tokens[i] in ('+', '-') or tokens[i][0] in
                                   "0123456789")):
                    if offattr is None:
                        # An offset with no abbreviation before it (e.g.
                        # "+5") is malformed; reject it instead of failing
                        # on an unbound attribute name.
                        return None
                    if tokens[i] in ('+', '-'):
                        # Yes, that's right.  See the TZ variable
                        # documentation.
                        signal = (1, -1)[tokens[i] == '+']
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = -1
                    len_li = len(tokens[i])
                    if len_li == 4:
                        # -0300
                        setattr(res, offattr, (int(tokens[i][:2]) * 3600 +
                                               int(tokens[i][2:]) * 60) * signal)
                    elif i + 1 < len_l and tokens[i + 1] == ':':
                        # -03:00
                        setattr(res, offattr,
                                (int(tokens[i]) * 3600 +
                                 int(tokens[i + 2]) * 60) * signal)
                        used_idxs.append(i)
                        i += 2
                    elif len_li <= 2:
                        # -[0]3
                        setattr(res, offattr,
                                int(tokens[i][:2]) * 3600 * signal)
                    else:
                        return None
                    used_idxs.append(i)
                    i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                # ';' is accepted as an alternative to ',' from here on.
                for j in range(i, len_l):
                    if tokens[j] == ';':
                        tokens[j] = ','

                # Explicit raise rather than ``assert`` so that the check
                # survives ``python -O``.
                if tokens[i] != ',':
                    raise ValueError("expected ',' after the offsets")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= tokens.count(',') <= 9 and
                  not [y for x in tokens[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                    if tokens[i] == '-':
                        value = int(tokens[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(tokens[i]) - 1) % 7
                    else:
                        x.day = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(tokens[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if tokens[i] in ('-', '+'):
                        signal = (-1, 1)[tokens[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(tokens[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (tokens.count(',') == 2 and tokens[i:].count('/') <= 2 and
                  not [y for x in tokens[i:] if x not in (',', '/', 'J', 'M',
                                                          '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if tokens[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(tokens[i])
                    elif tokens[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(tokens[i])
                        used_idxs.append(i)
                        i += 1
                        if tokens[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(tokens[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if tokens[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(tokens[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(tokens[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and tokens[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(tokens[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(tokens[i][:2]) * 3600 +
                                      int(tokens[i][2:]) * 60)
                        elif i + 1 < len_l and tokens[i + 1] == ':':
                            # -03:00
                            x.time = (int(tokens[i]) * 3600 +
                                      int(tokens[i + 2]) * 60)
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and tokens[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(tokens[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(tokens[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if not (i == len_l or tokens[i] == ','):
                        raise ValueError("expected ',' or end after a rule")

                    i += 1

                if i < len_l:
                    raise ValueError("unexpected trailing tokens")

        except (IndexError, ValueError, AssertionError):
            # Any structural problem with the string means "not parseable".
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {tokens[n] for n in unused_idxs}.issubset({",", ":"})
        return res

1580 

1581 

# Shared ``_tzparser`` instance used by :func:`_parsetz`.
DEFAULTTZPARSER = _tzparser()

1583 

1584 

def _parsetz(tzstr):
    """Parse ``tzstr`` with the module-level ``DEFAULTTZPARSER``."""
    parsed = DEFAULTTZPARSER.parse(tzstr)
    return parsed

1587 

1588 

class ParserError(ValueError):
    """Exception subclass used for any failure to parse a datetime string.

    This is a subclass of :py:exc:`ValueError`, and should be raised any time
    earlier versions of ``dateutil`` would have raised ``ValueError``.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values; if they cannot be combined, the
    standard exception text is used instead.

    .. versionadded:: 2.8.1
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # ValueError covers malformed format strings (e.g. a stray "%"),
            # so that str() on the exception can never itself raise.
            return super(ParserError, self).__str__()

    def __repr__(self):
        # Wrap each argument in a 1-tuple so that tuple arguments are
        # formatted as a single value rather than unpacked by "%".
        args = ", ".join("'%s'" % (arg,) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)

1606 

1607 

class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when the parser identifies a time zone name that it
    cannot convert into a tzinfo object.

    .. versionadded:: 2.7.0
    """

1613# vim:ts=4:sw=4:et