Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dateparser/parser.py: 91%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

417 statements  

1import calendar 

2from collections import OrderedDict 

3from datetime import datetime, timedelta, timezone 

4from io import StringIO 

5 

6import pytz 

7import regex as re 

8 

9from dateparser.utils import ( 

10 _get_missing_parts, 

11 get_last_day_of_month, 

12 get_next_leap_year, 

13 get_previous_leap_year, 

14 get_timezone_from_tz_string, 

15 set_correct_day_from_settings, 

16 set_correct_month_from_settings, 

17) 

18from dateparser.utils.strptime import strptime 

19 

# One or more consecutive non-digit characters; used to decide whether a
# string can be handled by the no-spaces parser.
NSP_COMPATIBLE = re.compile(r"\D+")
# The lowercase meridian markers "am" / "pm" anywhere in a token.
MERIDIAN = re.compile(r"am|pm")
# A run of one to six digits (fractional-second part of a time).
MICROSECOND = re.compile(r"\d{1,6}")
# A string consisting of exactly eight digits.
EIGHT_DIGIT = re.compile(r"^\d{8}$")
# "H:MM" or "HH:MM" with hour 0-23 and minute 00-59.
HOUR_MINUTE_REGEX = re.compile(r"^([0-9]|0[0-9]|1[0-9]|2[0-3]):[0-5][0-9]$")

25 

26 

def no_space_parser_eligibile(datestring):
    """Tell whether ``datestring`` may be handed to the no-spaces parser.

    The string qualifies when it contains no non-digit characters at all, or
    when the first run of non-digit characters is exactly a single ``":"``.

    :param datestring: the raw date string to inspect.
    :return: ``True`` if the string qualifies, ``False`` otherwise.
    """
    first_non_digit_run = NSP_COMPATIBLE.search(datestring)
    if first_non_digit_run is None:
        return True
    return first_non_digit_run.group() == ":"

32 

33 

def get_unresolved_attrs(parser_object):
    """Split the date components of ``parser_object`` into set and unset.

    The attributes ``year``, ``month`` and ``day`` are inspected in that
    order; a missing attribute or one equal to ``None`` counts as unset.

    :param parser_object: any object that may carry the three attributes.
    :return: a ``(seen, unseen)`` tuple of lists of attribute names.
    """
    resolved, unresolved = [], []
    for name in ("year", "month", "day"):
        is_missing = getattr(parser_object, name, None) is None
        bucket = unresolved if is_missing else resolved
        bucket.append(name)
    return resolved, unresolved

44 

45 

# Maps a DATE_ORDER code to the matching run of two-digit-year strptime
# directives.
date_order_chart = {
    "DMY": "%d%m%y",
    "DYM": "%d%y%m",
    "MDY": "%m%d%y",
    "MYD": "%m%y%d",
    "YDM": "%y%d%m",
    "YMD": "%y%m%d",
}


def resolve_date_order(order, lst=None):
    """Translate a date-order code such as ``"MDY"``.

    :param order: one of the six keys of ``date_order_chart``.
    :param lst: when truthy, return the component names as a new list
        (e.g. ``["month", "day", "year"]``); otherwise return the directive
        string from ``date_order_chart`` (e.g. ``"%m%d%y"``).
    :raises KeyError: if ``order`` is not a known code.
    """
    if not lst:
        return date_order_chart[order]

    # Built on every call so each caller receives its own list objects.
    components_by_order = {
        "DMY": ["day", "month", "year"],
        "DYM": ["day", "year", "month"],
        "MDY": ["month", "day", "year"],
        "MYD": ["month", "year", "day"],
        "YDM": ["year", "day", "month"],
        "YMD": ["year", "month", "day"],
    }
    return components_by_order[order]

67 

68 

def _parse_absolute(datestring, settings, tz=None):
    """Parse ``datestring`` with the token-based ``_parser``.

    All three arguments are forwarded unchanged to ``_parser.parse`` and its
    result is returned as is.
    """
    parse_with_tokens = _parser.parse
    return parse_with_tokens(datestring, settings, tz)

71 

72 

def _parse_nospaces(datestring, settings, tz=None):
    """Parse ``datestring`` with ``_no_spaces_parser``.

    ``tz`` is accepted so the signature mirrors ``_parse_absolute`` but it is
    not forwarded; only ``datestring`` and ``settings`` reach the parser.
    """
    parse_without_spaces = _no_spaces_parser.parse
    return parse_without_spaces(datestring, settings)

75 

76 

class _time_parser:
    """Callable that turns a time string into a time value.

    Each entry of ``time_directives`` is tried in order with ``strptime``;
    the first directive that parses wins.
    """

    time_directives = [
        "%H:%M:%S",
        "%I:%M:%S %p",
        "%H:%M",
        "%I:%M %p",
        "%I %p",
        "%H:%M:%S.%f",
        "%I:%M:%S.%f %p",
        "%H:%M %p",
    ]

    def __call__(self, timestring):
        """Return ``strptime(...).time()`` for the first matching directive.

        :param timestring: the time text; surrounding whitespace is ignored.
        :raises ValueError: if no directive in ``time_directives`` matches.
        """
        candidate = timestring.strip()
        for directive in self.time_directives:
            try:
                return strptime(candidate, directive).time()
            except ValueError:
                # This layout does not fit; move on to the next one.
                continue
        raise ValueError("%s does not seem to be a valid time string" % timestring)


# Shared module-level instance used as a plain function.
time_parser = _time_parser()

101 

102 

class _no_spaces_parser:
    """Parser for date strings written as a run of digits (``":"`` allowed).

    Candidate ``strptime`` formats are built from every combination of the
    date layouts in ``_dateformats`` and the time layouts in
    ``_timeformats`` and tried one after another.
    """

    _dateformats = [
        "%Y%m%d",
        "%Y%d%m",
        "%m%Y%d",
        "%m%d%Y",
        "%d%Y%m",
        "%d%m%Y",
        "%y%m%d",
        "%y%d%m",
        "%m%y%d",
        "%m%d%y",
        "%d%y%m",
        "%d%m%y",
    ]

    _preferred_formats = ["%Y%m%d%H%M", "%Y%m%d%H%M%S", "%Y%m%d%H%M%S.%f"]

    # Formats tried, in this order, for strings of exactly eight digits.
    _preferred_formats_ordered_8_digit = [
        "%m%d%Y",
        "%d%m%Y",
        "%Y%m%d",
        "%Y%d%m",
        "%m%Y%d",
        "%d%Y%m",
    ]

    _timeformats = ["%H%M%S.%f", "%H%M%S", "%H%M", "%H"]

    # Directives whose presence in a format implies the given period.
    period = {"day": ["%d", "%H", "%M", "%S"], "month": ["%m"]}

    _default_order = resolve_date_order("MDY")

    def __init__(self, *args, **kwargs):
        """Build the per-order lists of candidate formats.

        ``self._all`` holds every date format, every date+time combination
        and every bare time format. ``self.date_formats`` maps each order
        string (e.g. ``"%d%m%y"``) to ``self._all`` re-sorted so that formats
        starting with that order (case-insensitively) come first. The
        ``"%m%d%y"`` entry is additionally prefixed with
        ``_preferred_formats``.
        """
        self._all = (
            self._dateformats
            + [x + y for x in self._dateformats for y in self._timeformats]
            + self._timeformats
        )

        orders = ("%m%d%y", "%m%y%d", "%y%m%d", "%y%d%m", "%d%m%y", "%d%y%m")
        self.date_formats = {
            order: self._formats_preferring(order) for order in orders
        }
        self.date_formats["%m%d%y"] = (
            self._preferred_formats + self.date_formats["%m%d%y"]
        )

    def _formats_preferring(self, order):
        """Return ``self._all`` with formats starting with ``order`` first.

        The sort is stable, so the relative order inside each group is that
        of ``self._all``.
        """
        return sorted(
            self._all,
            key=lambda fmt: fmt.lower().startswith(order),
            reverse=True,
        )

    @classmethod
    def _get_period(cls, format_string):
        """Return the period name implied by ``format_string``.

        Period names from ``cls.period`` are checked in alphabetical order;
        the first one with a directive contained in ``format_string`` is
        returned, and ``"year"`` is returned when none matches.
        """
        for pname, pdrv in sorted(cls.period.items(), key=lambda x: x[0]):
            if any(drv in format_string for drv in pdrv):
                return pname
        return "year"

    @classmethod
    def _find_best_matching_date(cls, datestring):
        """Try the eight-digit formats and return the first plausible match.

        :return: a ``(parsed, period)`` tuple for the first format in
            ``_preferred_formats_ordered_8_digit`` that parses and yields a
            four-digit year, or ``None`` if no format does.
        """
        for fmt in cls._preferred_formats_ordered_8_digit:
            try:
                dt = strptime(datestring, fmt), cls._get_period(fmt)
                if len(str(dt[0].year)) == 4:
                    return dt
            except Exception:
                # Best effort: a format that does not fit is simply skipped.
                # ``Exception`` (not a bare ``except``) so that
                # KeyboardInterrupt / SystemExit still propagate.
                pass
        return None

    @classmethod
    def parse(cls, datestring, settings):
        """Parse a digits-only date string.

        :param datestring: the string to parse; ``":"`` characters are
            removed before parsing.
        :param settings: object providing ``DATE_ORDER`` and the attributes
            read by ``_check_strict_parsing``.
        :return: a ``(parsed, period)`` tuple.
        :raises ValueError: if the string is not eligible for this parser,
            is empty once ``":"`` is removed, or matches no format.
        """
        if not no_space_parser_eligibile(datestring):
            raise ValueError("Unable to parse date from: %s" % datestring)

        datestring = datestring.replace(":", "")
        if not datestring:
            raise ValueError("Empty string")
        tokens = tokenizer(datestring)
        if settings.DATE_ORDER:
            order = resolve_date_order(settings.DATE_ORDER)
        else:
            order = cls._default_order
        if EIGHT_DIGIT.match(datestring):
            dt = cls._find_best_matching_date(datestring)
            if dt is not None:
                return dt
        nsp = cls()
        ambiguous_date = None
        for token, _ in tokens.tokenize():
            for fmt in nsp.date_formats[order]:
                try:
                    dt = strptime(token, fmt), cls._get_period(fmt)
                    if len(str(dt[0].year)) < 4:
                        # Remember the last short-year match as a fallback
                        # and keep looking for a four-digit-year match.
                        ambiguous_date = dt
                        continue

                    missing = _get_missing_parts(fmt)
                    _check_strict_parsing(missing, settings)
                    return dt
                except Exception:
                    # Covers both "format does not fit" and a strict-parsing
                    # rejection of this format; either way try the next one.
                    # ``Exception`` rather than a bare ``except`` so that
                    # KeyboardInterrupt / SystemExit are not swallowed.
                    pass

            # Every format has been tried on this token without an
            # unambiguous match, so the outcome is decided here and later
            # tokens are never examined.
            if ambiguous_date:
                return ambiguous_date
            raise ValueError("Unable to parse date from: %s" % datestring)

226 

227 

228def _get_missing_error(missing): 

229 return "Fields missing from the date string: {}".format(", ".join(missing)) 

230 

231 

232def _check_strict_parsing(missing, settings): 

233 if settings.STRICT_PARSING and missing: 

234 raise ValueError(_get_missing_error(missing)) 

235 elif settings.REQUIRE_PARTS and missing: 

236 errors = [part for part in settings.REQUIRE_PARTS if part in missing] 

237 if errors: 

238 raise ValueError(_get_missing_error(errors)) 

239 

240 

241class _parser: 

242 alpha_directives = OrderedDict( 

243 [ 

244 ("weekday", ["%A", "%a"]), 

245 ("month", ["%B", "%b"]), 

246 ] 

247 ) 

248 

249 num_directives = { 

250 "month": ["%m"], 

251 "day": ["%d"], 

252 "year": ["%y", "%Y"], 

253 } 

254 

255 def __init__(self, tokens, settings): 

256 self.settings = settings 

257 self.tokens = [(t[0].strip(), t[1]) for t in list(tokens)] 

258 self.filtered_tokens = [ 

259 (t[0], t[1], i) for i, t in enumerate(self.tokens) if t[1] <= 1 

260 ] 

261 

262 self.unset_tokens = [] 

263 

264 self.day = None 

265 self.month = None 

266 self.year = None 

267 self.time = None 

268 

269 self.auto_order = [] 

270 

271 self._token_day = None 

272 self._token_month = None 

273 self._token_year = None 

274 self._token_time = None 

275 

276 self.ordered_num_directives = OrderedDict( 

277 (k, self.num_directives[k]) 

278 for k in (resolve_date_order(settings.DATE_ORDER, lst=True)) 

279 ) 

280 

281 skip_index = [] 

282 skip_component = None 

283 skip_tokens = ["t", "year", "hour", "minute"] 

284 

285 for index, token_type_original_index in enumerate(self.filtered_tokens): 

286 if index in skip_index: 

287 continue 

288 

289 token, type, original_index = token_type_original_index 

290 

291 if token in skip_tokens: 

292 continue 

293 

294 if self.time is None: 

295 meridian_index = index + 1 

296 

297 try: 

298 # try case where hours and minutes are separated by a period. Example: 13.20. 

299 _is_before_period = self.tokens[original_index + 1][0] == "." 

300 _is_after_period = ( 

301 original_index != 0 

302 and self.tokens[original_index - 1][0] == "." 

303 ) 

304 

305 if _is_before_period and not _is_after_period: 

306 index_next_token = index + 1 

307 next_token = self.filtered_tokens[index_next_token][0] 

308 index_in_tokens_for_next_token = self.filtered_tokens[ 

309 index_next_token 

310 ][2] 

311 

312 next_token_is_last = ( 

313 index_next_token == len(self.filtered_tokens) - 1 

314 ) 

315 if ( 

316 next_token_is_last 

317 or self.tokens[index_in_tokens_for_next_token + 1][0] != "." 

318 ): 

319 new_token = token + ":" + next_token 

320 if re.match(HOUR_MINUTE_REGEX, new_token): 

321 token = new_token 

322 skip_index.append(index + 1) 

323 meridian_index += 1 

324 except Exception: 

325 pass 

326 

327 try: 

328 microsecond = MICROSECOND.search( 

329 self.filtered_tokens[index + 1][0] 

330 ).group() 

331 # Is after time token? raise ValueError if ':' can't be found: 

332 token.index(":") 

333 # Is after period? raise ValueError if '.' can't be found: 

334 self.tokens[self.tokens.index((token, 0)) + 1][0].index(".") 

335 except: 

336 microsecond = None 

337 

338 if microsecond: 

339 meridian_index += 1 

340 

341 try: 

342 meridian = MERIDIAN.search( 

343 self.filtered_tokens[meridian_index][0] 

344 ).group() 

345 except: 

346 meridian = None 

347 

348 if any([":" in token, meridian, microsecond]): 

349 if meridian and not microsecond: 

350 self._token_time = "%s %s" % (token, meridian) 

351 skip_index.append(meridian_index) 

352 elif microsecond and not meridian: 

353 self._token_time = "%s.%s" % (token, microsecond) 

354 skip_index.append(index + 1) 

355 elif meridian and microsecond: 

356 self._token_time = "%s.%s %s" % (token, microsecond, meridian) 

357 skip_index.append(index + 1) 

358 skip_index.append(meridian_index) 

359 else: 

360 self._token_time = token 

361 self.time = lambda: time_parser(self._token_time) 

362 continue 

363 

364 results = self._parse(type, token, skip_component=skip_component) 

365 for res in results: 

366 if len(token) == 4 and res[0] == "year": 

367 skip_component = "year" 

368 setattr(self, *res) 

369 

370 known, unknown = get_unresolved_attrs(self) 

371 params = {} 

372 for attr in known: 

373 params.update({attr: getattr(self, attr)}) 

374 for attr in unknown: 

375 for token, type, _ in self.unset_tokens: 

376 if type == 0: 

377 params.update({attr: int(token)}) 

378 setattr(self, "_token_%s" % attr, token) 

379 setattr(self, attr, int(token)) 

380 

381 def _get_period(self): 

382 if self.settings.RETURN_TIME_AS_PERIOD: 

383 if getattr(self, "time", None): 

384 return "time" 

385 

386 for period in ["time", "day"]: 

387 if getattr(self, period, None): 

388 return "day" 

389 

390 for period in ["month", "year"]: 

391 if getattr(self, period, None): 

392 return period 

393 

394 if self._results(): 

395 return "day" 

396 

397 def _get_datetime_obj(self, **params): 

398 try: 

399 return datetime(**params) 

400 except ValueError as e: 

401 error_text = e.__str__() 

402 error_msgs = ["day is out of range", "day must be in"] 

403 if error_msgs[0] in error_text or error_msgs[1] in error_text: 

404 if not (self._token_day or hasattr(self, "_token_weekday")): 

405 # if day is not available put last day of the month 

406 params["day"] = get_last_day_of_month( 

407 params["year"], params["month"] 

408 ) 

409 return datetime(**params) 

410 elif ( 

411 not self._token_year 

412 and params["day"] == 29 

413 and params["month"] == 2 

414 and not calendar.isleap(params["year"]) 

415 ): 

416 # fix the year when year is not present and it is 29 of February 

417 params["year"] = self._get_correct_leap_year( 

418 self.settings.PREFER_DATES_FROM, params["year"] 

419 ) 

420 return datetime(**params) 

421 raise e 

422 

423 def _get_correct_leap_year(self, prefer_dates_from, current_year): 

424 if prefer_dates_from == "future": 

425 return get_next_leap_year(current_year) 

426 if prefer_dates_from == "past": 

427 return get_previous_leap_year(current_year) 

428 

429 # Default case ('current_period'): return closer leap year 

430 next_leap_year = get_next_leap_year(current_year) 

431 previous_leap_year = get_previous_leap_year(current_year) 

432 next_leap_year_is_closer = ( 

433 next_leap_year - current_year < current_year - previous_leap_year 

434 ) 

435 return next_leap_year if next_leap_year_is_closer else previous_leap_year 

436 

437 def _set_relative_base(self): 

438 self.now = self.settings.RELATIVE_BASE 

439 if not self.now: 

440 self.now = datetime.now(tz=timezone.utc).replace(tzinfo=None) 

441 

442 def _get_datetime_obj_params(self): 

443 if not self.now: 

444 self._set_relative_base() 

445 

446 params = { 

447 "day": self.day or self.now.day, 

448 "month": self.month or self.now.month, 

449 "year": self.year or self.now.year, 

450 "hour": 0, 

451 "minute": 0, 

452 "second": 0, 

453 "microsecond": 0, 

454 } 

455 return params 

456 

457 def _get_date_obj(self, token, directive): 

458 return strptime(token, directive) 

459 

460 def _results(self): 

461 missing = [ 

462 field for field in ("day", "month", "year") if not getattr(self, field) 

463 ] 

464 _check_strict_parsing(missing, self.settings) 

465 self._set_relative_base() 

466 

467 time = self.time() if self.time is not None else None 

468 params = self._get_datetime_obj_params() 

469 

470 if time: 

471 params.update( 

472 dict( 

473 hour=time.hour, 

474 minute=time.minute, 

475 second=time.second, 

476 microsecond=time.microsecond, 

477 ) 

478 ) 

479 

480 return self._get_datetime_obj(**params) 

481 

482 def _correct_for_time_frame(self, dateobj, tz): 

483 days = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] 

484 

485 token_weekday, _ = getattr(self, "_token_weekday", (None, None)) 

486 

487 if token_weekday and not ( 

488 self._token_year or self._token_month or self._token_day 

489 ): 

490 day_index = calendar.weekday(dateobj.year, dateobj.month, dateobj.day) 

491 day = token_weekday[:3].lower() 

492 steps = 0 

493 if "future" in self.settings.PREFER_DATES_FROM: 

494 if days[day_index] == day: 

495 steps = 7 

496 else: 

497 while days[day_index] != day: 

498 day_index = (day_index + 1) % 7 

499 steps += 1 

500 delta = timedelta(days=steps) 

501 else: 

502 if days[day_index] == day: 

503 if self.settings.PREFER_DATES_FROM == "past": 

504 steps = 7 

505 else: 

506 steps = 0 

507 else: 

508 while days[day_index] != day: 

509 day_index -= 1 

510 steps += 1 

511 delta = timedelta(days=-steps) 

512 

513 dateobj = dateobj + delta 

514 

515 # NOTE: If this assert fires, self.now needs to be made offset-aware in a similar 

516 # way that dateobj is temporarily made offset-aware. 

517 assert not ( 

518 self.now.tzinfo is None and dateobj.tzinfo is not None 

519 ), "`self.now` doesn't have `tzinfo`. Review comment in code for details." 

520 

521 # Store the original dateobj values so that upon subsequent parsing everything is not 

522 # treated as offset-aware if offset awareness is changed. 

523 original_dateobj = dateobj 

524 

525 # Since date comparisons must be either offset-naive or offset-aware, normalize dateobj 

526 # to be offset-aware if one or the other is already offset-aware. 

527 if self.now.tzinfo is not None and dateobj.tzinfo is None: 

528 dateobj = pytz.utc.localize(dateobj) 

529 

530 if self.month and not self.year: 

531 try: 

532 if self.now < dateobj: 

533 if self.settings.PREFER_DATES_FROM == "past": 

534 dateobj = dateobj.replace(year=dateobj.year - 1) 

535 else: 

536 if self.settings.PREFER_DATES_FROM == "future": 

537 dateobj = dateobj.replace(year=dateobj.year + 1) 

538 except ValueError as e: 

539 if dateobj.day == 29 and dateobj.month == 2: 

540 valid_year = self._get_correct_leap_year( 

541 self.settings.PREFER_DATES_FROM, dateobj.year 

542 ) 

543 dateobj = dateobj.replace(year=valid_year) 

544 else: 

545 raise e 

546 

547 if self._token_year and len(self._token_year[0]) == 2: 

548 if self.now < dateobj: 

549 if "past" in self.settings.PREFER_DATES_FROM: 

550 dateobj = dateobj.replace(year=dateobj.year - 100) 

551 else: 

552 if "future" in self.settings.PREFER_DATES_FROM: 

553 dateobj = dateobj.replace(year=dateobj.year + 100) 

554 

555 if self._token_time and not any( 

556 [ 

557 self._token_year, 

558 self._token_month, 

559 self._token_day, 

560 hasattr(self, "_token_weekday"), 

561 ] 

562 ): 

563 # Convert dateobj to utc time to compare with self.now 

564 try: 

565 tz = tz or get_timezone_from_tz_string(self.settings.TIMEZONE) 

566 tz_offset = tz.utcoffset(dateobj) 

567 except (pytz.UnknownTimeZoneError, pytz.NonExistentTimeError): 

568 tz_offset = timedelta(hours=0) 

569 

570 if "past" in self.settings.PREFER_DATES_FROM: 

571 if self.now < dateobj - tz_offset: 

572 dateobj = dateobj + timedelta(days=-1) 

573 if "future" in self.settings.PREFER_DATES_FROM: 

574 if self.now > dateobj - tz_offset: 

575 dateobj = dateobj + timedelta(days=1) 

576 

577 # Reset dateobj to the original value, thus removing any offset awareness that may 

578 # have been set earlier. 

579 dateobj = dateobj.replace(tzinfo=original_dateobj.tzinfo) 

580 

581 return dateobj 

582 

583 def _correct_for_day(self, dateobj): 

584 if ( 

585 getattr(self, "_token_day", None) 

586 or getattr(self, "_token_weekday", None) 

587 or getattr(self, "_token_time", None) 

588 ): 

589 return dateobj 

590 

591 dateobj = set_correct_day_from_settings( 

592 dateobj, self.settings, current_day=self.now.day 

593 ) 

594 return dateobj 

595 

596 def _correct_for_month(self, dateobj): 

597 relative_base = getattr(self.settings, "RELATIVE_BASE", None) 

598 relative_base_month = ( 

599 relative_base.month if hasattr(relative_base, "month") else relative_base 

600 ) 

601 

602 if getattr(self, "_token_month", None): 

603 return dateobj 

604 

605 dateobj = set_correct_month_from_settings( 

606 dateobj, self.settings, relative_base_month 

607 ) 

608 return dateobj 

609 

610 @classmethod 

611 def parse(cls, datestring, settings, tz=None): 

612 tokens = tokenizer(datestring) 

613 po = cls(tokens.tokenize(), settings) 

614 dateobj = po._results() 

615 

616 # correction for past, future if applicable 

617 dateobj = po._correct_for_time_frame(dateobj, tz) 

618 

619 # correction for preference of month: beginning, current, end 

620 # must happen before day so that day is derived from the correct month 

621 dateobj = po._correct_for_month(dateobj) 

622 

623 # correction for preference of day: beginning, current, end 

624 dateobj = po._correct_for_day(dateobj) 

625 

626 period = po._get_period() 

627 

628 return dateobj, period 

629 

630 def _parse(self, type, token, skip_component=None): 

631 def set_and_return(token, type, component, dateobj, skip_date_order=False): 

632 if not skip_date_order: 

633 self.auto_order.append(component) 

634 setattr(self, "_token_%s" % component, (token, type)) 

635 return [(component, getattr(dateobj, component))] 

636 

637 def parse_number(token, skip_component=None): 

638 type = 0 

639 

640 for component, directives in self.ordered_num_directives.items(): 

641 if skip_component == component: 

642 continue 

643 for directive in directives: 

644 try: 

645 do = self._get_date_obj(token, directive) 

646 prev_value = getattr(self, component, None) 

647 if not prev_value: 

648 return set_and_return(token, type, component, do) 

649 else: 

650 try: 

651 prev_token, prev_type = getattr( 

652 self, "_token_%s" % component 

653 ) 

654 if prev_type == type: 

655 do = self._get_date_obj(prev_token, directive) 

656 except ValueError: 

657 self.unset_tokens.append( 

658 (prev_token, prev_type, component) 

659 ) 

660 return set_and_return(token, type, component, do) 

661 except ValueError: 

662 pass 

663 else: 

664 raise ValueError("Unable to parse: %s" % token) 

665 

666 def parse_alpha(token, skip_component=None): 

667 type = 1 

668 

669 for component, directives in self.alpha_directives.items(): 

670 if skip_component == component: 

671 continue 

672 for directive in directives: 

673 try: 

674 do = self._get_date_obj(token, directive) 

675 prev_value = getattr(self, component, None) 

676 if not prev_value: 

677 return set_and_return( 

678 token, type, component, do, skip_date_order=True 

679 ) 

680 elif component == "month": 

681 index = self.auto_order.index("month") 

682 self.auto_order[index] = "day" 

683 setattr(self, "_token_day", self._token_month) 

684 setattr(self, "_token_month", (token, type)) 

685 return [ 

686 (component, getattr(do, component)), 

687 ("day", prev_value), 

688 ] 

689 except: 

690 pass 

691 else: 

692 raise ValueError("Unable to parse: %s" % token) 

693 

694 handlers = {0: parse_number, 1: parse_alpha} 

695 return handlers[type](token, skip_component) 

696 

697 

class tokenizer:
    """Split a date string into runs of digits, letters and other characters.

    Each token is yielded as ``(text, type)`` where type ``0`` is a run of
    characters from ``digits``, ``1`` a run of characters from ``letters``
    and ``2`` a run of anything else. ``":"`` is listed in ``digits``, so a
    string such as ``"12:30"`` stays a single type-0 token.
    """

    digits = "0123456789:"
    letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    def _isletter(self, tkn):
        """Return whether ``tkn`` is contained in ``letters``."""
        return tkn in self.letters

    def _isdigit(self, tkn):
        """Return whether ``tkn`` is contained in ``digits``."""
        return tkn in self.digits

    def __init__(self, ds):
        """Wrap the string ``ds`` in a stream that ``tokenize`` reads from."""
        self.instream = StringIO(ds)

    def _switch(self, chara, charb):
        """Classify ``chara`` and tell whether ``charb`` starts a new token.

        :return: ``(type, boundary)`` where ``type`` is the class of
            ``chara`` (``0`` digit, ``1`` letter, ``2`` other) and
            ``boundary`` is true when ``charb`` belongs to a different
            token than ``chara``.
        """
        if self._isdigit(chara):
            return 0, not self._isdigit(charb)

        if self._isletter(chara):
            return 1, not self._isletter(charb)

        return 2, self._isdigit(charb) or self._isletter(charb)

    def tokenize(self):
        """Yield ``(text, type)`` for each token read from the stream.

        An empty input yields nothing (it previously failed with
        ``IndexError`` while inspecting the last character of an empty
        token).
        """
        token = ""

        while True:
            nextchar = self.instream.read(1)
            if not nextchar:
                break

            if not token:
                token = nextchar
                continue

            token_type, boundary = self._switch(token[-1], nextchar)
            if boundary:
                yield token, token_type
                token = nextchar
            else:
                token += nextchar

        if token:
            # Flush the final token; only its type matters here.
            token_type, _ = self._switch(token[-1], "")
            yield token, token_type