Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dateparser/freshness_date_parser.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

101 statements  

1from datetime import datetime, time, timezone 

2 

3import regex as re 

4from dateutil.relativedelta import relativedelta 

5from tzlocal import get_localzone 

6 

7from dateparser.utils import apply_timezone, localize_timezone, strip_braces 

8 

9from .parser import time_parser 

10from .timezone_parser import pop_tz_offset_from_string 

11 

12_UNITS = r"decade|year|month|week|day|hour|minute|second" 

13PATTERN = re.compile(r"(\d+[.,]?\d*)\s*(%s)\b" % _UNITS, re.I | re.S | re.U) 

14 

15 

class FreshnessDateDataParser:
    """Parses date string like "1 year, 2 months ago" and "3 hours, 50 minutes ago".

    A relative offset is extracted from the string with ``PATTERN`` and then
    added to, or subtracted from, a reference "now" datetime. An optional
    time of day in the same string (e.g. "1 day ago, 2 PM") overrides the
    time fields of the result.

    NOTE(review): ``PATTERN`` requires a word boundary right after the unit,
    so only singular unit words ("2 month") are captured here; plural forms
    are presumably normalised by the caller -- confirm.
    """

    def _are_all_words_units(self, date_string):
        """Return True when every word of *date_string* is a recognised token.

        Recognised tokens are the unit words, "ago", "in", digits, ":" and
        "am"/"pm". ``re.match`` only anchors at the start of a word, so a word
        that merely *begins* with one of these tokens is accepted as well.
        A string without any words yields True.
        """
        skip = [_UNITS, r"ago|in|\d+", r":|[ap]m"]
        token_pattern = r"%s" % "|".join(skip)

        date_string = re.sub(r"\s+", " ", date_string.strip())

        words = [x for x in re.split(r"\W", date_string) if x]
        return all(re.match(token_pattern, word) for word in words)

    def _parse_time(self, date_string, settings):
        """Attempts to parse time part of date strings like '1 day ago, 2 PM'.

        All "<number> <unit>" pairs and the words "ago"/"in" are removed and
        the remainder is handed to ``time_parser``.

        :param date_string: the relative date string.
        :param settings: accepted for interface compatibility; not used here.
        :return: whatever ``time_parser`` returns, or ``None`` when it raises.
        """
        date_string = PATTERN.sub("", date_string)
        date_string = re.sub(r"\b(?:ago|in)\b", "", date_string)
        try:
            return time_parser(date_string)
        except Exception:
            # Best effort: a missing or unparsable time part is not an error.
            # Only ``Exception`` is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            return None

    def get_local_tz(self):
        """Return the local timezone as reported by ``tzlocal.get_localzone``."""
        return get_localzone()

    @staticmethod
    def _apply_time(dateobj, timeobj):
        """Return *dateobj* with its time fields taken from *timeobj*.

        *dateobj* is returned unchanged when *timeobj* is not a
        ``datetime.time`` (for example ``None`` when no time was parsed).
        """
        if not isinstance(timeobj, time):
            return dateobj

        return dateobj.replace(
            hour=timeobj.hour,
            minute=timeobj.minute,
            second=timeobj.second,
            microsecond=timeobj.microsecond,
        )

    @staticmethod
    def _attach_timezone(dt, tz):
        """Attach *tz* to the naive datetime *dt*.

        Timezone objects exposing ``localize`` (e.g. pytz-style zones) are
        asked to localize the value; any other tzinfo is set via ``replace``.
        """
        if hasattr(tz, "localize"):
            return tz.localize(dt)
        return dt.replace(tzinfo=tz)

    def _resolve_now(self, settings, ptz):
        """Build the reference datetime that relative offsets are applied to.

        :param settings: object providing ``TIMEZONE`` and ``RELATIVE_BASE``.
        :param ptz: timezone popped from the date string, or a falsy value.
        :return: a datetime derived from ``settings.RELATIVE_BASE`` when that
            is set, otherwise from the current time.
        """
        uses_local_tz = "local" in settings.TIMEZONE.lower()

        if settings.RELATIVE_BASE:
            now = settings.RELATIVE_BASE

            if not uses_local_tz:
                now = localize_timezone(now, settings.TIMEZONE)

            if ptz:
                if now.tzinfo:
                    now = now.astimezone(ptz)
                else:
                    now = self._attach_timezone(now, ptz)

            if not now.tzinfo:
                # Still naive: fall back to the local timezone.
                now = self._attach_timezone(now, self.get_local_tz())
            return now

        if ptz:
            localized_now = datetime.now(ptz)
            if uses_local_tz:
                return localized_now
            return apply_timezone(localized_now, settings.TIMEZONE)

        if uses_local_tz:
            return datetime.now(self.get_local_tz())
        return apply_timezone(datetime.now(tz=timezone.utc), settings.TIMEZONE)

    def parse(self, date_string, settings):
        """Parse a relative date string into ``(date, period)``.

        :param date_string: string such as "2 day ago" or "in 3 hour, 2 PM";
            it is passed through ``strip_braces`` and
            ``pop_tz_offset_from_string`` first.
        :param settings: object providing ``TIMEZONE``, ``RELATIVE_BASE``,
            ``PREFER_DATES_FROM``, ``RETURN_TIME_AS_PERIOD``, ``TO_TIMEZONE``
            and ``RETURN_AS_TIMEZONE_AWARE``.
        :return: ``(datetime, period)``, or ``(None, None)`` when the string
            is not a relative date expression.
        """
        date_string = strip_braces(date_string)
        date_string, ptz = pop_tz_offset_from_string(date_string)
        _time = self._parse_time(date_string, settings)

        now = self._resolve_now(settings, ptz)

        date, period = self._parse_date(date_string, now, settings.PREFER_DATES_FROM)
        if not date:
            return date, period

        date_with_time = self._apply_time(date, _time)
        # The period only becomes "time" when the parsed time actually
        # changed the computed datetime.
        if settings.RETURN_TIME_AS_PERIOD and date_with_time != date:
            period = "time"
        date = date_with_time

        if settings.TO_TIMEZONE:
            date = apply_timezone(date, settings.TO_TIMEZONE)

        # Drop tzinfo when awareness is disabled, or when it is "default"
        # and the input string carried no timezone of its own.
        tz_aware = settings.RETURN_AS_TIMEZONE_AWARE
        if not tz_aware or (tz_aware == "default" and not ptz):
            date = date.replace(tzinfo=None)

        return date, period

    def _parse_date(self, date_string, now, prefer_dates_from):
        """Apply the offset described by *date_string* to *now*.

        :param date_string: relative date string.
        :param now: reference datetime.
        :param prefer_dates_from: string searched for the word "future".
        :return: ``(date, period)``, or ``(None, None)`` when the string has
            unrecognised words or no "<number> <unit>" pair. ``period`` is
            "day" unless no day offset is given and a week, month or year
            offset is (checked in that order).
        """
        if not self._are_all_words_units(date_string):
            return None, None

        kwargs = self.get_kwargs(date_string)
        if not kwargs:
            return None, None

        period = "day"
        if "days" not in kwargs:
            for k in ["weeks", "months", "years"]:
                if k in kwargs:
                    period = k[:-1]
                    break
        td = relativedelta(**kwargs)

        # An explicit "in" always means the future; otherwise the future is
        # chosen only when preferred and the string does not say "ago".
        points_to_future = re.search(r"\bin\b", date_string) or (
            re.search(r"\bfuture\b", prefer_dates_from)
            and not re.search(r"\bago\b", date_string)
        )
        if points_to_future:
            date = now + td
        else:
            date = now - td
        return date, period

    def get_kwargs(self, date_string):
        """Extract ``relativedelta`` keyword arguments from *date_string*.

        Each "<number> <unit>" match becomes ``{"<unit>s": float(number)}``,
        with "," accepted as a decimal separator. When a unit occurs more
        than once the last occurrence wins. Decades are folded into years.

        :return: dict of keyword arguments; empty when nothing matches.
        """
        m = PATTERN.findall(date_string)
        if not m:
            return {}

        kwargs = {}
        for num, unit in m:
            # PATTERN is case-insensitive, so normalise the captured unit;
            # otherwise "2 Years" would yield an unknown "Years" key and
            # "1 Decade" would skip the decade folding below.
            kwargs[unit.lower() + "s"] = float(num.replace(",", "."))
        if "decades" in kwargs:
            kwargs["years"] = 10 * kwargs["decades"] + kwargs.get("years", 0)
            del kwargs["decades"]
        return kwargs

    def get_date_data(self, date_string, settings=None):
        """Parse *date_string* and wrap the result in a ``DateData``.

        NOTE: although *settings* defaults to ``None``, ``parse`` reads
        attributes such as ``settings.TIMEZONE`` unconditionally, so a real
        settings object has to be supplied.
        """
        # Imported at call time rather than at module level (presumably to
        # avoid a circular import with dateparser.date -- confirm).
        from dateparser.date import DateData

        date, period = self.parse(date_string, settings)
        return DateData(date_obj=date, period=period)

157 

158 

# Module-level instance of the parser, created once when this module is imported.
freshness_date_parser = FreshnessDateDataParser()