1from datetime import datetime, time, timezone
2
3import regex as re
4from dateutil.relativedelta import relativedelta
5from tzlocal import get_localzone
6
7from dateparser.utils import apply_timezone, localize_timezone, strip_braces
8
9from .parser import time_parser
10from .timezone_parser import pop_tz_offset_from_string
11
12_UNITS = r"decade|year|month|week|day|hour|minute|second"
13PATTERN = re.compile(r"(\d+[.,]?\d*)\s*(%s)\b" % _UNITS, re.I | re.S | re.U)
14
15
class FreshnessDateDataParser:
    """Parse relative date strings such as "1 year, 2 month ago" or "in 3 hour".

    The amount/unit pairs found in the string are turned into a
    ``relativedelta`` that is added to, or subtracted from, a reference
    "now" derived from the settings and any timezone found in the string.

    NOTE(review): ``PATTERN`` only recognises singular unit names followed by
    a word boundary, so plural input is presumably normalised to singular
    before it reaches this class -- confirm against the caller.
    """

    def _are_all_words_units(self, date_string):
        """Return True when every word in *date_string* starts like a known token.

        Known tokens are the unit names, "ago", "in", digits, ":" and
        "am"/"pm".  ``re.match`` only anchors at the start of each word, so a
        word merely has to *begin* with one of them.  An empty string yields
        True because there is no offending word.
        """
        skip = [_UNITS, r"ago|in|\d+", r":|[ap]m"]
        # Build the alternation once instead of once per word.
        skip_pattern = r"%s" % "|".join(skip)

        date_string = re.sub(r"\s+", " ", date_string.strip())

        words = [x for x in re.split(r"\W", date_string) if x]
        return all(re.match(skip_pattern, word) for word in words)

    def _parse_time(self, date_string, settings):
        """Attempts to parse time part of date strings like '1 day ago, 2 PM'

        The amount/unit pairs and the words "ago"/"in" are stripped first and
        the remainder is handed to ``time_parser``.  Returns its result, or
        ``None`` when it raises.  *settings* is accepted but not used here.
        """
        date_string = PATTERN.sub("", date_string)
        date_string = re.sub(r"\b(?:ago|in)\b", "", date_string)
        try:
            return time_parser(date_string)
        except Exception:
            # Best effort: a string without a parsable time part is normal.
            # ``Exception`` (rather than a bare ``except``) keeps
            # KeyboardInterrupt and SystemExit propagating.
            return None

    def get_local_tz(self):
        """Return the local timezone as reported by ``tzlocal.get_localzone``."""
        return get_localzone()

    @staticmethod
    def _attach_tz(dateobj, tz):
        """Attach *tz* to the naive datetime *dateobj* without shifting the clock."""
        # Timezone objects that expose ``localize`` (e.g. pytz-style ones)
        # are localised through it; anything else is attached with replace().
        if hasattr(tz, "localize"):
            return tz.localize(dateobj)
        return dateobj.replace(tzinfo=tz)

    @staticmethod
    def _apply_time(dateobj, timeobj):
        """Overwrite the time-of-day of *dateobj* with *timeobj*.

        *dateobj* is returned unchanged when *timeobj* is not a
        ``datetime.time`` (for instance ``None`` when no time was parsed).
        """
        if not isinstance(timeobj, time):
            return dateobj

        return dateobj.replace(
            hour=timeobj.hour,
            minute=timeobj.minute,
            second=timeobj.second,
            microsecond=timeobj.microsecond,
        )

    def _resolve_now(self, settings, ptz):
        """Return the timezone-aware reference datetime for relative arithmetic.

        * With ``settings.RELATIVE_BASE`` set, that value is the base.  It is
          passed through ``localize_timezone`` unless ``settings.TIMEZONE``
          contains "local", then converted to (or tagged with) *ptz* when a
          timezone was found in the string, and finally tagged with the local
          timezone if it is still naive.
        * Otherwise the current time is taken in *ptz* when given, else in
          UTC or the local timezone, and passed through ``apply_timezone``
          unless ``settings.TIMEZONE`` contains "local".
        """
        uses_local_tz = "local" in settings.TIMEZONE.lower()

        if settings.RELATIVE_BASE:
            now = settings.RELATIVE_BASE

            if not uses_local_tz:
                now = localize_timezone(now, settings.TIMEZONE)

            if ptz:
                if now.tzinfo:
                    now = now.astimezone(ptz)
                else:
                    now = self._attach_tz(now, ptz)

            if not now.tzinfo:
                # Look the local zone up once and reuse it.
                now = self._attach_tz(now, self.get_local_tz())

            return now

        if ptz:
            localized_now = datetime.now(ptz)
            if uses_local_tz:
                return localized_now
            return apply_timezone(localized_now, settings.TIMEZONE)

        if uses_local_tz:
            return datetime.now(self.get_local_tz())

        utc_dt = datetime.now(tz=timezone.utc)
        return apply_timezone(utc_dt, settings.TIMEZONE)

    def parse(self, date_string, settings):
        """Parse *date_string* and return a ``(datetime, period)`` tuple.

        Returns ``(None, None)`` when the string is not a relative date
        expression.  *settings* must provide ``TIMEZONE``, ``RELATIVE_BASE``,
        ``PREFER_DATES_FROM``, ``RETURN_TIME_AS_PERIOD``, ``TO_TIMEZONE`` and
        ``RETURN_AS_TIMEZONE_AWARE``.
        """
        date_string = strip_braces(date_string)
        # ptz is the timezone taken out of the string; falsy when none found.
        date_string, ptz = pop_tz_offset_from_string(date_string)
        _time = self._parse_time(date_string, settings)

        now = self._resolve_now(settings, ptz)

        date, period = self._parse_date(date_string, now, settings.PREFER_DATES_FROM)

        if date:
            old_date = date
            date = self._apply_time(date, _time)
            # Report "time" only when the explicit time actually changed the result.
            if settings.RETURN_TIME_AS_PERIOD and old_date != date:
                period = "time"

            if settings.TO_TIMEZONE:
                date = apply_timezone(date, settings.TO_TIMEZONE)

            # Drop tzinfo when aware results were not requested, or when the
            # setting is "default" and the string carried no timezone itself.
            tz_aware = settings.RETURN_AS_TIMEZONE_AWARE
            if not tz_aware or ("default" == tz_aware and not ptz):
                date = date.replace(tzinfo=None)

        return date, period

    def _parse_date(self, date_string, now, prefer_dates_from):
        """Compute the date that *date_string* describes relative to *now*.

        Returns ``(date, period)``, or ``(None, None)`` when the string
        contains foreign words or no amount/unit pair.  *period* is "day"
        unless no day amount is given and a week, month or year amount is,
        in which case it is the first of those (in that order) present.
        """
        if not self._are_all_words_units(date_string):
            return None, None

        kwargs = self.get_kwargs(date_string)
        if not kwargs:
            return None, None

        period = "day"
        if "days" not in kwargs:
            for k in ["weeks", "months", "years"]:
                if k in kwargs:
                    period = k[:-1]
                    break
        td = relativedelta(**kwargs)

        # An explicit "in" always means the future.  Otherwise the future is
        # chosen only when preferred by the settings and "ago" is absent.
        wants_future = re.search(r"\bin\b", date_string) or (
            re.search(r"\bfuture\b", prefer_dates_from)
            and not re.search(r"\bago\b", date_string)
        )
        if wants_future:
            date = now + td
        else:
            date = now - td
        return date, period

    def get_kwargs(self, date_string):
        """Extract amount/unit pairs as keyword arguments for ``relativedelta``.

        Returns a dict such as ``{"years": 1.0, "months": 2.0}``, or ``{}``
        when nothing matches.  "," is accepted as the decimal separator, a
        repeated unit keeps its last amount, and decades are folded into
        ``years``.
        """
        matches = PATTERN.findall(date_string)
        if not matches:
            return {}

        kwargs = {}
        for num, unit in matches:
            # PATTERN is case-insensitive; normalise so the key is always a
            # valid relativedelta argument name (e.g. "HOUR" -> "hours").
            kwargs[unit.lower() + "s"] = float(num.replace(",", "."))
        if "decades" in kwargs:
            kwargs["years"] = 10 * kwargs.pop("decades") + kwargs.get("years", 0)
        return kwargs

    def get_date_data(self, date_string, settings=None):
        """Parse *date_string* and wrap the result in a ``DateData`` object.

        NOTE(review): ``parse`` reads attributes from *settings*, so the
        ``None`` default cannot be used as-is -- callers must pass a settings
        object.
        """
        # Imported here rather than at module level, presumably to avoid a
        # circular import with dateparser.date -- confirm.
        from dateparser.date import DateData

        date, period = self.parse(date_string, settings)
        return DateData(date_obj=date, period=period)
157
158
# Module-level parser instance, created once at import time.
freshness_date_parser = FreshnessDateDataParser()