1import calendar
2import logging
3import types
4import unicodedata
5from collections import OrderedDict
6from datetime import datetime
7
8import regex as re
9from pytz import UTC, UnknownTimeZoneError, timezone
10from tzlocal import get_localzone
11
12from dateparser.timezone_parser import StaticTzInfo, _tz_offsets
13
14
def strip_braces(date_string):
    """Return ``date_string`` with all bracket-like characters removed.

    Curly braces, parentheses, angle brackets and square brackets are
    deleted; every other character is kept in place.
    """
    bracket_runs = r"[{}()<>\[\]]+"
    return re.sub(bracket_runs, "", date_string)
17
18
def normalize_unicode(string, form="NFKD"):
    """Normalize ``string`` and drop its non-spacing marks.

    :param string: text to normalize.
    :param form: Unicode normalization form passed to
        ``unicodedata.normalize`` (``"NFKD"`` by default).
    :return: the normalized text without any character whose Unicode
        category is ``"Mn"``.
    """
    decomposed = unicodedata.normalize(form, string)
    kept_chars = [
        char for char in decomposed if unicodedata.category(char) != "Mn"
    ]
    return "".join(kept_chars)
25
26
def combine_dicts(primary_dict, supplementary_dict):
    """Merge two dictionaries into a new ``OrderedDict``.

    Keys of ``primary_dict`` come first, in their original order, followed
    by the keys that only exist in ``supplementary_dict``. For keys present
    in both:

    * if the primary value is a list, the supplementary value is appended
      to it with ``+``;
    * if the primary value is a dict, both values are merged recursively;
    * otherwise the supplementary value wins.
    """
    merged = OrderedDict()

    for key, primary_value in primary_dict.items():
        if key not in supplementary_dict:
            merged[key] = primary_value
            continue

        extra_value = supplementary_dict[key]
        if isinstance(primary_value, list):
            merged[key] = primary_value + extra_value
        elif isinstance(primary_value, dict):
            merged[key] = combine_dicts(primary_value, extra_value)
        else:
            merged[key] = extra_value

    # Keys that only the supplementary dictionary provides go last.
    for key in supplementary_dict:
        if key not in primary_dict:
            merged[key] = supplementary_dict[key]

    return merged
45
46
def find_date_separator(format):
    """Return the separator character used after date directives.

    Searches ``format`` for a run of ``%d``, ``%b``, ``%B``, ``%m``, ``%a``
    or ``%A`` directives, each followed by one non-word character, and
    returns the non-word character captured last in that run. Returns
    ``None`` when no such directive/separator pair is found.
    """
    match = re.search(r"(?:(?:%[dbBmaA])(\W))+", format)
    return match.group(1) if match else None
51
52
def _get_missing_parts(fmt):
    """
    Return the date parts (day, month, year) that ``fmt`` has no
    directive for, always listed in day, month, year order.
    """
    directives_by_part = (
        ("day", ("%d", "%-d", "%j", "%-j")),
        ("month", ("%b", "%B", "%m", "%-m")),
        ("year", ("%y", "%-y", "%Y")),
    )

    absent_parts = []
    for part, directives in directives_by_part:
        for directive in directives:
            if directive in fmt:
                break
        else:
            # No directive for this part appeared in the format.
            absent_parts.append(part)
    return absent_parts
70
71
def get_timezone_from_tz_string(tz_string):
    """Resolve ``tz_string`` to a tzinfo object.

    ``pytz.timezone`` is tried first. If it does not know the name, the
    entries of ``_tz_offsets`` are scanned and the first one whose regex
    matches ``tz_string`` (prefixed with a space) is returned as a
    ``StaticTzInfo``.

    :raises UnknownTimeZoneError: when neither lookup finds a match.
    """
    try:
        return timezone(tz_string)
    except UnknownTimeZoneError as e:
        candidate = " %s" % tz_string
        for name, info in _tz_offsets:
            if info["regex"].search(candidate):
                return StaticTzInfo(name, info["offset"])
        # Nothing matched: surface the original pytz error.
        raise e
81
82
def localize_timezone(date_time, tz_string):
    """Attach the timezone named by ``tz_string`` to a naive datetime.

    A datetime that already has a ``tzinfo`` is returned unchanged.
    Otherwise the zone is resolved with ``get_timezone_from_tz_string``;
    its ``localize`` method is used when it has one, and a plain
    ``replace(tzinfo=...)`` is used when it does not.
    """
    if date_time.tzinfo:
        return date_time

    tz = get_timezone_from_tz_string(tz_string)

    if not hasattr(tz, "localize"):
        return date_time.replace(tzinfo=tz)
    return tz.localize(date_time)
95
96
def apply_tzdatabase_timezone(date_time, pytz_string):
    """Convert ``date_time`` to the pytz timezone named ``pytz_string``.

    The datetime is returned untouched when its ``tzinfo`` already
    compares equal to the requested zone.
    """
    target_zone = timezone(pytz_string)

    if date_time.tzinfo != target_zone:
        return date_time.astimezone(target_zone)
    return date_time
104
105
def apply_dateparser_timezone(utc_datetime, offset_or_timezone_abb):
    """Convert ``utc_datetime`` using the ``_tz_offsets`` table.

    The first ``_tz_offsets`` entry whose regex matches
    ``offset_or_timezone_abb`` (prefixed with a space) supplies the target
    ``StaticTzInfo``. Returns ``None`` when no entry matches.
    """
    needle = " %s" % offset_or_timezone_abb
    matching_zones = (
        StaticTzInfo(name, info["offset"])
        for name, info in _tz_offsets
        if info["regex"].search(needle)
    )
    # Only the first match is ever built; the generator is lazy.
    tz = next(matching_zones, None)
    if tz is None:
        return None
    return utc_datetime.astimezone(tz)
111
112
def apply_timezone(date_time, tz_string):
    """Convert ``date_time`` to the timezone described by ``tz_string``.

    A naive datetime is first marked as UTC. The conversion is attempted
    with ``apply_dateparser_timezone``; when that yields nothing,
    ``apply_tzdatabase_timezone`` is used instead.
    """
    if not date_time.tzinfo:
        if hasattr(UTC, "localize"):
            date_time = UTC.localize(date_time)
        else:
            date_time = date_time.replace(tzinfo=UTC)

    return apply_dateparser_timezone(
        date_time, tz_string
    ) or apply_tzdatabase_timezone(date_time, tz_string)
126
127
def apply_timezone_from_settings(date_obj, settings):
    """Apply the timezone-related settings to ``date_obj``.

    :param date_obj: datetime to adjust.
    :param settings: object exposing ``TIMEZONE``, ``TO_TIMEZONE`` and
        ``RETURN_AS_TIMEZONE_AWARE``; ``None`` disables all processing.
    :return: ``date_obj`` unchanged when ``settings`` is ``None``;
        otherwise the datetime localized to ``settings.TIMEZONE``,
        converted to ``settings.TO_TIMEZONE`` when that is set, and
        stripped of its ``tzinfo`` unless
        ``settings.RETURN_AS_TIMEZONE_AWARE`` is exactly ``True``.
    """
    if settings is None:
        return date_obj

    if "local" in settings.TIMEZONE.lower():
        # Look up the system timezone only on the branch that needs it, so
        # the other paths neither pay for nor fail on that lookup.
        tz = get_localzone()
        if hasattr(tz, "localize"):
            date_obj = tz.localize(date_obj)
        else:
            date_obj = date_obj.replace(tzinfo=tz)
    else:
        date_obj = localize_timezone(date_obj, settings.TIMEZONE)

    if settings.TO_TIMEZONE:
        date_obj = apply_timezone(date_obj, settings.TO_TIMEZONE)

    if settings.RETURN_AS_TIMEZONE_AWARE is not True:
        date_obj = date_obj.replace(tzinfo=None)

    return date_obj
148
149
def get_last_day_of_month(year, month):
    """Return the number of the last day of ``month`` in ``year``."""
    _first_weekday, day_count = calendar.monthrange(year, month)
    return day_count
152
153
def get_previous_leap_year(year):
    """Return the closest leap year strictly before ``year``."""
    search_forward = False
    return _get_leap_year(year, search_forward)
156
157
def get_next_leap_year(year):
    """Return the closest leap year strictly after ``year``."""
    search_forward = True
    return _get_leap_year(year, search_forward)
160
161
def _get_leap_year(year, future):
    """
    Walk year by year, forwards when ``future`` is truthy and backwards
    otherwise, and return the first leap year found. ``year`` itself is
    never returned. Checking every year with ``calendar.isleap`` keeps
    centurial years handled correctly.
    """
    if future:
        direction = 1
    else:
        direction = -1

    candidate = year + direction
    while True:
        if calendar.isleap(candidate):
            return candidate
        candidate += direction
172
173
def set_correct_day_from_settings(date_obj, settings, current_day=None):
    """Set the day of ``date_obj`` according to `PREFER_DAY_OF_MONTH`.

    ``"first"`` selects day 1, ``"last"`` the last day of the month of
    ``date_obj``, and ``"current"`` uses ``current_day`` (today's day when
    it is not given). If the chosen day is not valid for that month, the
    last day of the month is used instead.
    """
    last_day = get_last_day_of_month(date_obj.year, date_obj.month)
    day_by_preference = {
        "first": 1,
        "last": last_day,
        "current": current_day or datetime.now().day,
    }

    try:
        preferred_day = day_by_preference[settings.PREFER_DAY_OF_MONTH]
        return date_obj.replace(day=preferred_day)
    except ValueError:
        # e.g. day 31 requested for a 30-day month.
        return date_obj.replace(day=last_day)
186
187
def set_correct_month_from_settings(date_obj, settings, current_month=None):
    """Set the month of ``date_obj`` according to `PREFER_MONTH_OF_YEAR`.

    ``"first"`` selects January, ``"last"`` December, and ``"current"``
    uses ``current_month`` (today's month when it is not given). If the
    replacement produces an invalid date, December is used instead.
    """
    final_month = 12
    month_by_preference = {
        "first": 1,
        "last": final_month,
        "current": current_month or datetime.now().month,
    }

    try:
        preferred_month = month_by_preference[settings.PREFER_MONTH_OF_YEAR]
        return date_obj.replace(month=preferred_month)
    except ValueError:
        # e.g. the 31st moved into a shorter month.
        return date_obj.replace(month=final_month)
196
197
def registry(cls):
    """Class decorator that caches one instance of ``cls`` per key.

    ``cls`` must expose a ``get_key`` class method bound to ``cls``
    itself; otherwise ``NotImplementedError`` is raised. ``cls.__new__``
    is replaced by a constructor that asks ``get_key`` for a key, creates
    an instance with the original ``__new__`` the first time a key is
    seen, tags it with a ``registry_key`` attribute and returns the cached
    instance for that key from then on.

    Only positional arguments are forwarded to the original ``__new__``;
    keyword arguments are used solely to compute the key.
    """
    has_own_get_key = (
        hasattr(cls, "get_key")
        and isinstance(cls.get_key, types.MethodType)
        and cls.get_key.__self__ is cls
    )
    if not has_own_get_key:
        raise NotImplementedError(
            "Registry classes require to implement class method get_key"
        )

    def choose(creator):
        def constructor(cls, *args, **kwargs):
            key = cls.get_key(*args, **kwargs)

            # The instance cache is created on first use.
            if not hasattr(cls, "__registry_dict"):
                setattr(cls, "__registry_dict", {})
            instances = getattr(cls, "__registry_dict")

            if key in instances:
                return instances[key]

            instance = creator(cls, *args)
            instances[key] = instance
            instance.registry_key = key
            return instance

        return staticmethod(constructor)

    cls.__new__ = choose(cls.__new__)
    return cls
225
226
def get_logger():
    """Return the ``"dateparser"`` logger after running ``setup_logging``."""
    setup_logging()
    dateparser_logger = logging.getLogger("dateparser")
    return dateparser_logger
230
231
def setup_logging():
    """Install a default console logging configuration.

    Does nothing when the root logger already has at least one handler.
    Otherwise the root logger is set to ``DEBUG`` and given a single
    ``StreamHandler`` that writes to ``sys.stdout``. The configuration
    is applied with ``disable_existing_loggers`` set to ``True``.
    """
    # ``import logging`` does not load the ``logging.config`` submodule, so
    # ``logging.config.dictConfig`` can raise AttributeError. Import the
    # function explicitly; the ``from`` form leaves the module-level
    # ``logging`` name untouched.
    from logging.config import dictConfig

    if logging.root.handlers:
        return

    config = {
        "version": 1,
        "disable_existing_loggers": True,
        "formatters": {
            "console": {
                "format": "%(asctime)s %(levelname)s: [%(name)s] %(message)s",
            },
        },
        "handlers": {
            "console": {
                "level": logging.DEBUG,
                "class": "logging.StreamHandler",
                "formatter": "console",
                "stream": "ext://sys.stdout",
            },
        },
        "root": {
            "level": logging.DEBUG,
            "handlers": ["console"],
        },
    }
    dictConfig(config)