1from __future__ import annotations
2
3import codecs
4import collections.abc as cabc
5import re
6import typing as t
7
8from ..http import dump_options_header
9from ..http import parse_list_header
10from ..http import parse_options_header
11from .structures import ImmutableList
12
13if t.TYPE_CHECKING:
14 import typing_extensions as te
15
# Shape check for a ``q`` parameter: an optional minus sign, one or more
# digits and an optional fractional part. ``re.ASCII`` restricts ``\d`` to
# the ASCII digits. The numeric range is checked separately by the caller.
_q_value_re = re.compile(r"-?\d+(\.\d+)?", flags=re.ASCII)
17
18
class Accept(ImmutableList[tuple[str, float]]):
    """List subclass holding ``(value, quality)`` tuples from an
    ``Accept``-style header.

    The entries are ordered when the object is created: more specific
    values come first and, for equal specificity, higher quality comes
    first.

    An :class:`Accept` object behaves like a list but adds helpers for
    working with the data. Containment checks are normalized to the
    rules of the header the class represents:

    >>> a = CharsetAccept([('ISO-8859-1', 1), ('utf-8', 0.7)])
    >>> a.best
    'ISO-8859-1'
    >>> 'iso-8859-1' in a
    True
    >>> 'UTF8' in a
    True
    >>> 'utf7' in a
    False

    Item lookup with a string returns the quality of that value:

    >>> a['utf-8']
    0.7
    >>> a['utf7']
    0

    .. versionchanged:: 0.5
        :class:`Accept` objects are forced immutable now.

    .. versionchanged:: 1.0.0
        :class:`Accept` internal values are no longer ordered
        alphabetically for equal quality tags. Instead the initial
        order is preserved.

    """

    def __init__(
        self, values: Accept | cabc.Iterable[tuple[str, float]] | None = ()
    ) -> None:
        # ``provided`` is False only when the object is built from ``None``.
        if values is None:
            super().__init__()
            self.provided = False
            return

        if isinstance(values, Accept):
            # Copy another instance as-is, including its ``provided`` flag.
            self.provided = values.provided
            super().__init__(values)
            return

        def sort_key(
            pair: tuple[str, float],
        ) -> tuple[tuple[bool, ...], float]:
            # Specificity first, quality second.
            return self._specificity(pair[0]), pair[1]

        self.provided = True
        ordered = sorted(values, key=sort_key, reverse=True)
        super().__init__(ordered)

    def _specificity(self, value: str) -> tuple[bool, ...]:
        """Return a tuple describing how specific ``value`` is.

        The wildcard ``*`` is the only value that is not specific.
        """
        is_specific = value != "*"
        return (is_specific,)

    def _value_matches(self, value: str, item: str) -> bool:
        """Tell whether ``value`` is matched by the accept entry ``item``.

        The ``*`` entry matches everything; otherwise the comparison is
        case-insensitive.
        """
        if item == "*":
            return True

        return item.lower() == value.lower()

    @t.overload
    def __getitem__(self, key: str) -> float: ...
    @t.overload
    def __getitem__(self, key: t.SupportsIndex) -> tuple[str, float]: ...
    @t.overload
    def __getitem__(self, key: slice) -> list[tuple[str, float]]: ...
    def __getitem__(
        self, key: str | t.SupportsIndex | slice
    ) -> float | tuple[str, float] | list[tuple[str, float]]:
        """Look up by position, slice, or value.

        An index or slice behaves like on a list. A string returns the
        quality for that value, which is ``0`` when nothing matches.
        """
        if not isinstance(key, str):
            return list.__getitem__(self, key)

        return self.quality(key)

    def quality(self, key: str) -> float:
        """Return the quality of the first entry matching ``key``, or
        ``0`` if no entry matches.

        .. versionadded:: 0.6
           In previous versions you had to use the item-lookup syntax
           (eg: ``obj[key]`` instead of ``obj.quality(key)``)
        """
        matching = (q for item, q in self if self._value_matches(key, item))
        return next(matching, 0)

    def __contains__(self, value: str) -> bool:  # type: ignore[override]
        return any(self._value_matches(value, item) for item, _quality in self)

    def __repr__(self) -> str:
        rendered = [f"({value!r}, {quality})" for value, quality in self]
        pairs_str = ", ".join(rendered)
        return f"{type(self).__name__}([{pairs_str}])"

    def index(self, key: str | tuple[str, float]) -> int:  # type: ignore[override]
        """Return the position of an entry or raise :exc:`ValueError`.

        A string is matched against the entry values with the rules of
        this header; anything else is looked up like in a plain list.

        :param key: The key to be looked up.

        .. versionchanged:: 0.5
           This used to raise :exc:`IndexError`, which was inconsistent
           with the list API.
        """
        if not isinstance(key, str):
            return list.index(self, key)

        for position, (item, _quality) in enumerate(self):
            if self._value_matches(key, item):
                return position

        raise ValueError(key)

    def find(self, key: str | tuple[str, float]) -> int:
        """Return the position of an entry, or ``-1`` if it is missing.

        :param key: The key to be looked up.
        """
        try:
            position = self.index(key)
        except ValueError:
            return -1

        return position

    def values(self) -> cabc.Iterator[str]:
        """Iterate over the values, without their qualities."""
        yield from (entry[0] for entry in self)

    @classmethod
    def from_header(cls, value: str | None) -> te.Self:
        """Build an instance of this class from an ``Accept`` header value.

        Items whose ``q`` parameter is malformed or outside ``0..1`` are
        dropped. Items without ``q`` get quality ``1``. An empty or
        missing header produces ``cls(None)``.

        .. versionadded:: 3.2
        """
        if not value:
            return cls(None)

        parsed = []

        for raw_item in parse_list_header(value):
            item, options = parse_options_header(raw_item)
            q: float = 1

            if "q" in options:
                # Take q out; whatever is left is put back on the item below.
                q_text = options.pop("q").strip()

                if _q_value_re.fullmatch(q_text) is None:
                    # Not a number: skip the whole item.
                    continue

                q = float(q_text)

                if not 0 <= q <= 1:
                    # Out of range: skip the whole item.
                    continue

            if options:
                # Rebuild the value together with its remaining options.
                item = dump_options_header(item, options)

            parsed.append((item, q))

        return cls(parsed)

    def to_header(self) -> str:
        """Serialize to an ``Accept`` header value.

        The ``q`` parameter is omitted for entries whose quality is ``1``.
        """
        return ",".join(
            value if quality == 1 else f"{value};q={quality}"
            for value, quality in self
        )

    def __str__(self) -> str:
        return self.to_header()

    def _best_single_match(self, match: str) -> tuple[str, float] | None:
        # Entries are ordered by specificity descending, so the first
        # matching entry is the one to use.
        hits = (
            (client_item, quality)
            for client_item, quality in self
            if self._value_matches(match, client_item)
        )
        return next(hits, None)

    @t.overload
    def best_match(self, matches: cabc.Iterable[str]) -> str | None: ...
    @t.overload
    def best_match(self, matches: cabc.Iterable[str], default: str = ...) -> str: ...
    def best_match(
        self, matches: cabc.Iterable[str], default: str | None = None
    ) -> str | None:
        """Pick the best of the given candidates according to the
        quality and specificity of the client's entries. When two
        candidates tie on both, the earlier one wins. Candidates whose
        matching entry has a quality of ``0`` or less are never chosen.

        :param matches: a list of matches to check for
        :param default: the value that is returned if none match
        """
        winner = default
        top_quality: float = -1
        top_specificity: tuple[float, ...] = (-1,)

        for server_item in matches:
            found = self._best_single_match(server_item)

            if found is None:
                continue

            client_item, quality = found
            specificity = self._specificity(client_item)
            # Better quality wins; equal quality needs higher specificity.
            improves = quality > top_quality or (
                quality == top_quality and specificity > top_specificity
            )

            if quality > 0 and improves:
                winner = server_item
                top_quality = quality
                top_specificity = specificity

        return winner

    @property
    def best(self) -> str | None:
        """The value of the first entry, or ``None`` if there are none."""
        return self[0][0] if self else None
246
# Splits a mimetype on the type/subtype slash and on each ``;`` that
# introduces a parameter (whitespace around the ``;`` is consumed).
_mime_split_re = re.compile(r"/|(?:\s*;\s*)")


def _normalize_mime(value: str) -> list[str]:
    """Lowercase ``value`` and split it into its type, subtype and
    parameter strings.
    """
    lowered = value.lower()
    return _mime_split_re.split(lowered)
252
253
class MIMEAccept(Accept):
    """:class:`Accept` variant with matching rules and helpers for
    mimetypes.
    """

    def _specificity(self, value: str) -> tuple[bool, ...]:
        """Return one flag per mimetype part, false where it is ``*``."""
        parts = _mime_split_re.split(value)
        return tuple(part != "*" for part in parts)

    def _value_matches(self, value: str, item: str) -> bool:
        """Tell whether the application's ``value`` is matched by the
        client's accept entry ``item``.

        :raises ValueError: if ``value`` has no ``/`` or is of the
            form ``*/something``.
        """
        # The client sent this; an invalid entry simply never matches.
        if "/" not in item:
            return False

        # The application passed this; report it when it looks wrong.
        if "/" not in value:
            raise ValueError(f"invalid mimetype {value!r}")

        # Type, subtype, then any parameters.
        value_type, value_subtype, *value_rest = _normalize_mime(value)

        # Only "*/*" may start with "*".
        if value_type == "*" and value_subtype != "*":
            raise ValueError(f"invalid mimetype {value!r}")

        item_type, item_subtype, *item_rest = _normalize_mime(item)

        # "*/not-*" from the client is invalid and never matches.
        if item_type == "*" and item_subtype != "*":
            return False

        full_wildcard = ("*", "*")

        # A full wildcard on either side matches anything.
        if (item_type, item_subtype) == full_wildcard:
            return True

        if (value_type, value_subtype) == full_wildcard:
            return True

        if item_type != value_type:
            return False

        # Same type: a wildcard subtype on either side is enough.
        if item_subtype == "*" or value_subtype == "*":
            return True

        # Otherwise the subtype and the parameters, in any order, must agree.
        return item_subtype == value_subtype and sorted(item_rest) == sorted(
            value_rest
        )

    @property
    def accept_html(self) -> bool:
        """True if this object accepts HTML."""
        if "text/html" in self:  # type: ignore[comparison-overlap]
            return True

        return self.accept_xhtml

    @property
    def accept_xhtml(self) -> bool:
        """True if this object accepts XHTML."""
        return (
            "application/xhtml+xml" in self  # type: ignore[comparison-overlap]
            or "application/xml" in self  # type: ignore[comparison-overlap]
        )

    @property
    def accept_json(self) -> bool:
        """True if this object accepts JSON."""
        json_accepted = "application/json" in self  # type: ignore[comparison-overlap]
        return json_accepted
316
317
# Language subtags may be separated by either "_" or "-".
_locale_delim_re = re.compile(r"[_-]")


def _normalize_lang(value: str) -> list[str]:
    """Lowercase a language tag and split it into its subtags so that
    tags can be compared regardless of case and separator.
    """
    lowered = value.lower()
    return _locale_delim_re.split(lowered)
324
325
class LanguageAccept(Accept):
    """Like :class:`Accept` but with normalization for language tags."""

    def _value_matches(self, value: str, item: str) -> bool:
        """Tell whether ``value`` is matched by the accept entry ``item``.

        ``*`` matches everything. Otherwise both tags are compared after
        lowercasing and splitting on ``_`` / ``-``, so ``en_us`` and
        ``en-US`` are equal.
        """
        return item == "*" or _normalize_lang(value) == _normalize_lang(item)

    @t.overload
    def best_match(self, matches: cabc.Iterable[str]) -> str | None: ...
    @t.overload
    def best_match(self, matches: cabc.Iterable[str], default: str = ...) -> str: ...
    def best_match(
        self, matches: cabc.Iterable[str], default: str | None = None
    ) -> str | None:
        """Given a list of supported values, finds the best match from
        the list of accepted values.

        Language tags are normalized for the purpose of matching, but
        are returned unchanged.

        If no exact match is found, this will fall back to matching
        the first subtag (primary language only), first with the
        accepted values then with the match values. This partial is not
        applied to any other language subtags.

        The default is returned if no exact or fallback match is found.

        :param matches: Supported languages to find a match in. Any
            iterable is accepted, including one-shot iterators.
        :param default: The value that is returned if none match.
        """
        # The candidates are walked several times below. Materialize them
        # once so a generator or other one-shot iterable is not exhausted
        # by the first pass, which would silently disable both fallbacks.
        candidates = list(matches)

        # Look for an exact match first. If a client accepts "en-US",
        # "en-US" is a valid match at this point.
        result = super().best_match(candidates)

        if result is not None:
            return result

        # Fall back to accepting primary tags. If a client accepts
        # "en-US", "en" is a valid match at this point. Need to use
        # re.split to account for 2 or 3 letter codes.
        fallback = Accept(
            [(_locale_delim_re.split(item[0], 1)[0], item[1]) for item in self]
        )
        result = fallback.best_match(candidates)

        if result is not None:
            return result

        # Fall back to matching primary tags. If the client accepts
        # "en", "en-US" is a valid match at this point.
        primary_tags = [_locale_delim_re.split(item, 1)[0] for item in candidates]
        result = super().best_match(primary_tags)

        # Return a value from the original candidates: the first one whose
        # primary tag is exactly the matched tag. A plain prefix test is
        # not enough, since "eng" starts with "en" but is a different
        # primary tag.
        if result is not None:
            for item, primary in zip(candidates, primary_tags):
                if primary == result:
                    return item

        return default
384
385
class _CharsetAccept(Accept):
    """:class:`Accept` variant that normalizes charset names before
    comparing them.
    """

    def _value_matches(self, value: str, item: str) -> bool:
        """Tell whether ``value`` is matched by the accept entry ``item``.

        ``*`` matches everything. Otherwise both names are resolved with
        :func:`codecs.lookup` and compared by codec name; a name that
        has no codec is compared lowercased instead.
        """
        if item == "*":
            return True

        def canonical(charset: str) -> str:
            try:
                codec = codecs.lookup(charset)
            except LookupError:
                return charset.lower()

            return codec.name

        return canonical(value) == canonical(item)
397
398
def __getattr__(name: str) -> t.Any:
    """Module-level attribute hook.

    Serves the deprecated ``CharsetAccept`` name by returning
    ``_CharsetAccept`` after emitting a :exc:`DeprecationWarning`.
    Every other name raises :exc:`AttributeError`.
    """
    if name != "CharsetAccept":
        raise AttributeError(name)

    import warnings

    message = (
        "The 'CharsetAccept' class is deprecated and will be removed in "
        "Werkzeug 3.3. The 'Accept-Charset' header is not sent by "
        "browsers, and UTF-8 is assumed."
    )
    # stacklevel=2 attributes the warning to the code that accessed the name.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return _CharsetAccept