1# Copyright The OpenTelemetry Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import logging
16import threading
17from collections import OrderedDict
18from collections.abc import MutableMapping
19from typing import Mapping, Optional, Sequence, Tuple, Union
20
21from opentelemetry.util import types
22
# Primitive types accepted as (standard) attribute values, either on their
# own or as the elements of a homogeneous sequence.
# bytes are accepted as a user supplied value for attributes but
# decoded to strings internally (see `_clean_attribute_value`).
# NOTE: the order of this tuple is visible in warning messages, which list
# the type names in this order.
_VALID_ATTR_VALUE_TYPES = (bool, str, bytes, int, float)
# AnyValue possible values. Within this module the tuple is only used to
# build the TypeError message raised for values that cannot be represented.
_VALID_ANY_VALUE_TYPES = (
    type(None),
    bool,
    bytes,
    int,
    float,
    str,
    Sequence,
    Mapping,
)


# Module-level logger used for all "invalid attribute" warnings below.
_logger = logging.getLogger(__name__)
40
41
def _clean_attribute(
    key: str, value: types.AttributeValue, max_len: Optional[int]
) -> Optional[Union[types.AttributeValue, Tuple[Union[str, int, float], ...]]]:
    """Validate an attribute and return its cleaned value.

    A value is accepted when it is either:
        - A primitive: string, boolean, float or integer (bytes are accepted
          too and decoded to a string).
        - A sequence of such primitives. The sequence must be homogeneous:
          the first non-None element fixes the type every other non-None
          element has to match exactly.

    Cleaning consists of:
        - Decoding bytes to strings.
        - Truncating strings to ``max_len`` characters when ``max_len`` is
          not None.
        - Freezing sequences into tuples.

    Args:
        key: Attribute name; must be a non-empty string.
        value: Candidate attribute value.
        max_len: Maximum string length, or None for no limit.

    Returns:
        The cleaned value, or None (after logging a warning) when the key or
        the value is invalid.
    """

    if not key or not isinstance(key, str):
        _logger.warning("invalid key `%s`. must be non-empty string.", key)
        return None

    # Scalars (including str and bytes, which are also Sequences) are handled
    # before the generic sequence path.
    if isinstance(value, _VALID_ATTR_VALUE_TYPES):
        return _clean_attribute_value(value, max_len)

    if not isinstance(value, Sequence):
        _logger.warning(
            "Invalid type %s for attribute '%s' value. Expected one of %s or a "
            "sequence of those types",
            type(value).__name__,
            key,
            [valid_type.__name__ for valid_type in _VALID_ATTR_VALUE_TYPES],
        )
        return None

    expected_type = None
    cleaned_items = []

    for raw_item in value:
        item = _clean_attribute_value(raw_item, max_len)  # type: ignore

        # None entries (or entries that cleaned to None, e.g. undecodable
        # bytes) are kept as None and do not take part in type checking.
        if item is not None:
            item_type = type(item)

            # Reject the whole value as soon as one element has an
            # unsupported type.
            if item_type not in _VALID_ATTR_VALUE_TYPES:
                _logger.warning(
                    "Invalid type %s in attribute '%s' value sequence. Expected one of "
                    "%s or None",
                    item_type.__name__,
                    key,
                    [
                        valid_type.__name__
                        for valid_type in _VALID_ATTR_VALUE_TYPES
                    ],
                )
                return None

            if expected_type is None:
                # The first non-None element decides the sequence type.
                expected_type = item_type
            elif item_type != expected_type:
                # Exact type comparison on purpose: isinstance(True, int)
                # is True, which would let bools and ints mix.
                _logger.warning(
                    "Attribute %r mixes types %s and %s in attribute value sequence",
                    key,
                    expected_type.__name__,
                    type(item).__name__,
                )
                return None

        cleaned_items.append(item)

    # Hand back an immutable copy so later mutation of the caller's sequence
    # cannot affect the stored attribute.
    return tuple(cleaned_items)
119
120
def _clean_extended_attribute_value(  # pylint: disable=too-many-branches
    value: types.AnyValue, max_len: Optional[int]
) -> types.AnyValue:
    """Recursively clean an extended (``AnyValue``) attribute value.

    Args:
        value: Candidate value. ``None`` and primitives are returned as they
            are (strings truncated), mappings and sequences are cleaned
            element by element, and any other object is converted with
            ``str()``.
        max_len: Maximum length applied to every string value, including
            strings produced by the ``str()`` fallback. ``None`` disables
            truncation.

    Returns:
        The cleaned value: a primitive, a ``dict`` (for mappings), a ``tuple``
        (for sequences), or ``None`` when a sequence mixes element types.

    Raises:
        TypeError: If the value has an unsupported type and converting it to
            a string raises.
    """
    # Primitive types are returned directly; only strings are shortened
    # (bytes are left untouched here).
    if value is None or isinstance(value, _VALID_ATTR_VALUE_TYPES):
        if max_len is not None and isinstance(value, str):
            value = value[:max_len]
        return value

    if isinstance(value, Mapping):
        cleaned_dict: dict[str, types.AnyValue] = {}
        for key, element in value.items():
            # Entries with an invalid key are dropped, the rest of the
            # mapping is still kept.
            if not (key and isinstance(key, str)):
                _logger.warning(
                    "invalid key `%s`. must be non-empty string.", key
                )
                continue

            # Values that cannot be cleaned are stored as None (a warning is
            # logged by _clean_extended_attribute).
            cleaned_dict[key] = _clean_extended_attribute(
                key=key, value=element, max_len=max_len
            )

        return cleaned_dict

    if isinstance(value, Sequence):
        sequence_first_valid_type = None
        cleaned_seq: list[types.AnyValue] = []

        for element in value:
            # None elements are kept and ignored for the homogeneity check.
            if element is None:
                cleaned_seq.append(element)
                continue

            if max_len is not None and isinstance(element, str):
                element = element[:max_len]

            element_type = type(element)
            if element_type not in _VALID_ATTR_VALUE_TYPES:
                # Nested mappings/sequences and arbitrary objects are cleaned
                # recursively; the type of the cleaned result is what takes
                # part in the homogeneity check.
                element = _clean_extended_attribute_value(
                    element, max_len=max_len
                )
                element_type = type(element)  # type: ignore

            # The type of the sequence must be homogeneous. The first non-None
            # element determines the type of the sequence
            if sequence_first_valid_type is None:
                sequence_first_valid_type = element_type
            # use equality instead of isinstance as isinstance(True, int) evaluates to True
            elif element_type != sequence_first_valid_type:
                _logger.warning(
                    "Mixed types %s and %s in attribute value sequence",
                    sequence_first_valid_type.__name__,
                    type(element).__name__,
                )
                return None

            cleaned_seq.append(element)

        # Freeze mutable sequences defensively
        return tuple(cleaned_seq)

    # Some applications such as Django add values to log records whose types fall outside the
    # primitive types and `_VALID_ANY_VALUE_TYPES`, i.e., they are not of type `AnyValue`.
    # Rather than attempt to whitelist every possible instrumentation, we stringify those values here
    # so they can still be represented as attributes, falling back to the original TypeError only if
    # converting to string raises.
    try:
        stringified = str(value)
    except Exception as exc:
        raise TypeError(
            f"Invalid type {type(value).__name__} for attribute value. "
            f"Expected one of {[valid_type.__name__ for valid_type in _VALID_ANY_VALUE_TYPES]} or a "
            "sequence of those types",
        ) from exc

    # The stringified form is a string attribute value like any other, so the
    # length limit has to apply to it as well.
    if max_len is not None:
        stringified = stringified[:max_len]
    return stringified
196
197
def _clean_extended_attribute(
    key: str, value: types.AnyValue, max_len: Optional[int]
) -> types.AnyValue:
    """Validate an extended attribute and return its cleaned value.

    The key must be a non-empty string and the value must be cleanable as an
    AnyValue. Cleaning shortens string values that exceed ``max_len``.

    Args:
        key: Attribute name.
        value: Candidate AnyValue.
        max_len: Maximum string length, or None for no limit.

    Returns:
        The cleaned value, or None (after logging a warning) when the key is
        invalid or the value is rejected with a TypeError.
    """

    if key and isinstance(key, str):
        try:
            cleaned = _clean_extended_attribute_value(value, max_len=max_len)
        except TypeError as exception:
            # The value cannot be represented as an attribute: report it
            # and let the caller drop it.
            _logger.warning("Attribute %s: %s", key, exception)
            cleaned = None
        return cleaned

    _logger.warning("invalid key `%s`. must be non-empty string.", key)
    return None
219
220
def _clean_attribute_value(
    value: types.AttributeValue, limit: Optional[int]
) -> Optional[types.AttributeValue]:
    """Normalize a single attribute value.

    Bytes are decoded to a string using the default codec, and strings are
    truncated to ``limit`` characters when ``limit`` is not None. Any other
    value is returned unchanged.

    Returns:
        The normalized value, or None when ``value`` is None or is a bytes
        object that cannot be decoded (a warning is logged in that case).
    """
    if value is None:
        return None

    cleaned = value
    if isinstance(cleaned, bytes):
        try:
            cleaned = cleaned.decode()
        except UnicodeDecodeError:
            _logger.warning("Byte attribute could not be decoded.")
            return None

    # Only strings (including freshly decoded bytes) are subject to the limit.
    if isinstance(cleaned, str) and limit is not None:
        return cleaned[:limit]
    return cleaned
237
238
class BoundedAttributes(MutableMapping):  # type: ignore
    """A mapping of attributes with an optional maximum number of entries.

    Values are validated and cleaned on assignment; entries whose key or
    value is rejected by the cleaning helpers are silently skipped. Once
    ``maxlen`` entries are stored, adding a new key evicts the oldest entry.
    ``dropped`` counts evicted entries as well as assignments discarded
    because ``maxlen`` is 0.

    Unless ``immutable=False`` is passed, the mapping rejects item assignment
    and deletion (with ``TypeError``) once construction has finished.
    """

    def __init__(
        self,
        maxlen: Optional[int] = None,
        attributes: Optional[types._ExtendedAttributes] = None,
        immutable: bool = True,
        max_value_len: Optional[int] = None,
        extended_attributes: bool = False,
    ):
        maxlen_is_invalid = maxlen is not None and (
            not isinstance(maxlen, int) or maxlen < 0
        )
        if maxlen_is_invalid:
            raise ValueError("maxlen must be valid int greater or equal to 0")

        self.maxlen = maxlen
        self.dropped = 0
        self.max_value_len = max_value_len
        self._extended_attributes = extended_attributes

        # A plain dict is used until the capacity is reached; it is only
        # converted to an OrderedDict when the first eviction is needed.
        self._dict: Union[
            MutableMapping[str, types.AnyValue],
            OrderedDict[str, types.AnyValue],
        ] = {}
        self._lock = threading.RLock()

        # Populate through __setitem__ so the initial attributes are cleaned
        # and bounded too. `_immutable` is deliberately assigned afterwards:
        # until then __setitem__ sees the mapping as mutable.
        if attributes:
            for name, initial_value in attributes.items():
                self[name] = initial_value
        self._immutable = immutable

    def __repr__(self) -> str:
        return str(dict(self._dict))

    def __getitem__(self, key: str) -> types.AnyValue:
        return self._dict[key]

    def __setitem__(self, key: str, value: types.AnyValue) -> None:
        # `_immutable` may not exist yet while __init__ is still populating.
        if getattr(self, "_immutable", False):  # type: ignore
            raise TypeError

        with self._lock:
            # With a zero capacity nothing can ever be stored.
            if self.maxlen == 0:
                self.dropped += 1
                return

            cleaner = (
                _clean_extended_attribute
                if self._extended_attributes
                else _clean_attribute
            )
            cleaned = cleaner(key, value, self.max_value_len)  # type: ignore
            if cleaned is None:
                # Invalid key/value: keep the mapping untouched.
                return

            store = self._dict
            if key in store:
                # Remove first so the re-inserted key becomes the newest entry.
                del store[key]
            elif self.maxlen is not None and len(store) == self.maxlen:
                if not isinstance(store, OrderedDict):
                    store = self._dict = OrderedDict(store)
                # Evict the oldest entry to make room.
                store.popitem(last=False)  # type: ignore
                self.dropped += 1

            store[key] = cleaned  # type: ignore

    def __delitem__(self, key: str) -> None:
        if getattr(self, "_immutable", False):  # type: ignore
            raise TypeError
        with self._lock:
            del self._dict[key]

    def __iter__(self):  # type: ignore
        # Iterate over a snapshot taken under the lock so later writes do
        # not invalidate the returned iterator.
        with self._lock:
            snapshot = self._dict.copy()  # type: ignore
        return iter(snapshot)

    def __len__(self) -> int:
        return len(self._dict)

    def copy(self):  # type: ignore
        """Return a copy of the underlying dictionary."""
        return self._dict.copy()  # type: ignore