1# Copyright The OpenTelemetry Authors
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import logging
16import threading
17from collections import OrderedDict
18from collections.abc import MutableMapping
19from typing import Mapping, Optional, Sequence, Tuple, Union
20
21from opentelemetry.util import types
22
# Primitive types accepted as attribute values (and as sequence elements) by
# _clean_attribute. bytes are accepted as a user supplied value but are
# decoded to str by _clean_attribute_value before being returned.
# The tuple order is observable: the type names are rendered, in this order,
# in the warning messages emitted by _clean_attribute.
_VALID_ATTR_VALUE_TYPES = (bool, str, bytes, int, float)
# Types that make up an AnyValue. In the code visible here this tuple is only
# read to render the TypeError message raised by
# _clean_extended_attribute_value.
_VALID_ANY_VALUE_TYPES = (
    type(None),
    bool,
    bytes,
    int,
    float,
    str,
    Sequence,
    Mapping,
)


# Module-level logger used by the cleaning helpers below to report rejected
# keys and values.
_logger = logging.getLogger(__name__)
40
41
def _clean_attribute(
    key: str, value: types.AttributeValue, max_len: Optional[int]
) -> Optional[Union[types.AttributeValue, Tuple[Union[str, int, float], ...]]]:
    """Validate one attribute and return its normalized value.

    Returns the cleaned value, or ``None`` when the key or the value is
    rejected. Every rejection is reported through a logged warning.

    Accepted values:
        - A primitive: ``bool``, ``str``, ``bytes``, ``int`` or ``float``.
        - A sequence whose non-``None`` elements all have the same exact
          primitive type. ``None`` elements are kept in place.

    Normalization applied:
        - ``bytes`` are decoded to ``str``; inside a sequence, an element
          that cannot be decoded is stored as ``None``.
        - Strings are cut to ``max_len`` characters when ``max_len`` is not
          ``None``.
        - Sequences are returned as tuples.

    Args:
        key: Attribute name; must be a non-empty ``str``.
        value: Candidate attribute value.
        max_len: Maximum string length, or ``None`` for no limit.
    """
    if not key or not isinstance(key, str):
        _logger.warning("invalid key `%s`. must be non-empty string.", key)
        return None

    # Scalars (including str/bytes, which are also Sequences) are handled
    # before the generic sequence path.
    if isinstance(value, _VALID_ATTR_VALUE_TYPES):
        return _clean_attribute_value(value, max_len)

    if not isinstance(value, Sequence):
        _logger.warning(
            "Invalid type %s for attribute '%s' value. Expected one of %s or a "
            "sequence of those types",
            type(value).__name__,
            key,
            [allowed.__name__ for allowed in _VALID_ATTR_VALUE_TYPES],
        )
        return None

    expected_type = None
    cleaned_items = []

    for raw_item in value:
        item = _clean_attribute_value(raw_item, max_len)  # type: ignore
        if item is None:
            # None placeholders do not take part in the type check.
            cleaned_items.append(None)
            continue

        item_type = type(item)
        # Exact type membership: subclasses of the valid types are rejected
        # inside sequences.
        if item_type not in _VALID_ATTR_VALUE_TYPES:
            _logger.warning(
                "Invalid type %s in attribute '%s' value sequence. Expected one of "
                "%s or None",
                item_type.__name__,
                key,
                [allowed.__name__ for allowed in _VALID_ATTR_VALUE_TYPES],
            )
            return None

        # The first non-None element fixes the element type of the whole
        # sequence.
        if expected_type is None:
            expected_type = item_type

        # Types are compared for equality rather than with isinstance,
        # because isinstance(True, int) is True and would let bools mix
        # with ints.
        if item_type != expected_type:
            _logger.warning(
                "Attribute %r mixes types %s and %s in attribute value sequence",
                key,
                expected_type.__name__,
                item_type.__name__,
            )
            return None

        cleaned_items.append(item)

    # Hand back an immutable copy so later mutation of the caller's sequence
    # cannot affect the stored value.
    return tuple(cleaned_items)
119
120
def _clean_extended_attribute_value(
    value: types.AnyValue, max_len: Optional[int]
) -> types.AnyValue:
    """Recursively normalize an AnyValue.

    - ``None`` and primitives are returned as they are, except that strings
      are cut to ``max_len`` characters when ``max_len`` is not ``None``.
    - Mappings become a new ``dict``; entries whose key is not a non-empty
      ``str`` are skipped with a warning, and each value is cleaned through
      ``_clean_extended_attribute``.
    - Other sequences become a ``tuple``; if their non-``None`` elements do
      not all share one exact type, a warning is logged and ``None`` is
      returned.

    Raises:
        TypeError: If ``value`` is none of the above.
    """
    # Primitives first: str and bytes are Sequences too and must not reach
    # the sequence branch.
    if value is None or isinstance(value, _VALID_ATTR_VALUE_TYPES):
        if isinstance(value, str) and max_len is not None:
            return value[:max_len]
        return value

    if isinstance(value, Mapping):
        cleaned_map: dict[str, types.AnyValue] = {}
        for map_key, map_value in value.items():
            # skip invalid keys
            if not map_key or not isinstance(map_key, str):
                _logger.warning(
                    "invalid key `%s`. must be non-empty string.", map_key
                )
                continue

            # _clean_extended_attribute turns a TypeError from an
            # unsupported nested value into a warning and a None entry.
            cleaned_map[map_key] = _clean_extended_attribute(
                key=map_key, value=map_value, max_len=max_len
            )

        return cleaned_map

    if not isinstance(value, Sequence):
        # NOTE(review): the message assumes every entry of
        # _VALID_ANY_VALUE_TYPES exposes __name__, including the typing
        # aliases Sequence and Mapping - confirm on the oldest supported
        # Python version.
        raise TypeError(
            f"Invalid type {type(value).__name__} for attribute value. "
            f"Expected one of {[valid_type.__name__ for valid_type in _VALID_ANY_VALUE_TYPES]} or a "
            "sequence of those types",
        )

    expected_type = None
    cleaned_items: list[types.AnyValue] = []

    for item in value:
        if item is None:
            cleaned_items.append(None)
            continue

        if isinstance(item, str) and max_len is not None:
            item = item[:max_len]

        item_type = type(item)
        if item_type not in _VALID_ATTR_VALUE_TYPES:
            # Not an exact primitive: clean it recursively and use the type
            # of the cleaned result for the homogeneity check.
            # NOTE(review): a nested sequence that is rejected comes back as
            # None here, so NoneType takes part in the check below, unlike a
            # literal None element, which is skipped above.
            item = _clean_extended_attribute_value(item, max_len=max_len)
            item_type = type(item)  # type: ignore

        # The first non-None element fixes the element type of the whole
        # sequence. Types are compared for equality rather than with
        # isinstance, because isinstance(True, int) is True.
        if expected_type is None:
            expected_type = item_type
        elif item_type != expected_type:
            _logger.warning(
                "Mixed types %s and %s in attribute value sequence",
                expected_type.__name__,
                item_type.__name__,
            )
            return None

        cleaned_items.append(item)

    # Hand back an immutable copy of the cleaned elements.
    return tuple(cleaned_items)
188
189
def _clean_extended_attribute(
    key: str, value: types.AnyValue, max_len: Optional[int]
) -> types.AnyValue:
    """Validate an extended (AnyValue) attribute and return the cleaned value.

    The key must be a non-empty ``str``. The value is normalized by
    ``_clean_extended_attribute_value``, which among other things cuts
    strings to ``max_len`` characters when ``max_len`` is not ``None``.

    Returns ``None`` (after logging a warning) when the key is invalid or
    when the value is rejected with a ``TypeError``. ``None`` is also the
    result for a ``None`` input value, so the return value alone does not
    tell the two cases apart.
    """
    if not key or not isinstance(key, str):
        _logger.warning("invalid key `%s`. must be non-empty string.", key)
        return None

    try:
        cleaned = _clean_extended_attribute_value(value, max_len=max_len)
    except TypeError as exception:
        # Unsupported value type: report it against the attribute name
        # instead of letting the error reach the caller.
        _logger.warning("Attribute %s: %s", key, exception)
        return None
    return cleaned
211
212
def _clean_attribute_value(
    value: types.AttributeValue, limit: Optional[int]
) -> Optional[types.AttributeValue]:
    """Normalize a single primitive attribute value.

    ``bytes`` are decoded to ``str`` with the default codec; if decoding
    fails a warning is logged and ``None`` is returned. Strings (including
    freshly decoded ones) are cut to ``limit`` characters when ``limit`` is
    not ``None``. Any other value, ``None`` included, is returned unchanged.
    """
    if value is None:
        return None

    cleaned = value
    if isinstance(cleaned, bytes):
        try:
            cleaned = cleaned.decode()
        except UnicodeDecodeError:
            _logger.warning("Byte attribute could not be decoded.")
            return None

    if isinstance(cleaned, str) and limit is not None:
        return cleaned[:limit]
    return cleaned
229
230
class BoundedAttributes(MutableMapping):  # type: ignore
    """Attribute mapping with an optional cap on the number of entries.

    When the mapping already holds ``maxlen`` entries and a new key is
    added, the oldest entry is removed and ``dropped`` is incremented.
    Values are cleaned on assignment; with ``extended_attributes`` unset, a
    value rejected by ``_clean_attribute`` is silently not stored (and not
    counted in ``dropped``).

    Args:
        maxlen: Maximum number of entries, or ``None`` for no limit. With
            ``0`` nothing is ever stored and every assignment counts as
            dropped.
        attributes: Initial entries, inserted through normal item
            assignment so they are cleaned and bounded as well.
        immutable: When true (the default), item assignment and deletion
            raise ``TypeError`` once construction has finished.
        max_value_len: Passed to the cleaning helpers as the maximum string
            length; ``None`` means no limit.
        extended_attributes: Selects ``_clean_extended_attribute`` instead
            of ``_clean_attribute`` for cleaning assigned values.

    Raises:
        ValueError: If ``maxlen`` is neither ``None`` nor an int >= 0.
    """

    def __init__(
        self,
        maxlen: Optional[int] = None,
        attributes: Optional[types._ExtendedAttributes] = None,
        immutable: bool = True,
        max_value_len: Optional[int] = None,
        extended_attributes: bool = False,
    ):
        maxlen_is_invalid = maxlen is not None and (
            not isinstance(maxlen, int) or maxlen < 0
        )
        if maxlen_is_invalid:
            raise ValueError("maxlen must be valid int greater or equal to 0")

        self.maxlen = maxlen
        # Incremented on evictions and on assignments made while maxlen == 0.
        self.dropped = 0
        self.max_value_len = max_value_len
        self._extended_attributes = extended_attributes

        # A plain dict is used until the capacity is first reached; only
        # then is it swapped for an OrderedDict (see __setitem__).
        self._dict: Union[
            MutableMapping[str, types.AnyValue],
            OrderedDict[str, types.AnyValue],
        ] = {}
        self._lock = threading.RLock()

        if attributes:
            for name, initial in attributes.items():
                self[name] = initial

        # Set last: __setitem__ treats a missing _immutable as False, which
        # is what lets the loop above populate the mapping.
        self._immutable = immutable

    def __repr__(self) -> str:
        return str(dict(self._dict))

    def __getitem__(self, key: str) -> types.AnyValue:
        return self._dict[key]

    def __setitem__(self, key: str, value: types.AnyValue) -> None:
        """Clean ``value`` and store it under ``key``, evicting if full.

        Raises:
            TypeError: If the mapping is immutable.
        """
        if getattr(self, "_immutable", False):  # type: ignore
            raise TypeError

        with self._lock:
            # Zero capacity: nothing can be stored, every write is a drop.
            if self.maxlen == 0:
                self.dropped += 1
                return

            if not self._extended_attributes:
                cleaned = _clean_attribute(key, value, self.max_value_len)  # type: ignore
                # On this path None means the key or value was rejected.
                if cleaned is None:
                    return
            else:
                # None is a legal AnyValue, so the result is stored as is.
                cleaned = _clean_extended_attribute(
                    key, value, self.max_value_len
                )

            store = self._dict
            if key in store:
                # Remove first so the re-inserted key becomes the newest.
                del store[key]
            elif self.maxlen is not None and len(store) == self.maxlen:
                # popitem(last=False) exists only on OrderedDict, so the
                # plain dict is converted the first time an eviction occurs.
                if not isinstance(store, OrderedDict):
                    store = OrderedDict(store)
                    self._dict = store
                store.popitem(last=False)  # type: ignore
                self.dropped += 1

            store[key] = cleaned  # type: ignore

    def __delitem__(self, key: str) -> None:
        """Remove ``key``; raises ``TypeError`` if the mapping is immutable."""
        if getattr(self, "_immutable", False):  # type: ignore
            raise TypeError

        with self._lock:
            del self._dict[key]

    def __iter__(self):  # type: ignore
        # Iterate over a snapshot taken while holding the lock.
        with self._lock:
            snapshot = self._dict.copy()  # type: ignore
        return iter(snapshot)

    def __len__(self) -> int:
        return len(self._dict)

    def copy(self):  # type: ignore
        """Return a copy of the underlying mapping (not a BoundedAttributes)."""
        return self._dict.copy()  # type: ignore