1# Copyright 2017, Google LLC All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from typing import Dict, Optional, Union
16
17
18MIN_ACK_DEADLINE = 10
19MAX_ACK_DEADLINE = 600
20
21
22class Histogram(object):
23 """Representation of a single histogram.
24
25 The purpose of this class is to store actual ack timing information
26 in order to predict how long to renew leases.
27
28 The default implementation uses the 99th percentile of previous ack
29 times to implicitly lease messages; however, custom
30 :class:`~.pubsub_v1.subscriber._consumer.Consumer` subclasses
31 are free to use a different formula.
32
33 The precision of data stored is to the nearest integer. Additionally,
34 values outside the range of ``MIN_ACK_DEADLINE <= x <= MAX_ACK_DEADLINE`` are stored
35 as ``MIN_ACK_DEADLINE`` or ``MAX_ACK_DEADLINE``, since these are the boundaries of
36 leases in the actual API.
37 """
38
39 def __init__(self, data: Optional[Dict[int, int]] = None):
40 """Instantiate the histogram.
41
42 Args:
43 data:
44 The data strucure to be used to store the underlying data. The default
45 is an empty dictionary. This can be set to a dictionary-like object if
46 required (for example, if a special object is needed for concurrency
47 reasons).
48 """
49 # The data is stored as a dictionary, with the keys being the
50 # value being added and the values being the number of times that
51 # value was added to the dictionary.
52 #
53 # This is depending on the Python interpreter's implicit ordering
54 # of dictionaries, which is a bitwise sort by the key's ``hash()``
55 # value. Because ``hash(int i) -> i`` and all of our keys are
56 # positive integers (negatives would be a problem because the sort
57 # is bitwise), we can rely on this.
58 if data is None:
59 data = {}
60 self._data = data
61 self._len = 0
62
63 def __len__(self) -> int:
64 """Return the total number of data points in this histogram.
65
66 This is cached on a separate counter (rather than computing it using
67 ``sum([v for v in self._data.values()])``) to optimize lookup.
68
69 Returns:
70 The total number of data points in this histogram.
71 """
72 return self._len
73
74 def __contains__(self, needle: int) -> bool:
75 """Return ``True`` if needle is present in the histogram, ``False`` otherwise."""
76 return needle in self._data
77
78 def __repr__(self):
79 return "<Histogram: {len} values between {min} and {max}>".format(
80 len=len(self), max=self.max, min=self.min
81 )
82
83 @property
84 def max(self) -> int:
85 """Return the maximum value in this histogram.
86
87 If there are no values in the histogram at all, return ``MAX_ACK_DEADLINE``.
88
89 Returns:
90 The maximum value in the histogram.
91 """
92 if len(self._data) == 0:
93 return MAX_ACK_DEADLINE
94 return next(iter(reversed(sorted(self._data.keys()))))
95
96 @property
97 def min(self) -> int:
98 """Return the minimum value in this histogram.
99
100 If there are no values in the histogram at all, return ``MIN_ACK_DEADLINE``.
101
102 Returns:
103 The minimum value in the histogram.
104 """
105 if len(self._data) == 0:
106 return MIN_ACK_DEADLINE
107 return next(iter(sorted(self._data.keys())))
108
109 def add(self, value: Union[int, float]) -> None:
110 """Add the value to this histogram.
111
112 Args:
113 value:
114 The value. Values outside of
115 ``MIN_ACK_DEADLINE <= x <= MAX_ACK_DEADLINE``
116 will be raised to ``MIN_ACK_DEADLINE`` or reduced to
117 ``MAX_ACK_DEADLINE``.
118 """
119 # If the value is out of bounds, bring it in bounds.
120 value = int(value)
121 if value < MIN_ACK_DEADLINE:
122 value = MIN_ACK_DEADLINE
123 elif value > MAX_ACK_DEADLINE:
124 value = MAX_ACK_DEADLINE
125
126 # Add the value to the histogram's data dictionary.
127 self._data.setdefault(value, 0)
128 self._data[value] += 1
129 self._len += 1
130
131 def percentile(self, percent: Union[int, float]) -> int:
132 """Return the value that is the Nth precentile in the histogram.
133
134 Args:
135 percent:
136 The precentile being sought. The default consumer implementations
137 consistently use ``99``.
138
139 Returns:
140 The value corresponding to the requested percentile.
141 """
142 # Sanity check: Any value over 100 should become 100.
143 if percent >= 100:
144 percent = 100
145
146 # Determine the actual target number.
147 target = len(self) - len(self) * (percent / 100)
148
149 # Iterate over the values in reverse, dropping the target by the
150 # number of times each value has been seen. When the target passes
151 # 0, return the value we are currently viewing.
152 for k in reversed(sorted(self._data.keys())):
153 target -= self._data[k]
154 if target < 0:
155 return k
156
157 # The only way to get here is if there was no data.
158 # In this case, just return the shortest possible deadline.
159 return MIN_ACK_DEADLINE