Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/pubsub_v1/subscriber/_protocol/histogram.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

44 statements  

1# Copyright 2017, Google LLC All rights reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15from typing import Dict, Optional, Union 

16 

17 

18MIN_ACK_DEADLINE = 10 

19MAX_ACK_DEADLINE = 600 

20 

21 

22class Histogram(object): 

23 """Representation of a single histogram. 

24 

25 The purpose of this class is to store actual ack timing information 

26 in order to predict how long to renew leases. 

27 

28 The default implementation uses the 99th percentile of previous ack 

29 times to implicitly lease messages; however, custom 

30 :class:`~.pubsub_v1.subscriber._consumer.Consumer` subclasses 

31 are free to use a different formula. 

32 

33 The precision of data stored is to the nearest integer. Additionally, 

34 values outside the range of ``MIN_ACK_DEADLINE <= x <= MAX_ACK_DEADLINE`` are stored 

35 as ``MIN_ACK_DEADLINE`` or ``MAX_ACK_DEADLINE``, since these are the boundaries of 

36 leases in the actual API. 

37 """ 

38 

39 def __init__(self, data: Optional[Dict[int, int]] = None): 

40 """Instantiate the histogram. 

41 

42 Args: 

43 data: 

44 The data strucure to be used to store the underlying data. The default 

45 is an empty dictionary. This can be set to a dictionary-like object if 

46 required (for example, if a special object is needed for concurrency 

47 reasons). 

48 """ 

49 # The data is stored as a dictionary, with the keys being the 

50 # value being added and the values being the number of times that 

51 # value was added to the dictionary. 

52 # 

53 # This is depending on the Python interpreter's implicit ordering 

54 # of dictionaries, which is a bitwise sort by the key's ``hash()`` 

55 # value. Because ``hash(int i) -> i`` and all of our keys are 

56 # positive integers (negatives would be a problem because the sort 

57 # is bitwise), we can rely on this. 

58 if data is None: 

59 data = {} 

60 self._data = data 

61 self._len = 0 

62 

63 def __len__(self) -> int: 

64 """Return the total number of data points in this histogram. 

65 

66 This is cached on a separate counter (rather than computing it using 

67 ``sum([v for v in self._data.values()])``) to optimize lookup. 

68 

69 Returns: 

70 The total number of data points in this histogram. 

71 """ 

72 return self._len 

73 

74 def __contains__(self, needle: int) -> bool: 

75 """Return ``True`` if needle is present in the histogram, ``False`` otherwise.""" 

76 return needle in self._data 

77 

78 def __repr__(self): 

79 return "<Histogram: {len} values between {min} and {max}>".format( 

80 len=len(self), max=self.max, min=self.min 

81 ) 

82 

83 @property 

84 def max(self) -> int: 

85 """Return the maximum value in this histogram. 

86 

87 If there are no values in the histogram at all, return ``MAX_ACK_DEADLINE``. 

88 

89 Returns: 

90 The maximum value in the histogram. 

91 """ 

92 if len(self._data) == 0: 

93 return MAX_ACK_DEADLINE 

94 return next(iter(reversed(sorted(self._data.keys())))) 

95 

96 @property 

97 def min(self) -> int: 

98 """Return the minimum value in this histogram. 

99 

100 If there are no values in the histogram at all, return ``MIN_ACK_DEADLINE``. 

101 

102 Returns: 

103 The minimum value in the histogram. 

104 """ 

105 if len(self._data) == 0: 

106 return MIN_ACK_DEADLINE 

107 return next(iter(sorted(self._data.keys()))) 

108 

109 def add(self, value: Union[int, float]) -> None: 

110 """Add the value to this histogram. 

111 

112 Args: 

113 value: 

114 The value. Values outside of 

115 ``MIN_ACK_DEADLINE <= x <= MAX_ACK_DEADLINE`` 

116 will be raised to ``MIN_ACK_DEADLINE`` or reduced to 

117 ``MAX_ACK_DEADLINE``. 

118 """ 

119 # If the value is out of bounds, bring it in bounds. 

120 value = int(value) 

121 if value < MIN_ACK_DEADLINE: 

122 value = MIN_ACK_DEADLINE 

123 elif value > MAX_ACK_DEADLINE: 

124 value = MAX_ACK_DEADLINE 

125 

126 # Add the value to the histogram's data dictionary. 

127 self._data.setdefault(value, 0) 

128 self._data[value] += 1 

129 self._len += 1 

130 

131 def percentile(self, percent: Union[int, float]) -> int: 

132 """Return the value that is the Nth precentile in the histogram. 

133 

134 Args: 

135 percent: 

136 The precentile being sought. The default consumer implementations 

137 consistently use ``99``. 

138 

139 Returns: 

140 The value corresponding to the requested percentile. 

141 """ 

142 # Sanity check: Any value over 100 should become 100. 

143 if percent >= 100: 

144 percent = 100 

145 

146 # Determine the actual target number. 

147 target = len(self) - len(self) * (percent / 100) 

148 

149 # Iterate over the values in reverse, dropping the target by the 

150 # number of times each value has been seen. When the target passes 

151 # 0, return the value we are currently viewing. 

152 for k in reversed(sorted(self._data.keys())): 

153 target -= self._data[k] 

154 if target < 0: 

155 return k 

156 

157 # The only way to get here is if there was no data. 

158 # In this case, just return the shortest possible deadline. 

159 return MIN_ACK_DEADLINE