Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/genshi/util.py: 38%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

102 statements  

1# -*- coding: utf-8 -*- 

2# 

3# Copyright (C) 2006-2009 Edgewall Software 

4# All rights reserved. 

5# 

6# This software is licensed as described in the file COPYING, which 

7# you should have received as part of this distribution. The terms 

8# are also available at http://genshi.edgewall.org/wiki/License. 

9# 

10# This software consists of voluntary contributions made by many 

11# individuals. For the exact contribution history, see the revision 

12# history and logs, available at http://genshi.edgewall.org/log/. 

13 

14"""Various utility classes and functions.""" 

15 

16import re 

17 

18from genshi.compat import html_entities, unichr 

19 

20__docformat__ = 'restructuredtext en' 

21 

22 

class LRUCache(dict):
    """A dictionary-like object that stores only a certain number of items, and
    discards its least recently used item when full.

    >>> cache = LRUCache(3)
    >>> cache['A'] = 0
    >>> cache['B'] = 1
    >>> cache['C'] = 2
    >>> len(cache)
    3

    >>> cache['A']
    0

    Adding new items to the cache does not increase its size. Instead, the least
    recently used item is dropped:

    >>> cache['D'] = 3
    >>> len(cache)
    3
    >>> 'B' in cache
    False

    Iterating over the cache returns the keys, starting with the most recently
    used:

    >>> for key in cache:
    ...     print(key)
    D
    A
    C

    ``get()`` behaves like item access (the entry becomes the most recently
    used one) but returns a default for missing keys, and entries can be
    removed explicitly:

    >>> cache.get('C')
    2
    >>> cache.get('missing', 'n/a')
    'n/a'
    >>> del cache['A']
    >>> list(cache)
    ['C', 'D']

    Implementation note: entries are kept in a private mapping plus a doubly
    linked list ordered from most to least recently used. The storage
    inherited from ``dict`` is never populated, so only the methods defined
    on this class reflect the cache contents.

    This code is based on the LRUCache class from ``myghtyutils.util``, written
    by Mike Bayer and released under the MIT license. See:

      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
    """

    class _Item(object):
        """Linked-list node holding a single cache entry."""

        def __init__(self, key, value):
            self.prv = self.nxt = None
            self.key = key
            self.value = value

        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity):
        """Create an empty cache.

        :param capacity: the maximum number of entries kept in the cache
        """
        # Maps each key to its ``_Item`` node; this, not the inherited dict
        # storage, is where the entries live.
        self._dict = dict()
        self.capacity = capacity
        self.head = None  # most recently used node
        self.tail = None  # least recently used node

    def __contains__(self, key):
        return key in self._dict

    def __iter__(self):
        """Yield the keys, most recently used first."""
        cur = self.head
        while cur is not None:
            yield cur.key
            cur = cur.nxt

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        """Return the value for `key` and mark it as most recently used.

        :raise KeyError: if `key` is not in the cache
        """
        item = self._dict[key]
        self._update_item(item)
        return item.value

    def __setitem__(self, key, value):
        """Store `value` under `key`, evicting old entries if necessary."""
        item = self._dict.get(key)
        if item is None:
            item = self._Item(key, value)
            self._dict[key] = item
            # _insert_item() trims the cache itself.
            self._insert_item(item)
        else:
            item.value = value
            self._update_item(item)
            # The size did not change, but `capacity` may have been lowered
            # since the last insertion.
            self._manage_size()

    def __delitem__(self, key):
        """Remove `key` from the cache.

        :raise KeyError: if `key` is not in the cache
        """
        item = self._dict.pop(key)
        self._unlink_item(item)

    def __repr__(self):
        return repr(self._dict)

    def get(self, key, default=None):
        """Return the value for `key`, or `default` if it is not cached.

        A successful lookup marks the entry as most recently used, exactly
        like ``cache[key]`` does.
        """
        item = self._dict.get(key)
        if item is None:
            return default
        self._update_item(item)
        return item.value

    def clear(self):
        """Remove all entries from the cache."""
        self._dict.clear()
        self.head = self.tail = None

    def _insert_item(self, item):
        """Link a new node in at the head of the list and trim the cache."""
        item.prv = None
        item.nxt = self.head
        if self.head is not None:
            self.head.prv = item
        else:
            self.tail = item
        self.head = item
        self._manage_size()

    def _manage_size(self):
        """Drop least recently used entries until the cache fits `capacity`."""
        # The `tail` check keeps a negative capacity from dereferencing None
        # once the cache has been emptied.
        while self.tail is not None and len(self._dict) > self.capacity:
            del self._dict[self.tail.key]
            if self.tail is not self.head:
                self.tail = self.tail.prv
                self.tail.nxt = None
            else:
                self.head = self.tail = None

    def _unlink_item(self, item):
        """Detach a node from the linked list, fixing up `head` and `tail`."""
        if item.prv is not None:
            item.prv.nxt = item.nxt
        else:
            self.head = item.nxt
        if item.nxt is not None:
            item.nxt.prv = item.prv
        else:
            self.tail = item.prv
        item.prv = item.nxt = None

    def _update_item(self, item):
        """Move an already linked node to the head of the list."""
        if self.head is item:
            return

        # `item` is not the head, so it is guaranteed to have a predecessor.
        prv = item.prv
        prv.nxt = item.nxt
        if item.nxt is not None:
            item.nxt.prv = prv
        else:
            self.tail = prv

        item.prv = None
        item.nxt = self.head
        # Targets are assigned left to right: the old head is linked back to
        # `item` before `head` itself is rebound.
        self.head.prv = self.head = item

139 

140 

def flatten(items):
    """Collapse an arbitrarily nested sequence into a single flat list.

    Elements that are lists, tuples, sets or frozensets are expanded
    recursively; every other element is copied to the result as is.

    :param items: the sequence to flatten
    :return: a new list containing the non-container elements in order

    >>> flatten((1, 2))
    [1, 2]
    >>> flatten([1, (2, 3), 4])
    [1, 2, 3, 4]
    >>> flatten([1, (2, [3, 4]), 5])
    [1, 2, 3, 4, 5]
    """
    flat = []
    for element in items:
        if not isinstance(element, (frozenset, list, set, tuple)):
            flat.append(element)
            continue
        # Nested container: splice its flattened contents in place.
        flat.extend(flatten(element))
    return flat

160 

161 

def plaintext(text, keeplinebreaks=True):
    """Convert markup to plain text by dropping tags and decoding entities.

    >>> plaintext('<b>1 &lt; 2</b>')
    '1 < 2'

    Pass ``keeplinebreaks=False`` to have every newline character replaced
    by a single space:

    >>> plaintext('''<b>1
    ... &lt;
    ... 2</b>''', keeplinebreaks=False)
    '1 < 2'

    :param text: the text to convert to plain text
    :param keeplinebreaks: whether line breaks in the text should be kept intact
    :return: the text with tags and entities removed
    """
    # Tags are removed first, then entities are decoded.
    without_tags = striptags(text)
    decoded = stripentities(without_tags)
    if keeplinebreaks:
        return decoded
    return decoded.replace('\n', ' ')

184 

185 

# Group 1 captures a numeric reference (decimal digits, or ``x``/``X`` followed
# by hex digits; the trailing semicolon is optional), group 2 a named entity.
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')


def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent UTF-8 characters.

    >>> stripentities('1 &lt; 2')
    '1 < 2'
    >>> stripentities('more &hellip;')
    'more \u2026'
    >>> stripentities('&#8230;')
    '\u2026'
    >>> stripentities('&#x2026;')
    '\u2026'

    The hexadecimal marker may be written in either case:

    >>> stripentities('&#X2026;')
    '\u2026'

    Numeric references that do not denote a valid code point (for example
    ``&#99999999;``) are replaced by U+FFFD REPLACEMENT CHARACTER instead of
    raising an exception.

    If the `keepxmlentities` parameter is provided and is a truth value, the
    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.

    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
    '1 &lt; 2 \u2026'

    Unknown named entities are reduced to their bare name, or, when
    `keepxmlentities` is set, kept with the ampersand escaped as ``&amp;``.

    :param text: the text in which to replace entities
    :param keepxmlentities: whether the five core XML entities should be left
                            untouched
    :return: the text with the entities replaced
    """
    def _replace_entity(match):
        ref = match.group(1)
        if ref:  # numeric entity
            try:
                # The pattern accepts both ``x`` and ``X`` as the hex marker.
                if ref[0] in 'xX':
                    codepoint = int(ref[1:], 16)
                else:
                    codepoint = int(ref, 10)
                return unichr(codepoint)
            except (ValueError, OverflowError):
                # Out-of-range code point, or a digit string too long for
                # int(); substitute the replacement character.
                return u'\ufffd'

        # character entity
        ref = match.group(2)
        if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
            return '&%s;' % ref
        try:
            return unichr(html_entities.name2codepoint[ref])
        except KeyError:
            if keepxmlentities:
                return '&amp;%s;' % ref
            return ref

    return _STRIPENTITIES_RE.sub(_replace_entity, text)

226 

227 

# Comments are tried first so that markup inside them is removed together with
# the comment. DOTALL lets the comment alternative span line breaks; without
# it, a multi-line comment containing ``>`` would only be removed up to that
# first ``>`` by the generic tag alternative.
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)


def striptags(text):
    """Return a copy of the text with any XML/HTML tags removed.

    >>> striptags('<span>Foo</span> bar')
    'Foo bar'
    >>> striptags('<span class="bar">Foo</span>')
    'Foo'
    >>> striptags('Foo<br />')
    'Foo'

    HTML/XML comments are stripped, too, including comments that span
    several lines:

    >>> striptags('<!-- <blub>hehe</blah> -->test')
    'test'

    :param text: the string to remove tags from
    :return: the text with tags removed
    """
    return _STRIPTAGS_RE.sub('', text)