Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/genshi/util.py: 38%

1# -*- coding: utf-8 -*-

6# This software is licensed as described in the file COPYING, which

7# you should have received as part of this distribution. The terms

8# are also available at http://genshi.edgewall.org/wiki/License.

10# This software consists of voluntary contributions made by many

11# individuals. For the exact contribution history, see the revision

12# history and logs, available at http://genshi.edgewall.org/log/.

14"""Various utility classes and functions."""

16import re

18from genshi.compat import html_entities, unichr

20__docformat__ = 'restructuredtext en'

class LRUCache(dict):
    """A dictionary-like object that stores only a certain number of items,
    and discards its least recently used item when full.

    >>> cache = LRUCache(3)
    >>> cache['A'] = 0
    >>> cache['B'] = 1
    >>> cache['C'] = 2
    >>> len(cache)
    3

    Reading an item marks it as the most recently used one:

    >>> cache['A']
    0

    Adding new items to the cache does not increase its size. Instead, the
    least recently used item is dropped:

    >>> cache['D'] = 3
    >>> len(cache)
    3
    >>> 'B' in cache
    False

    Iterating over the cache returns the keys, starting with the most
    recently used:

    >>> for key in cache:
    ...     print(key)
    D
    A
    C

    ``get``, ``del cache[key]`` and ``clear`` operate on the cached entries
    as well:

    >>> cache.get('A')
    0
    >>> cache.get('missing', 'fallback')
    'fallback'
    >>> del cache['C']
    >>> list(cache)
    ['A', 'D']

    Entries live in a private mapping (``self._dict``) plus a doubly-linked
    list running from ``head`` (most recently used) to ``tail`` (least
    recently used); the storage of the ``dict`` base class itself is not
    used.  Inherited ``dict`` methods that are not overridden here therefore
    do not see the cached entries.

    This code is based on the LRUCache class from ``myghtyutils.util``,
    written by Mike Bayer and released under the MIT license. See:

      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
    """

    class _Item(object):
        """Linked-list node holding a single key/value pair."""

        def __init__(self, key, value):
            self.prv = self.nxt = None
            self.key = key
            self.value = value

        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity):
        """Create an empty cache holding at most `capacity` items."""
        self._dict = dict()
        self.capacity = capacity
        self.head = None
        self.tail = None

    def __contains__(self, key):
        # Membership tests do not change the usage order.
        return key in self._dict

    def __iter__(self):
        """Yield the keys from most to least recently used."""
        cur = self.head
        while cur is not None:
            yield cur.key
            cur = cur.nxt

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        """Return the value for `key` and mark it as most recently used.

        :raises KeyError: if `key` is not in the cache
        """
        item = self._dict[key]
        self._update_item(item)
        return item.value

    def __setitem__(self, key, value):
        """Store `value` under `key` as the most recently used entry,
        evicting the least recently used entries if the cache is over
        capacity.
        """
        item = self._dict.get(key)
        if item is None:
            item = self._Item(key, value)
            self._dict[key] = item
            # `_insert_item` trims the cache itself.
            self._insert_item(item)
        else:
            item.value = value
            self._update_item(item)
            # The size did not grow, but `capacity` is a plain attribute
            # that may have been lowered since the last insertion.
            self._manage_size()

    def __delitem__(self, key):
        """Remove `key` from the cache.

        :raises KeyError: if `key` is not in the cache
        """
        item = self._dict.pop(key)
        self._unlink(item)

    def __repr__(self):
        return repr(self._dict)

    def get(self, key, default=None):
        """Return the value for `key`, or `default` if it is not cached.

        A successful lookup marks the entry as most recently used, exactly
        like ``cache[key]``.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def clear(self):
        """Remove all entries from the cache."""
        self._dict.clear()
        self.head = self.tail = None

    def _link_at_head(self, item):
        # Make `item` the first (most recently used) node of the list.
        item.prv = None
        item.nxt = self.head
        if self.head is not None:
            self.head.prv = item
        else:
            # The list was empty, so the new node is also the last one.
            self.tail = item
        self.head = item

    def _unlink(self, item):
        # Detach `item` from the list, repairing head/tail as needed.
        if item.prv is not None:
            item.prv.nxt = item.nxt
        else:
            self.head = item.nxt
        if item.nxt is not None:
            item.nxt.prv = item.prv
        else:
            self.tail = item.prv
        item.prv = item.nxt = None

    def _insert_item(self, item):
        # Add a new node at the front and evict whatever no longer fits.
        self._link_at_head(item)
        self._manage_size()

    def _manage_size(self):
        # Drop entries from the least recently used end until the cache
        # fits.  The `tail` check stops the loop once the cache is empty,
        # which matters when `capacity` is negative.
        while self.tail is not None and len(self._dict) > self.capacity:
            victim = self.tail
            del self._dict[victim.key]
            self._unlink(victim)

    def _update_item(self, item):
        # Move an existing node to the front of the list.
        if self.head is item:
            return
        self._unlink(item)
        self._link_at_head(item)

139

140

def flatten(items):
    """Collapse an arbitrarily nested sequence into a single flat list.

    Only ``frozenset``, ``list``, ``set`` and ``tuple`` members are expanded
    (recursively); every other member is copied to the result unchanged.

    :param items: the sequence to flatten
    :return: a new list containing the non-container members in order

    >>> flatten((1, 2))
    [1, 2]
    >>> flatten([1, (2, 3), 4])
    [1, 2, 3, 4]
    >>> flatten([1, (2, [3, 4]), 5])
    [1, 2, 3, 4, 5]
    """
    flat = []
    for element in items:
        if not isinstance(element, (frozenset, list, set, tuple)):
            flat.append(element)
            continue
        # Nested container: splice its flattened members in place.
        flat.extend(flatten(element))
    return flat

160

161

def plaintext(text, keeplinebreaks=True):
    """Return `text` reduced to plain text.

    The text is first passed through `striptags` and the result through
    `stripentities`.

    >>> plaintext('1 &lt; 2')
    '1 < 2'

    The `keeplinebreaks` parameter can be set to ``False`` to replace every
    newline character by a single space:

    >>> plaintext('''1
    ... &lt;
    ... 2''', keeplinebreaks=False)
    '1 < 2'

    :param text: the text to convert to plain text
    :param keeplinebreaks: whether line breaks in the text should be kept
                           intact
    :return: the text with tags and entities removed
    """
    without_tags = striptags(text)
    result = stripentities(without_tags)
    if keeplinebreaks:
        return result
    return result.replace('\n', ' ')

184

185

# Group 1: body of a numeric reference (decimal digits, or ``x``/``X``
# followed by hex digits; the trailing ``;`` is optional).
# Group 2: name of a character entity (the trailing ``;`` is required).
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')


def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent Unicode characters.

    >>> stripentities('1 &lt; 2')
    '1 < 2'
    >>> stripentities('&#65;&#x42;&#X43;')
    'ABC'
    >>> stripentities('more &hellip;') == 'more ' + stripentities('&#8230;')
    True

    If the `keepxmlentities` parameter is provided and is a truth value, the
    core XML entities (``&amp;``, ``&apos;``, ``&gt;``, ``&lt;`` and
    ``&quot;``) are left intact:

    >>> stripentities('1 &lt; 2', keepxmlentities=True)
    '1 &lt; 2'

    Unknown named entities are reduced to their bare name, or left untouched
    when `keepxmlentities` is set.  Numeric references that do not denote a
    valid code point are left untouched.

    :param text: the text in which to replace entities
    :param keepxmlentities: whether the core XML entities should be kept
    :return: the text with entities replaced
    """
    def _replace_entity(match):
        numeric = match.group(1)
        if numeric:  # numeric entity
            try:
                # The pattern accepts both ``x`` and ``X`` as hex marker.
                if numeric[0] in 'xX':
                    codepoint = int(numeric[1:], 16)
                else:
                    codepoint = int(numeric, 10)
                return unichr(codepoint)
            except (ValueError, OverflowError):
                # Not a representable code point: keep the reference as is
                # instead of aborting the whole substitution.
                return match.group(0)
        # character entity
        ref = match.group(2)
        if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
            return '&%s;' % ref
        try:
            return unichr(html_entities.name2codepoint[ref])
        except KeyError:
            if keepxmlentities:
                return '&%s;' % ref
            else:
                return ref
    return _STRIPENTITIES_RE.sub(_replace_entity, text)

226

227

# Comments are tried first so that markup inside a comment is removed together
# with it; DOTALL lets a comment span several lines.
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)


def striptags(text):
    """Return a copy of the text with any XML/HTML tags removed.

    >>> striptags('<span>Foo</span> bar')
    'Foo bar'
    >>> striptags('<span class="bar">Foo</span>')
    'Foo'
    >>> striptags('Foo<br />')
    'Foo'

    HTML/XML comments are stripped, too, including any markup they contain:

    >>> striptags('<!-- <blub>hehe</blah> -->test')
    'test'

    :param text: the string to remove tags from
    :return: the text with tags removed
    """
    return _STRIPTAGS_RE.sub('', text)