Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/genshi/util.py: 38%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2006-2009 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
14"""Various utility classes and functions."""
16import re
18from genshi.compat import html_entities, unichr
20__docformat__ = 'restructuredtext en'
class LRUCache(dict):
    """A dictionary-like object that stores only a certain number of items, and
    discards its least recently used item when full.

    >>> cache = LRUCache(3)
    >>> cache['A'] = 0
    >>> cache['B'] = 1
    >>> cache['C'] = 2
    >>> len(cache)
    3

    >>> cache['A']
    0

    Adding new items to the cache does not increase its size. Instead, the least
    recently used item is dropped:

    >>> cache['D'] = 3
    >>> len(cache)
    3
    >>> 'B' in cache
    False

    Iterating over the cache returns the keys, starting with the most recently
    used:

    >>> for key in cache:
    ...     print(key)
    D
    A
    C

    ``get()`` behaves like item access (a hit counts as a use) but returns a
    default instead of raising; ``del`` and ``clear()`` remove entries:

    >>> cache.get('C')
    2
    >>> cache.get('B', 'gone')
    'gone'
    >>> del cache['A']
    >>> list(cache)
    ['C', 'D']

    Implementation note: entries are kept in the private ``_dict`` mapping and
    in a doubly linked list ordered from most (``head``) to least (``tail``)
    recently used. The storage inherited from ``dict`` is never populated, so
    inherited methods that are not overridden here (``keys()``, ``items()``,
    ``values()``, ``pop()``, ``update()``, ...) do not see the cached entries.

    This code is based on the LRUCache class from ``myghtyutils.util``, written
    by Mike Bayer and released under the MIT license. See:

      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
    """

    class _Item(object):
        """Node of the recency list, holding one key/value pair."""

        def __init__(self, key, value):
            self.prv = self.nxt = None
            self.key = key
            self.value = value

        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity):
        """Create an empty cache.

        :param capacity: the maximum number of items to retain
        """
        self._dict = dict()
        self.capacity = capacity
        self.head = None
        self.tail = None

    def __contains__(self, key):
        # Membership tests do not count as a "use" of the entry.
        return key in self._dict

    def __iter__(self):
        # Walk the list from the most to the least recently used key.
        cur = self.head
        while cur is not None:
            yield cur.key
            cur = cur.nxt

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        """Return the value for `key` and mark it as most recently used.

        :raise KeyError: if `key` is not in the cache
        """
        item = self._dict[key]
        self._update_item(item)
        return item.value

    def __setitem__(self, key, value):
        """Store `value` under `key`, mark it as most recently used, and
        evict least recently used items while the cache is over capacity.
        """
        item = self._dict.get(key)
        if item is None:
            item = self._Item(key, value)
            self._dict[key] = item
            self._insert_item(item)
        else:
            item.value = value
            self._update_item(item)
        # A single trim pass covers both branches (and also picks up a
        # ``capacity`` that was lowered after construction).
        self._manage_size()

    def __delitem__(self, key):
        """Remove `key` from the cache.

        :raise KeyError: if `key` is not in the cache
        """
        item = self._dict.pop(key)
        self._unlink_item(item)

    def __repr__(self):
        return repr(self._dict)

    def get(self, key, default=None):
        """Return the value for `key`, or `default` if it is not cached.

        The inherited ``dict.get`` would consult the unused base storage and
        always return `default`; this version goes through ``__getitem__`` so
        that a hit also refreshes the entry's recency.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def clear(self):
        """Remove all items from the cache."""
        self._dict.clear()
        self.head = self.tail = None

    def _insert_item(self, item):
        # Link an item that is currently not in the list at the head.
        item.prv = None
        item.nxt = self.head
        if self.head is not None:
            self.head.prv = item
        else:
            self.tail = item
        self.head = item

    def _unlink_item(self, item):
        # Detach the item from the list, fixing up head/tail as needed.
        if item.prv is not None:
            item.prv.nxt = item.nxt
        else:
            self.head = item.nxt
        if item.nxt is not None:
            item.nxt.prv = item.prv
        else:
            self.tail = item.prv
        item.prv = item.nxt = None

    def _manage_size(self):
        # Evict from the tail until the cache fits. The ``tail`` guard stops
        # the loop once the list is empty, so a negative capacity cannot
        # trigger an attribute access on ``None``.
        while self.tail is not None and len(self._dict) > self.capacity:
            oldest = self.tail
            del self._dict[oldest.key]
            self._unlink_item(oldest)

    def _update_item(self, item):
        # Move an already linked item to the head of the list.
        if item is self.head:
            return
        self._unlink_item(item)
        self._insert_item(item)
def flatten(items):
    """Collapse an arbitrarily nested sequence into a single flat list.

    Nested ``list``, ``tuple``, ``set`` and ``frozenset`` members are expanded
    recursively; every other kind of member (including strings) is copied to
    the result unchanged.

    :param items: the sequence to flatten
    :return: a new list holding the non-container members in iteration order

    >>> flatten((1, 2))
    [1, 2]
    >>> flatten([1, (2, 3), 4])
    [1, 2, 3, 4]
    >>> flatten([1, (2, [3, 4]), 5])
    [1, 2, 3, 4, 5]
    """
    flat = []
    for entry in items:
        if not isinstance(entry, (frozenset, list, set, tuple)):
            flat.append(entry)
            continue
        # Nested container: splice in its flattened contents.
        flat.extend(flatten(entry))
    return flat
def plaintext(text, keeplinebreaks=True):
    """Reduce markup to plain text by dropping tags and resolving entities.

    Tags are removed first, then the entities in the remaining text are
    replaced:

    >>> plaintext('<b>1 &lt; 2</b>')
    '1 < 2'

    Pass ``keeplinebreaks=False`` to have every newline replaced by a single
    space:

    >>> plaintext('''<b>1
    ... &lt;
    ... 2</b>''', keeplinebreaks=False)
    '1 < 2'

    :param text: the text to convert to plain text
    :param keeplinebreaks: whether line breaks in the text should be kept intact
    :return: the text with tags and entities removed
    """
    stripped = stripentities(striptags(text))
    if keeplinebreaks:
        return stripped
    return stripped.replace('\n', ' ')
# Group 1 captures a numeric reference (decimal digits, or ``x``/``X`` followed
# by hex digits; the trailing ``;`` is optional), group 2 a named entity.
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')
def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent Unicode characters.

    >>> stripentities('1 &lt; 2')
    '1 < 2'
    >>> stripentities('more &hellip;')
    'more \u2026'
    >>> stripentities('&#8230;')
    '\u2026'
    >>> stripentities('&#x2026;')
    '\u2026'

    The hexadecimal marker is accepted in either case:

    >>> stripentities('&#X2026;')
    '\u2026'

    If the `keepxmlentities` parameter is provided and is a truth value, the
    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.

    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
    '1 &lt; 2 \u2026'

    Named entities that are not known are reduced to their bare name, or left
    untouched when `keepxmlentities` is set. Numeric references whose value is
    not a valid code point are left untouched.

    :param text: the text in which to replace entities
    :param keepxmlentities: whether the core XML entities should be preserved
    :return: the text with entities replaced
    """
    def _replace_entity(match):
        numeric = match.group(1)
        if numeric:  # numeric entity
            # The pattern allows both ``x`` and ``X`` as the hex marker, so
            # both must be recognised here; otherwise ``&#X2026;`` would be
            # parsed as a decimal number and raise ValueError.
            if numeric[0] in 'xX':
                codepoint = int(numeric[1:], 16)
            else:
                codepoint = int(numeric, 10)
            try:
                return unichr(codepoint)
            except (ValueError, OverflowError):
                # Not a representable code point: keep the reference as-is
                # rather than failing on malformed input.
                return match.group(0)
        # character entity
        name = match.group(2)
        if keepxmlentities and name in ('amp', 'apos', 'gt', 'lt', 'quot'):
            return '&%s;' % name
        try:
            return unichr(html_entities.name2codepoint[name])
        except KeyError:
            if keepxmlentities:
                return '&%s;' % name
            return name
    return _STRIPENTITIES_RE.sub(_replace_entity, text)
# Matches either a complete comment or a single tag. DOTALL lets the comment
# alternative span line breaks; without it a multi-line comment containing
# ``>`` would only be removed up to that first ``>`` by the tag alternative.
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)
def striptags(text):
    """Return a copy of the text with any XML/HTML tags removed.

    >>> striptags('<span>Foo</span> bar')
    'Foo bar'
    >>> striptags('<span class="bar">Foo</span>')
    'Foo'
    >>> striptags('Foo<br />')
    'Foo'

    HTML/XML comments are stripped, too:

    >>> striptags('<!-- <blub>hehe</blah> -->test')
    'test'

    This also holds for comments that span several lines:

    >>> striptags('''<!-- a > b
    ... c -->test''')
    'test'

    :param text: the string to remove tags from
    :return: the text with tags removed
    """
    return _STRIPTAGS_RE.sub('', text)