Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/asn1crypto/

1# coding: utf-8

3"""

4Functions to convert unicode IRIs into ASCII byte string URIs and back. Exports

5the following items:

7 - iri_to_uri()

8 - uri_to_iri()

9"""

11from __future__ import unicode_literals, division, absolute_import, print_function

13from encodings import idna # noqa

14import codecs

15import re

16import sys

18from ._errors import unwrap

19from ._types import byte_cls, str_cls, type_name, bytes_to_list, int_types

21if sys.version_info < (3,):

22 from urlparse import urlsplit, urlunsplit

23 from urllib import (

24 quote as urlquote,

25 unquote as unquote_to_bytes,

26 )

28else:

29 from urllib.parse import (

30 quote as urlquote,

31 unquote_to_bytes,

32 urlsplit,

33 urlunsplit,

34 )

def iri_to_uri(value, normalize=False):
    """
    Encodes a unicode IRI into an ASCII byte string URI

    :param value:
        A unicode string of an IRI

    :param normalize:
        A bool that controls URI normalization. When True, the default port
        of an http (80) or https (443) URI is omitted, and a path of "/" is
        omitted when there is no query and no fragment.

    :raises:
        TypeError - when value is not a unicode string

    :return:
        A byte string of the ASCII-encoded URI
    """

    if not isinstance(value, str_cls):
        raise TypeError(unwrap(
            '''
            value must be a unicode string, not %s
            ''',
            type_name(value)
        ))

    scheme = None
    # Python 2.6 doesn't split properly if the URL doesn't start with http:// or https://
    if sys.version_info < (2, 7) and not value.startswith('http://') and not value.startswith('https://'):
        real_prefix = None
        prefix_match = re.match('^[^:]*://', value)
        if prefix_match:
            # Temporarily swap the real scheme for http:// so urlsplit() will
            # break the value apart correctly
            real_prefix = prefix_match.group(0)
            value = 'http://' + value[len(real_prefix):]
        parsed = urlsplit(value)
        if real_prefix:
            value = real_prefix + value[7:]
            scheme = _urlquote(real_prefix[:-3])
    else:
        parsed = urlsplit(value)

    if scheme is None:
        scheme = _urlquote(parsed.scheme)
    hostname = parsed.hostname
    if hostname is not None:
        hostname = hostname.encode('idna')
        # The hostname attribute has the square brackets of an IP literal
        # (e.g. "[::1]") stripped. A hostname can only contain ":" when it
        # was bracketed, so the brackets are restored to keep the host and
        # port distinguishable in the resulting URI.
        if b':' in hostname:
            hostname = b'[' + hostname + b']'
    # RFC 3986 allows userinfo to contain sub-delims
    username = _urlquote(parsed.username, safe='!$&\'()*+,;=')
    password = _urlquote(parsed.password, safe='!$&\'()*+,;=')
    port = parsed.port
    if port is not None:
        port = str_cls(port).encode('ascii')

    netloc = b''
    if username is not None:
        netloc += username
        if password:
            netloc += b':' + password
        netloc += b'@'
    if hostname is not None:
        netloc += hostname
    if port is not None:
        default_http = scheme == b'http' and port == b'80'
        default_https = scheme == b'https' and port == b'443'
        # The port is only dropped when normalizing and it is the default
        # port for the scheme
        if not normalize or (not default_http and not default_https):
            netloc += b':' + port

    # RFC 3986 allows a path to contain sub-delims, plus "@" and ":"
    path = _urlquote(parsed.path, safe='/!$&\'()*+,;=@:')
    # RFC 3986 allows the query to contain sub-delims, plus "@", ":" , "/" and "?"
    query = _urlquote(parsed.query, safe='/?!$&\'()*+,;=@:')
    # RFC 3986 allows the fragment to contain sub-delims, plus "@", ":" , "/" and "?"
    fragment = _urlquote(parsed.fragment, safe='/?!$&\'()*+,;=@:')

    if normalize and query is None and fragment is None and path == b'/':
        path = None

    # Python 2.7 compat
    if path is None:
        path = ''

    output = urlunsplit((scheme, netloc, path, query, fragment))
    if isinstance(output, str_cls):
        output = output.encode('latin1')
    return output

118

119

def uri_to_iri(value):
    """
    Converts an ASCII URI byte string into a unicode IRI

    :param value:
        An ASCII-encoded byte string of the URI

    :raises:
        TypeError - when value is not a byte string

    :return:
        A unicode string of the IRI
    """

    if not isinstance(value, byte_cls):
        raise TypeError(unwrap(
            '''
            value must be a byte string, not %s
            ''',
            type_name(value)
        ))

    parsed = urlsplit(value)

    scheme = parsed.scheme
    if scheme is not None:
        scheme = scheme.decode('ascii')

    # ":" and "@" are the userinfo delimiters, so any that were
    # percent-encoded are kept in %XX form
    username = _urlunquote(parsed.username, remap=[':', '@'])
    password = _urlunquote(parsed.password, remap=[':', '@'])
    hostname = parsed.hostname
    if hostname:
        hostname = hostname.decode('idna')
        # The hostname attribute has the square brackets of an IP literal
        # (e.g. "[::1]") stripped. A hostname can only contain ":" when it
        # was bracketed, so the brackets are restored to keep the host and
        # port distinguishable in the resulting IRI.
        if ':' in hostname:
            hostname = '[' + hostname + ']'
    port = parsed.port
    if port and not isinstance(port, int_types):
        port = port.decode('ascii')

    netloc = ''
    if username is not None:
        netloc += username
        if password:
            netloc += ':' + password
        netloc += '@'
    if hostname is not None:
        netloc += hostname
    if port is not None:
        netloc += ':' + str_cls(port)

    # Literal delimiters are preserved in the path and query, while
    # percent-encoded ones are left in %XX form
    path = _urlunquote(parsed.path, remap=['/'], preserve=True)
    query = _urlunquote(parsed.query, remap=['&', '='], preserve=True)
    fragment = _urlunquote(parsed.fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))

170

171

def _iri_utf8_errors_handler(exc):
    """
    Codec error handler used when decoding the UTF-8 portions of a URI into
    an IRI. The bytes that could not be decoded are emitted in %XX form as
    part of the unicode output instead of raising an exception.

    :param exc:
        The UnicodeDecodeError exception

    :return:
        A 2-element tuple of (replacement unicode string, integer index to
        resume at)
    """

    invalid_bytes = exc.object[exc.start:exc.end]
    escaped = ''.join('%%%02x' % value for value in bytes_to_list(invalid_bytes))
    return (escaped, exc.end)


# Makes the handler available to decode() calls under the name "iriutf8"
codecs.register_error('iriutf8', _iri_utf8_errors_handler)

191

192

def _urlquote(string, safe=''):
    """
    Quotes a unicode string for use in a URL

    :param string:
        A unicode string

    :param safe:
        A unicode string of character to not encode

    :return:
        None (if string is None or empty) or an ASCII byte string of the
        quoted string
    """

    if string is None or string == '':
        return None

    # Anything already hex quoted is unquoted if possible
    if re.search('%[0-9a-fA-F]{2}', string):
        # Try to unquote any percent values, restoring them if they are not
        # valid UTF-8. Also, requote any safe chars since encoded versions of
        # those are functionally different than the unquoted ones.
        def _try_unescape(match):
            byte_string = unquote_to_bytes(match.group(0))
            unicode_string = byte_string.decode('utf-8', 'iriutf8')
            for safe_char in list(safe):
                unicode_string = unicode_string.replace(safe_char, '%%%02x' % ord(safe_char))
            return unicode_string
        string = re.sub('(?:%[0-9a-fA-F]{2})+', _try_unescape, string)

    safe_bytes = safe.encode('utf-8')

    # Splitting on a capturing group places the remaining hex quoted values
    # at the odd indexes and the text between them at the even indexes. The
    # escapes are passed through untouched so they are not double quoted,
    # and no placeholder character is needed that could collide with data
    # (such as a NUL) that is present in the string itself.
    pieces = re.split('(%[0-9a-fA-F]{2})', string)

    encoded = []
    for index, piece in enumerate(pieces):
        if index % 2:
            encoded.append(piece.encode('ascii'))
            continue
        quoted = urlquote(piece.encode('utf-8'), safe=safe_bytes)
        # On Python 3 quote() returns a unicode string; on Python 2, bytes
        # is an alias of str, which is what quote() already returns
        if not isinstance(quoted, bytes):
            quoted = quoted.encode('ascii')
        encoded.append(quoted)

    return b''.join(encoded)

243

244

def _urlunquote(byte_string, remap=None, preserve=None):
    """
    Unquotes a URI portion from a byte string into unicode using UTF-8

    :param byte_string:
        A byte string of the data to unquote

    :param remap:
        A list of characters (as unicode) that should be re-mapped to a
        %XX encoding. This is used when characters are not valid in part of a
        URL.

    :param preserve:
        A bool - indicates that the chars to be remapped if they occur in
        non-hex form, should be preserved. E.g. / for URL path.

    :return:
        None (if byte_string is None) or a unicode string
    """

    if byte_string is None:
        return byte_string

    if byte_string == b'':
        return ''

    # Pairs of (raw byte, %XX byte string) for each char to be re-mapped
    remap_pairs = [
        (char.encode('ascii'), ('%%%02x' % ord(char)).encode('ascii'))
        for char in (remap or [])
    ]

    def _requote_remapped(chunk):
        for raw, quoted in remap_pairs:
            chunk = chunk.replace(raw, quoted)
        return chunk

    if preserve:
        # Only the hex quoted runs are unquoted and re-mapped, which leaves
        # any literal occurrence of a re-mapped char exactly as it was. This
        # avoids swapping literals for placeholder control characters that
        # could collide with a decoded value such as %1A.
        def _unquote_run(match):
            return _requote_remapped(unquote_to_bytes(match.group(0)))
        byte_string = re.sub(b'(?:%[0-9a-fA-F]{2})+', _unquote_run, byte_string)
    else:
        byte_string = _requote_remapped(unquote_to_bytes(byte_string))

    # Bytes that are not valid UTF-8 are handled by the "iriutf8" error handler
    return byte_string.decode('utf-8', 'iriutf8')

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/asn1crypto/_iri.py: 11%

141 statements