Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/ansible_core-2.17.0.dev0-py3.8.egg/ansible/module_utils/common/text/converters.py: 18%

111 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-30 06:38 +0000

1# -*- coding: utf-8 -*- 

2# Copyright (c) 2019 Ansible Project 

3# (c) 2016 Toshio Kuratomi <tkuratomi@ansible.com> 

4# Simplified BSD License (see licenses/simplified_bsd.txt or https://opensource.org/licenses/BSD-2-Clause) 

5 

6from __future__ import annotations 

7 

8import codecs 

9import datetime 

10import json 

11 

12from ansible.module_utils.six.moves.collections_abc import Set 

13from ansible.module_utils.six import ( 

14 PY3, 

15 binary_type, 

16 iteritems, 

17 text_type, 

18) 

19 

20try: 

21 codecs.lookup_error('surrogateescape') 

22 HAS_SURROGATEESCAPE = True 

23except LookupError: 

24 HAS_SURROGATEESCAPE = False 

25 

26 

27_COMPOSED_ERROR_HANDLERS = frozenset((None, 'surrogate_or_replace', 

28 'surrogate_or_strict', 

29 'surrogate_then_replace')) 

30 

31 

def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
    """Make sure that a string is a byte string

    :arg obj: An object to make sure is a byte string.  In most cases this
        will be either a text string or a byte string.  However, with
        ``nonstring='simplerepr'``, this can be used as a traceback-free
        version of ``str(obj)``.
    :kwarg encoding: The encoding to use to transform from a text string to
        a byte string.  Defaults to using 'utf-8'.
    :kwarg errors: The error handler to use if the text string is not
        encodable using the specified encoding.  Any valid `codecs error
        handler <https://docs.python.org/3/library/codecs.html#codec-base-classes>`_
        may be specified. There are three additional error strategies
        specifically aimed at helping people to port code.  The first two are:

            :surrogate_or_strict: Will use ``surrogateescape`` if it is a valid
                handler, otherwise it will use ``strict``
            :surrogate_or_replace: Will use ``surrogateescape`` if it is a valid
                handler, otherwise it will use ``replace``.

        Because ``surrogateescape`` was added in Python3 this usually means that
        Python3 will use ``surrogateescape`` and Python2 will use the fallback
        error handler. Note that the code checks for ``surrogateescape`` when the
        module is imported.  If you have a backport of ``surrogateescape`` for
        Python2, be sure to register the error handler prior to importing this
        module.

        The last error handler is:

            :surrogate_then_replace: Will use ``surrogateescape`` if it is a valid
                handler.  If encoding with ``surrogateescape`` would traceback,
                surrogates are first replaced with replacement characters
                and then the string is encoded using ``replace`` (which replaces
                the rest of the nonencodable bytes).  If ``surrogateescape`` is
                not present it will simply use ``replace``.  (Added in Ansible 2.3)
                This strategy is designed to never traceback when it attempts
                to encode a string.

        The default until Ansible-2.2 was ``surrogate_or_replace``
        From Ansible-2.3 onwards, the default is ``surrogate_then_replace``.

    :kwarg nonstring: The strategy to use if a nonstring is specified in
        ``obj``.  Default is 'simplerepr'.  Valid values are:

        :simplerepr: The default.  This takes the ``str`` of the object and
            then returns the bytes version of that string.
        :empty: Return an empty byte string
        :passthru: Return the object passed in
        :strict: Raise a :exc:`TypeError`

    :returns: Typically this returns a byte string.  If a nonstring object is
        passed in this may be a different type depending on the strategy
        specified by nonstring.  This will never return a text string.
    :raises TypeError: If ``obj`` is not a string and ``nonstring`` is
        ``'strict'`` or is not one of the valid values listed above.

    .. note:: If passed a byte string, this function does not check that the
        string is valid in the specified encoding.  If it's important that the
        byte string is in the specified encoding do::

            encoded_string = to_bytes(to_text(input_string, 'latin-1'), 'utf-8')

    .. versionchanged:: 2.3

        Added the ``surrogate_then_replace`` error handler and made it the default error handler.
    """
    if isinstance(obj, binary_type):
        return obj

    # Remember the handler name the caller asked for.  The composed handlers
    # are mapped onto a real codecs handler below, but both the
    # ``surrogate_then_replace`` fallback and the nonstring recursion at the
    # end of this function need the original value.
    original_errors = errors
    if errors in _COMPOSED_ERROR_HANDLERS:
        if HAS_SURROGATEESCAPE:
            errors = 'surrogateescape'
        elif errors == 'surrogate_or_strict':
            errors = 'strict'
        else:
            errors = 'replace'

    if isinstance(obj, text_type):
        try:
            # Try this first as it's the fastest
            return obj.encode(encoding, errors)
        except UnicodeEncodeError:
            if original_errors in (None, 'surrogate_then_replace'):
                # The fast path failed under the default /
                # surrogate_then_replace strategy (for instance the text is
                # not representable in a non-utf8 ``encoding``).

                # Slow but works: round-trip through utf-8 so surrogates
                # become replacement characters, then encode with ``replace``.
                # NOTE(review): ``surrogateescape`` only encodes the lone
                # surrogates U+DC80..U+DCFF; other lone surrogates would
                # still raise on the next line -- confirm whether that case
                # needs handling.
                return_string = obj.encode('utf-8', 'surrogateescape')
                return_string = return_string.decode('utf-8', 'replace')
                return return_string.encode(encoding, 'replace')
            raise

    # Note: We do these last even though we have to call to_bytes again on the
    # value because we're optimizing the common case
    if nonstring == 'simplerepr':
        try:
            value = str(obj)
        except UnicodeError:
            try:
                value = repr(obj)
            except UnicodeError:
                # Giving up
                return b''
    elif nonstring == 'passthru':
        return obj
    elif nonstring == 'empty':
        return b''
    elif nonstring == 'strict':
        raise TypeError('obj must be a string type')
    else:
        raise TypeError('Invalid value %s for to_bytes\' nonstring parameter' % nonstring)

    # Recurse with the caller's original ``errors`` rather than the translated
    # handler.  Passing the translated value ('surrogateescape') would disable
    # the surrogate_then_replace fallback above, making a non-string object
    # raise where the equivalent text string would have been encoded.
    return to_bytes(value, encoding, original_errors)

147 

148 

def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
    """Make sure that a string is a text string

    :arg obj: An object to make sure is a text string.  In most cases this
        will be either a text string or a byte string.  However, with
        ``nonstring='simplerepr'``, this can be used as a traceback-free
        version of ``str(obj)``.
    :kwarg encoding: The encoding to use to transform from a byte string to
        a text string.  Defaults to using 'utf-8'.
    :kwarg errors: The error handler to use if the byte string is not
        decodable using the specified encoding.  Any valid `codecs error
        handler <https://docs.python.org/3/library/codecs.html#codec-base-classes>`_
        may be specified.   We support three additional error strategies
        specifically aimed at helping people to port code:

            :surrogate_or_strict: Will use surrogateescape if it is a valid
                handler, otherwise it will use strict
            :surrogate_or_replace: Will use surrogateescape if it is a valid
                handler, otherwise it will use replace.
            :surrogate_then_replace: Does the same as surrogate_or_replace but
                was added for symmetry with the error handlers in
                :func:`ansible.module_utils.common.text.converters.to_bytes` (Added in Ansible 2.3)

        Because surrogateescape was added in Python3 this usually means that
        Python3 will use `surrogateescape` and Python2 will use the fallback
        error handler. Note that the code checks for surrogateescape when the
        module is imported.  If you have a backport of `surrogateescape` for
        python2, be sure to register the error handler prior to importing this
        module.

        The default until Ansible-2.2 was `surrogate_or_replace`
        In Ansible-2.3 this defaults to `surrogate_then_replace` for symmetry
        with :func:`ansible.module_utils.common.text.converters.to_bytes` .
    :kwarg nonstring: The strategy to use if a nonstring is specified in
        ``obj``.  Default is 'simplerepr'.  Valid values are:

        :simplerepr: The default.  This takes the ``str`` of the object and
            then returns the text version of that string.
        :empty: Return an empty text string
        :passthru: Return the object passed in
        :strict: Raise a :exc:`TypeError`

    :returns: Typically this returns a text string.  If a nonstring object is
        passed in this may be a different type depending on the strategy
        specified by nonstring.  This will never return a byte string.
    :raises TypeError: If ``obj`` is not a string and ``nonstring`` is
        ``'strict'`` or is not one of the valid values listed above.

    .. versionchanged:: 2.3

        Added the surrogate_then_replace error handler and made it the default error handler.
    """
    # Already text: nothing to do.
    if isinstance(obj, text_type):
        return obj

    # Map the composed handler names onto a handler the codecs module knows.
    if errors in _COMPOSED_ERROR_HANDLERS:
        if HAS_SURROGATEESCAPE:
            errors = 'surrogateescape'
        else:
            errors = 'strict' if errors == 'surrogate_or_strict' else 'replace'

    if isinstance(obj, binary_type):
        # No special handling is needed for surrogate_then_replace here: the
        # decode is delegated entirely to the handler selected above.
        return obj.decode(encoding, errors)

    # Non-string objects are handled last so the common string cases above
    # stay cheap, even though 'simplerepr' has to call to_text() again.
    if nonstring == 'simplerepr':
        try:
            rendered = str(obj)
        except UnicodeError:
            try:
                rendered = repr(obj)
            except UnicodeError:
                # Neither str() nor repr() worked; give up quietly.
                return u''
        return to_text(rendered, encoding, errors)

    if nonstring == 'passthru':
        return obj
    if nonstring == 'empty':
        return u''
    if nonstring == 'strict':
        raise TypeError('obj must be a string type')
    raise TypeError('Invalid value %s for to_text\'s nonstring parameter' % nonstring)

238 

239 

#: :py:func:`to_native`
#:      Transform a variable into the native str type for the python version
#:
#:      On Python2, this is an alias for
#:      :func:`~ansible.module_utils.to_bytes`.  On Python3 it is an alias for
#:      :func:`~ansible.module_utils.to_text`.  It makes it easier to
#:      transform a variable into the native str type for the python version
#:      the code is running on.  Use this when constructing the message to
#:      send to exceptions or when dealing with an API that needs to take
#:      a native string.  Example::
#:
#:          try:
#:              1//0
#:          except ZeroDivisionError as e:
#:              raise MyException('Encountered an error: %s' % to_native(e))
to_native = to_text if PY3 else to_bytes

259 

260 

def _json_encode_fallback(obj):
    """``default=`` hook for :func:`json.dumps` used by :func:`jsonify`.

    :arg obj: The object :mod:`json` could not serialize on its own.
    :returns: A list for ``Set`` instances, or the ISO 8601 string for
        :class:`datetime.datetime` instances.
    :raises TypeError: For any other type.
    """
    if isinstance(obj, Set):
        return list(obj)
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    raise TypeError("Cannot json serialize %s" % to_native(obj))

267 

268 

def jsonify(data, **kwargs):
    """Serialize ``data`` to a JSON string after converting byte strings to text.

    :arg data: The structure to serialize; it is first passed through
        :func:`container_to_text`.
    :kwarg kwargs: Passed through unchanged to :func:`json.dumps`.
    :returns: The JSON document produced by :func:`json.dumps`.
    :raises UnicodeError: If converting with every candidate encoding raised
        :exc:`UnicodeDecodeError`.
    """
    # After 2.18, we should remove this loop, and hardcode to utf-8 in alignment with requiring utf-8 module responses
    for candidate in ("utf-8", "latin-1"):
        try:
            text_data = container_to_text(data, encoding=candidate)
        except UnicodeDecodeError:
            # This encoding did not fit; try the next candidate.
            pass
        else:
            return json.dumps(text_data, default=_json_encode_fallback, **kwargs)
    raise UnicodeError('Invalid unicode encoding encountered')

278 

279 

def container_to_bytes(d, encoding='utf-8', errors='surrogate_or_strict'):
    """Recursively convert dict keys and values to byte str

    Specialized for json return because this only handles, lists, tuples,
    and dict container types (the containers that the json module returns)

    :arg d: The value to convert.  Text strings are encoded with
        :func:`to_bytes`; dicts, lists and tuples are rebuilt with every
        element converted; anything else is returned unchanged.
    :kwarg encoding: Passed to :func:`to_bytes`.
    :kwarg errors: Passed to :func:`to_bytes`.
    :returns: The converted value.
    """
    if isinstance(d, text_type):
        return to_bytes(d, encoding=encoding, errors=errors)

    if isinstance(d, dict):
        converted = {}
        for key, value in iteritems(d):
            # Convert the key before the value, mirroring pairwise conversion.
            new_key = container_to_bytes(key, encoding, errors)
            converted[new_key] = container_to_bytes(value, encoding, errors)
        return converted

    if isinstance(d, list):
        return [container_to_bytes(item, encoding, errors) for item in d]

    if isinstance(d, tuple):
        return tuple(container_to_bytes(item, encoding, errors) for item in d)

    return d

297 

298 

def container_to_text(d, encoding='utf-8', errors='surrogate_or_strict'):
    """Recursively convert dict keys and values to text str

    Specialized for json return because this only handles, lists, tuples,
    and dict container types (the containers that the json module returns)

    :arg d: The value to convert.  Byte strings are decoded with
        :func:`to_text`; dicts, lists and tuples are rebuilt with every
        element converted; anything else is returned unchanged.
    :kwarg encoding: Passed to :func:`to_text`.
    :kwarg errors: Passed to :func:`to_text`.
    :returns: The converted value.
    """
    if isinstance(d, binary_type):
        # Warning, can traceback
        return to_text(d, encoding=encoding, errors=errors)

    if isinstance(d, dict):
        converted = {}
        for key, value in iteritems(d):
            # Convert the key before the value, mirroring pairwise conversion.
            new_key = container_to_text(key, encoding, errors)
            converted[new_key] = container_to_text(value, encoding, errors)
        return converted

    if isinstance(d, list):
        return [container_to_text(item, encoding, errors) for item in d]

    if isinstance(d, tuple):
        return tuple(container_to_text(item, encoding, errors) for item in d)

    return d