1# Copyright (c) 2012-2013 Mitch Garnaat http://garnaat.org/
2# Copyright 2012-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License"). You
5# may not use this file except in compliance with the License. A copy of
6# the License is located at
7#
8# http://aws.amazon.com/apache2.0/
9#
10# or in the "license" file accompanying this file. This file is
11# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
12# ANY KIND, either express or implied. See the License for the specific
13# language governing permissions and limitations under the License.
14import functools
15import logging
16from collections.abc import Mapping
17
18import urllib3.util
19from urllib3.connection import HTTPConnection, VerifiedHTTPSConnection
20from urllib3.connectionpool import HTTPConnectionPool, HTTPSConnectionPool
21
22import botocore.utils
23from botocore.compat import (
24 HTTPHeaders,
25 HTTPResponse,
26 MutableMapping,
27 urlencode,
28 urlparse,
29 urlsplit,
30 urlunsplit,
31)
32from botocore.exceptions import UnseekableStreamError
33
34logger = logging.getLogger(__name__)
35
36
class AWSHTTPResponse(HTTPResponse):
    """HTTPResponse that can be primed with an already-read status line.

    The connection code in this module may consume the status line from the
    socket before the response object is created. That parsed line is handed
    in through the ``status_tuple`` keyword argument and is returned by the
    first call to ``_read_status`` instead of reading the socket again.
    """

    # ``*args, **kwargs`` are forwarded untouched so that this class does not
    # depend on the exact constructor signature of the parent class.
    def __init__(self, *args, **kwargs):
        # ``status_tuple`` is mandatory: a missing key raises KeyError.
        self._status_tuple = kwargs.pop('status_tuple')
        super().__init__(*args, **kwargs)

    def _read_status(self):
        """Return the primed status tuple once, then defer to the parent."""
        pending = self._status_tuple
        if pending is None:
            return super()._read_status()
        # Hand the stored value out exactly one time.
        self._status_tuple = None
        return pending
51
52
class AWSConnection:
    """Mixin for HTTPConnection that supports Expect 100-continue.

    This is meant to be mixed with a subclass of httplib.HTTPConnection
    (though technically we subclass from urllib3, which subclasses
    httplib.HTTPConnection). The only behavior overridden here is support
    for Expect 100-continue, which we need for S3. As far as I can tell,
    this is general purpose enough to not be specific to S3, but I'm being
    tentative and keeping it in botocore because I've only tested
    this against AWS services.

    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Remember the response class so it can be restored after a request
        # temporarily swapped in a pre-seeded AWSHTTPResponse.
        self._original_response_cls = self.response_class
        # This variable is set when we receive an early response from the
        # server. If this value is set to True, any calls to send() are noops.
        # This value is reset to false every time request() is called.
        # This is to workaround changes in urllib3 2.0 which uses separate
        # send() calls in request() instead of delegating to endheaders(),
        # which is where the body is sent in CPython's HTTPConnection.
        self._response_received = False
        # True only while a request carrying ``Expect: 100-continue`` is
        # being written out.
        self._expect_header_set = False
        # Tracks whether the "send() skipped" debug message was already
        # logged, so it is emitted at most once until close().
        self._send_called = False

    def close(self):
        """Close the connection and reset all Expect-related state."""
        super().close()
        # Reset all of our instance state we were tracking.
        self._response_received = False
        self._expect_header_set = False
        self._send_called = False
        self.response_class = self._original_response_cls

    def request(self, method, url, body=None, headers=None, *args, **kwargs):
        """Send a request, enabling the 100-continue handling when the
        ``Expect`` header equals ``b'100-continue'``.

        Returns whatever the parent ``request`` implementation returns.
        """
        if headers is None:
            headers = {}
        self._response_received = False
        if headers.get('Expect', b'') == b'100-continue':
            self._expect_header_set = True
        else:
            self._expect_header_set = False
            self.response_class = self._original_response_cls
        rval = super().request(method, url, body, headers, *args, **kwargs)
        self._expect_header_set = False
        return rval

    def _convert_to_bytes(self, mixed_buffer):
        """Join a list of mixed str/bytes chunks into one CRLF-separated
        bytestring. Any str chunk is encoded as utf-8.
        """
        bytes_buffer = []
        for chunk in mixed_buffer:
            if isinstance(chunk, str):
                bytes_buffer.append(chunk.encode('utf-8'))
            else:
                bytes_buffer.append(chunk)
        msg = b"\r\n".join(bytes_buffer)
        return msg

    def _send_output(self, message_body=None, *args, **kwargs):
        """Flush the buffered request head and, unless the server answered
        early, the message body.

        When the Expect header was set, wait up to one second for the server
        to respond before deciding whether to send the body.
        """
        # Two empty entries yield the blank line that terminates the headers.
        self._buffer.extend((b"", b""))
        msg = self._convert_to_bytes(self._buffer)
        del self._buffer[:]
        # If msg and message_body are sent in a single send() call,
        # it will avoid performance problems caused by the interaction
        # between delayed ack and the Nagle algorithm.
        if isinstance(message_body, bytes):
            msg += message_body
            message_body = None
        self.send(msg)
        if self._expect_header_set:
            # This is our custom behavior.  If the Expect header was
            # set, it will trigger this custom behavior.
            logger.debug("Waiting for 100 Continue response.")
            # Wait for 1 second for the server to send a response.
            if urllib3.util.wait_for_read(self.sock, 1):
                self._handle_expect_response(message_body)
                return
            else:
                # From the RFC:
                # Because of the presence of older implementations, the
                # protocol allows ambiguous situations in which a client may
                # send "Expect: 100-continue" without receiving either a 417
                # (Expectation Failed) status or a 100 (Continue) status.
                # Therefore, when a client sends this header field to an origin
                # server (possibly via a proxy) from which it has never seen a
                # 100 (Continue) status, the client SHOULD NOT wait for an
                # indefinite period before sending the request body.
                logger.debug(
                    "No response seen from server, continuing to "
                    "send the response body."
                )
        if message_body is not None:
            # message_body was not a string (i.e. it is a file), and
            # we must run the risk of Nagle.
            self.send(message_body)

    def _consume_headers(self, fp):
        """Read and discard lines from ``fp`` up to and including the blank
        line that ends the 100 Continue response.
        """
        # Most servers (including S3) will just return
        # the CRLF after the 100 continue response.  However,
        # some servers (I've specifically seen this for squid when
        # used as a straight HTTP proxy) will also inject a
        # Connection: keep-alive header.  To account for this
        # we'll read until we read '\r\n', and ignore any headers
        # that come immediately after the 100 continue response.
        current = None
        # readline() returns b'' at end of stream. Stop on that as well,
        # otherwise a peer that closes the connection before sending the
        # blank line would make this loop spin forever.
        while current not in (b'\r\n', b''):
            current = fp.readline()

    def _handle_expect_response(self, message_body):
        """Decide what to do after the server responded to the request head.

        Called when we sent request headers containing an
        Expect: 100-continue header and data became readable on the socket.
        A 100 status triggers sending ``message_body``; any other well-formed
        status line is kept as the final response and the body is not sent.
        """
        fp = self.sock.makefile('rb', 0)
        try:
            maybe_status_line = fp.readline()
            parts = maybe_status_line.split(None, 2)
            if self._is_100_continue_status(maybe_status_line):
                self._consume_headers(fp)
                logger.debug(
                    "100 Continue response seen, now sending request body."
                )
                self._send_message_body(message_body)
            elif len(parts) == 3 and parts[0].startswith(b'HTTP/'):
                # From the RFC:
                # Requirements for HTTP/1.1 origin servers:
                #
                # - Upon receiving a request which includes an Expect
                #   request-header field with the "100-continue"
                #   expectation, an origin server MUST either respond with
                #   100 (Continue) status and continue to read from the
                #   input stream, or respond with a final status code.
                #
                # So if we don't get a 100 Continue response, then
                # whatever the server has sent back is the final response
                # and don't send the message_body.
                logger.debug(
                    "Received a non 100 Continue response "
                    "from the server, NOT sending request body."
                )
                status_tuple = (
                    parts[0].decode('ascii'),
                    int(parts[1]),
                    parts[2].decode('ascii'),
                )
                # The status line was already consumed from the socket, so
                # the response object must be seeded with it.
                response_class = functools.partial(
                    AWSHTTPResponse, status_tuple=status_tuple
                )
                self.response_class = response_class
                self._response_received = True
        finally:
            fp.close()

    def _send_message_body(self, message_body):
        """Send ``message_body`` unless it is None."""
        if message_body is not None:
            self.send(message_body)

    # NOTE: the parameter name shadows the ``str`` builtin; it is kept as is
    # so the method signature stays unchanged for existing callers.
    def send(self, str):
        """Send data, or do nothing if a final response was already received."""
        if self._response_received:
            if not self._send_called:
                # urllib3 2.0 chunks and calls send potentially
                # thousands of times inside `request` unlike the
                # standard library. Only log this once for sanity.
                logger.debug(
                    "send() called, but response already received. "
                    "Not sending data."
                )
            self._send_called = True
            return
        return super().send(str)

    def _is_100_continue_status(self, maybe_status_line):
        """Return True if the line looks like ``HTTP/<version> 100 <reason>``."""
        parts = maybe_status_line.split(None, 2)
        # Check for HTTP/<version> 100 Continue\r\n
        return (
            len(parts) >= 3
            and parts[0].startswith(b'HTTP/')
            and parts[1] == b'100'
        )
233
234
class AWSHTTPConnection(AWSConnection, HTTPConnection):
    """HTTPConnection with the 100 Continue handling of AWSConnection mixed in."""
237
238
class AWSHTTPSConnection(AWSConnection, VerifiedHTTPSConnection):
    """HTTPS connection with the 100 Continue handling of AWSConnection mixed in."""
241
242
class AWSHTTPConnectionPool(HTTPConnectionPool):
    """HTTP connection pool whose connection class is AWSHTTPConnection."""

    ConnectionCls = AWSHTTPConnection
245
246
class AWSHTTPSConnectionPool(HTTPSConnectionPool):
    """HTTPS connection pool whose connection class is AWSHTTPSConnection."""

    ConnectionCls = AWSHTTPSConnection
249
250
def prepare_request_dict(
    request_dict, endpoint_url, context=None, user_agent=None
):
    """
    Fill in the pieces of a request dict that are needed before it can be
    turned into an AWSRequest object. The dict is modified in place: the
    ``url`` and ``context`` keys are set and, when a user agent is given,
    the ``User-Agent`` header is set as well.

    :type request_dict: dict
    :param request_dict: The request dict (created from the
        ``serialize`` module).

    :type endpoint_url: string
    :param endpoint_url: The full endpoint url, which contains at least
        the scheme, the hostname, and optionally any path components.

    :type context: dict
    :param context: Stored under ``request_dict['context']``. When it is
        None an empty dict is stored instead.

    :type user_agent: string
    :param user_agent: The user agent to use for this request. The header
        is left untouched when this is None.
    """
    if user_agent is not None:
        request_dict['headers']['User-Agent'] = user_agent

    url = _urljoin(
        endpoint_url,
        request_dict['url_path'],
        request_dict.get('host_prefix'),
    )

    query_params = request_dict['query_string']
    if query_params:
        # NOTE: The encoder is looked up through ``botocore.utils`` at call
        # time to avoid a circular import with utils, without having to move
        # classes to different modules (which could be a breaking change).
        encoded_query = botocore.utils.percent_encode_sequence(query_params)
        # Extend an existing query string rather than starting a second one.
        if '?' in url:
            url += f'&{encoded_query}'
        else:
            url += f'?{encoded_query}'

    request_dict['url'] = url
    request_dict['context'] = {} if context is None else context
290
291
def create_request_object(request_dict):
    """
    Build an AWSRequest object out of a prepared request dict.

    :type request_dict: dict
    :param request_dict: The request dict (created from the
        ``prepare_request_dict`` method).

    :rtype: ``botocore.awsrequest.AWSRequest``
    :return: An AWSRequest object based on the request_dict, with its
        ``context`` attribute set to ``request_dict['context']``.

    """
    request_kwargs = {
        'method': request_dict['method'],
        'url': request_dict['url'],
        'data': request_dict['body'],
        'headers': request_dict['headers'],
        # ``auth_path`` is optional in the dict; a missing key becomes None.
        'auth_path': request_dict.get('auth_path'),
    }
    aws_request = AWSRequest(**request_kwargs)
    aws_request.context = request_dict['context']
    return aws_request
315
316
def _urljoin(endpoint_url, url_path, host_prefix):
    """Combine an endpoint url with a request path and optional host prefix.

    The path of ``endpoint_url`` is extended with ``url_path`` (a duplicated
    '/' at the seam is collapsed), ``host_prefix`` is prepended to the
    network location when it is not None, and the query and fragment of
    ``endpoint_url`` are carried over unchanged.
    """
    scheme, netloc, base_path, query, fragment = urlsplit(endpoint_url)

    if url_path and url_path != '/':
        if base_path.endswith('/') and url_path.startswith('/'):
            # Drop one of the two slashes that would meet at the join.
            joined_path = base_path[:-1] + url_path
        else:
            joined_path = base_path + url_path
    else:
        # If there's no path component, ensure the URL ends with
        # a '/' for backwards compatibility.
        joined_path = base_path or '/'

    if host_prefix is not None:
        netloc = host_prefix + netloc

    return urlunsplit((scheme, netloc, joined_path, query, fragment))
343
344
class AWSRequestPreparer:
    """
    Turns AWSRequest objects into AWSPreparedRequest objects, in the spirit
    of the PreparedRequest class of the requests library but reduced to the
    specific use cases in botocore. Of note there are the following
    differences:
        This class does not heavily prepare the URL. Requests performed many
        validations and corrections to ensure the URL is properly formatted.
        Botocore either performs these validations elsewhere or otherwise
        consistently provides well formatted URLs.

        This class does not heavily prepare the body. Body preparation is
        simple and supports only the cases that we document: bytes and
        file-like objects to determine the content-length. This will also
        additionally prepare a body that is a dict to be url encoded params
        string as some signers rely on this. Finally, this class does not
        support multipart file uploads.

        This class does not prepare the method, auth or cookies.
    """

    def prepare(self, original):
        """Return an AWSPreparedRequest built from ``original``."""
        method = original.method
        prepared_url = self._prepare_url(original)
        prepared_body = self._prepare_body(original)
        # Headers come last because the content length depends on the body.
        prepared_headers = self._prepare_headers(original, prepared_body)
        return AWSPreparedRequest(
            method,
            prepared_url,
            prepared_headers,
            prepared_body,
            original.stream_output,
        )

    def _prepare_url(self, original):
        """Return the request url with ``original.params`` url-encoded and
        appended, or the url unchanged when there are no params.
        """
        url = original.url
        if not original.params:
            return url
        # Extend an existing query string instead of starting a new one.
        separator = '&' if urlparse(url).query else '?'
        if isinstance(original.params, Mapping):
            pairs = list(original.params.items())
        else:
            pairs = original.params
        encoded = urlencode(pairs, doseq=True)
        return separator.join((url, encoded))

    def _prepare_headers(self, original, prepared_body=None):
        """Return a HeadersDict copy of the request headers, adding
        Content-Length (or chunked Transfer-Encoding) where needed.
        """
        headers = HeadersDict(original.headers.items())

        # If the transfer encoding or content length is already set, use that
        framing_present = (
            'Transfer-Encoding' in headers or 'Content-Length' in headers
        )
        # A length is only added for methods other than GET/HEAD/OPTIONS.
        if framing_present or original.method in ('GET', 'HEAD', 'OPTIONS'):
            return headers

        length = self._determine_content_length(prepared_body)
        if length is None:
            # Failed to determine content length, using chunked
            # NOTE: This shouldn't ever happen in practice
            logger.debug(
                'Failed to determine length of %s', type(prepared_body)
            )
            headers['Transfer-Encoding'] = 'chunked'
        else:
            headers['Content-Length'] = str(length)
        return headers

    def _to_utf8(self, item):
        """Encode the str members of a (key, value) pair as utf-8 bytes."""
        key, value = item
        return (
            key.encode('utf-8') if isinstance(key, str) else key,
            value.encode('utf-8') if isinstance(value, str) else value,
        )

    def _prepare_body(self, original):
        """Prepares the given HTTP body data.

        Empty bytes become None, a dict becomes a url-encoded string and
        anything else is returned as is.
        """
        body = original.data
        if body == b'':
            return None
        if isinstance(body, dict):
            encoded_pairs = [self._to_utf8(pair) for pair in body.items()]
            return urlencode(encoded_pairs, doseq=True)
        return body

    def _determine_content_length(self, body):
        """Delegate to ``botocore.utils.determine_content_length``."""
        return botocore.utils.determine_content_length(body)
431
432
class AWSRequest:
    """Represents the elements of an HTTP request.

    This class was originally inspired by requests.models.Request, but has been
    boiled down to meet the specific use cases in botocore. That being said this
    class (even in requests) is effectively a named-tuple.
    """

    # Class used to turn this request into an AWSPreparedRequest.
    _REQUEST_PREPARER_CLS = AWSRequestPreparer

    def __init__(
        self,
        method=None,
        url=None,
        headers=None,
        data=None,
        params=None,
        auth_path=None,
        stream_output=False,
    ):
        self._request_preparer = self._REQUEST_PREPARER_CLS()

        self.method = method
        self.url = url
        self.data = data
        # A missing ``params`` argument defaults to a fresh empty dict.
        self.params = params if params is not None else {}
        self.auth_path = auth_path
        self.stream_output = stream_output

        # Headers are always stored in an HTTPHeaders object; entries from
        # the ``headers`` argument are copied over one by one.
        self.headers = HTTPHeaders()
        if headers is not None:
            for name, value in headers.items():
                self.headers[name] = value

        # This is a dictionary to hold information that is used when
        # processing the request. What is inside of ``context`` is open-ended.
        # For example, it may have a timestamp key that is used for holding
        # what the timestamp is when signing the request. Note that none
        # of the information that is inside of ``context`` is directly
        # sent over the wire; the information is only used to assist in
        # creating what is sent over the wire.
        self.context = {}

    def prepare(self):
        """Constructs a :class:`AWSPreparedRequest <AWSPreparedRequest>`."""
        return self._request_preparer.prepare(self)

    @property
    def body(self):
        """The prepared body of this request; a str body is utf-8 encoded."""
        prepared_body = self.prepare().body
        if isinstance(prepared_body, str):
            return prepared_body.encode('utf-8')
        return prepared_body
489
490
class AWSPreparedRequest:
    """A data class representing a finalized request to be sent over the wire.

    Requests at this stage should be treated as final, and the properties of
    the request should not be modified.

    :ivar method: The HTTP Method
    :ivar url: The full url
    :ivar headers: The HTTP headers to send.
    :ivar body: The HTTP body.
    :ivar stream_output: If the response for this request should be streamed.
    """

    def __init__(self, method, url, headers, body, stream_output):
        self.method = method
        self.url = url
        self.headers = headers
        self.body = body
        self.stream_output = stream_output

    def __repr__(self):
        template = (
            '<AWSPreparedRequest stream_output=%s, method=%s, url=%s, '
            'headers=%s>'
        )
        values = (self.stream_output, self.method, self.url, self.headers)
        return template % values

    def reset_stream(self):
        """Resets the streaming body to its initial position.

        If the request contains a streaming body (a streamable file-like object)
        seek to the object's initial position to ensure the entire contents of
        the object is sent. This is a no-op for static bytes-like body types.

        :raises UnseekableStreamError: If seeking the body to position 0
            raises any exception.
        """
        # Trying to reset a stream when there is a no stream will
        # just immediately return.  It's not an error, it will produce
        # the same result as if we had actually reset the stream (we'll send
        # the entire body contents again if we need to).
        # Same case if the body is a string/bytes/bytearray type.
        stream = self.body
        if stream is None:
            return
        if isinstance(stream, (bytes, str, bytearray)):
            return
        try:
            logger.debug("Rewinding stream: %s", stream)
            stream.seek(0)
        except Exception as e:
            logger.debug("Unable to rewind stream: %s", e)
            raise UnseekableStreamError(stream_object=self.body)
540
541
class AWSResponse:
    """A data class representing an HTTP response.

    This class was originally inspired by requests.models.Response, but has
    been boiled down to meet the specific use cases in botocore. This has
    effectively been reduced to a named tuple.

    :ivar url: The full url.
    :ivar status_code: The status code of the HTTP response.
    :ivar headers: The HTTP headers received.
    :ivar raw: The underlying response object; its ``stream()`` method is
        used to read the body.
    """

    def __init__(self, url, status_code, headers, raw):
        self.url = url
        self.status_code = status_code
        self.headers = HeadersDict(headers)
        self.raw = raw

        # Body bytes, filled in on first access of ``content``.
        self._content = None

    @property
    def content(self):
        """Content of the response as bytes."""
        cached = self._content
        if cached is None:
            # Read the contents.
            # NOTE: requests would attempt to call stream and fall back
            # to a custom generator that would call read in a loop, but
            # we don't rely on this behavior
            cached = b''.join(self.raw.stream()) or b''
            self._content = cached
        return cached

    @property
    def text(self):
        """Content of the response as a proper text type.

        Uses the encoding type provided in the response headers to decode the
        response content into a proper text type. If the encoding is not
        present in the headers, UTF-8 is used as a default.
        """
        encoding = botocore.utils.get_encoding_from_headers(self.headers)
        return self.content.decode(encoding or 'utf-8')
589
590
class _HeaderKey:
    """Dictionary key that compares and hashes case-insensitively.

    The key is kept as given for display, while equality and hashing use
    its lower-cased form.
    """

    def __init__(self, key):
        self._key = key
        self._lower = key.lower()

    def __hash__(self):
        return hash(self._lower)

    def __eq__(self, other):
        # Only another _HeaderKey can ever be equal.
        if not isinstance(other, _HeaderKey):
            return False
        return self._lower == other._lower

    def __str__(self):
        return self._key

    def __repr__(self):
        return repr(self._key)
607
608
class HeadersDict(MutableMapping):
    """A case-insensitive dictionary to represent HTTP headers."""

    def __init__(self, *args, **kwargs):
        # Backing store, keyed by _HeaderKey wrappers around the names.
        self._dict = {}
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        wrapped = _HeaderKey(key)
        self._dict[wrapped] = value

    def __getitem__(self, key):
        wrapped = _HeaderKey(key)
        return self._dict[wrapped]

    def __delitem__(self, key):
        wrapped = _HeaderKey(key)
        del self._dict[wrapped]

    def __iter__(self):
        # Yield header names as plain strings rather than wrapper objects.
        return map(str, self._dict)

    def __len__(self):
        return len(self._dict)

    def __repr__(self):
        return repr(self._dict)

    def copy(self):
        """Return a new HeadersDict holding the same items."""
        return HeadersDict(self.items())