1# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"). You
4# may not use this file except in compliance with the License. A copy of
5# the License is located at
6#
7# http://aws.amazon.com/apache2.0/
8#
9# or in the "license" file accompanying this file. This file is
10# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11# ANY KIND, either express or implied. See the License for the specific
12# language governing permissions and limitations under the License.
13"""Response parsers for the various protocol types.
14
15The module contains classes that can take an HTTP response, and given
16an output shape, parse the response into a dict according to the
17rules in the output shape.
18
19There are many similarities amongst the different protocols with regard
20to response parsing, and the code is structured in a way to avoid
code duplication when possible. The diagram below shows the
inheritance hierarchy of the response classes.
23
24::
25
26
27 +-------------------+
28 | ResponseParser |
29 +-------------------+
30 ^ ^ ^ ^ ^
31 | | | | |
32 | | | | +--------------------------------------------+
33 | | | +-----------------------------+ |
34 | | | | |
35 +--------------------+ | +----------------+ | |
36 | | | | |
37+----------+----------+ +------+-------+ +-------+------+ +------+-------+ +------+--------+
38|BaseXMLResponseParser| |BaseRestParser| |BaseJSONParser| |BaseCBORParser| |BaseRpcV2Parser|
39+---------------------+ +--------------+ +--------------+ +----------+---+ +-+-------------+
40 ^ ^ ^ ^ ^ ^ ^ ^
41 | | | | | | | |
42 | | | | | | | |
43 | ++----------+-+ +-+--------+---+ | +---+---------+-+
44 | |RestXMLParser| |RestJSONParser| | |RpcV2CBORParser|
45 +-----+-----+ +-------------+ +--------------+ | +---+---------+-+
46 |QueryParser| |
47 +-----------+ +----+-----+
48 |JSONParser|
49 +----------+
50
51The diagram above shows that there is a base class, ``ResponseParser`` that
52contains logic that is similar amongst all the different protocols (``query``,
53``json``, ``rest-json``, ``rest-xml``, ``smithy-rpc-v2-cbor``). Amongst the various services
54there is shared logic that can be grouped several ways:
55
56* The ``query`` and ``rest-xml`` both have XML bodies that are parsed in the
57 same way.
58* The ``json`` and ``rest-json`` protocols both have JSON bodies that are
59 parsed in the same way.
60* The ``rest-json`` and ``rest-xml`` protocols have additional attributes
61 besides body parameters that are parsed the same (headers, query string,
62 status code).
63
64This is reflected in the class diagram above. The ``BaseXMLResponseParser``
65and the BaseJSONParser contain logic for parsing the XML/JSON body,
66and the BaseRestParser contains logic for parsing out attributes that
67come from other parts of the HTTP response. Classes like the
68``RestXMLParser`` inherit from the ``BaseXMLResponseParser`` to get the
69XML body parsing logic and the ``BaseRestParser`` to get the HTTP
70header/status code/query string parsing.
71
72Additionally, there are event stream parsers that are used by the other parsers
73to wrap streaming bodies that represent a stream of events. The
74BaseEventStreamParser extends from ResponseParser and defines the logic for
75parsing values from the headers and payload of a message from the underlying
76binary encoding protocol. Currently, event streams support parsing bodies
77encoded as JSON and XML through the following hierarchy.
78
79
80 +--------------+
81 |ResponseParser|
82 +--------------+
83 ^ ^ ^
84 +--------------------+ | +------------------+
85 | | |
86 +----------+----------+ +----------+----------+ +-------+------+
87 |BaseXMLResponseParser| |BaseEventStreamParser| |BaseJSONParser|
88 +---------------------+ +---------------------+ +--------------+
89 ^ ^ ^ ^
90 | | | |
91 | | | |
92 +-+----------------+-+ +-+-----------------+-+
93 |EventStreamXMLParser| |EventStreamJSONParser|
94 +--------------------+ +---------------------+
95
96Return Values
97=============
98
Each call to ``parse()`` returns a dict that has this form::
100
101 Standard Response
102
103 {
104 "ResponseMetadata": {"RequestId": <requestid>}
105 <response keys>
106 }
107
108 Error response
109
110 {
111 "ResponseMetadata": {"RequestId": <requestid>}
112 "Error": {
113 "Code": <string>,
114 "Message": <string>,
115 "Type": <string>,
116 <additional keys>
117 }
118 }
119
120"""
121
import base64
import functools
import http.client
import io
import json
import logging
import os
import re
import struct
130
131from botocore.compat import ETree, XMLParseError
132from botocore.eventstream import EventStream, NoInitialResponseError
133from botocore.utils import (
134 CachedProperty,
135 ensure_boolean,
136 is_json_value_header,
137 lowercase_dict,
138 merge_dicts,
139 parse_timestamp,
140)
141
142LOG = logging.getLogger(__name__)
143
144DEFAULT_TIMESTAMP_PARSER = parse_timestamp
145
146
class ResponseParserFactory:
    """Creates protocol parsers that share one set of default arguments."""

    def __init__(self):
        # Keyword arguments forwarded to every parser this factory creates.
        self._defaults = dict()

    def set_parser_defaults(self, **kwargs):
        """Register default keyword arguments for parsers created later.

        Any keyword accepted by a ResponseParser class may be supplied.
        There are currently two such arguments:

        * timestamp_parser - A callable that can parse a timestamp string
        * blob_parser - A callable that can parse a blob type

        A later call overrides the value stored for the same keyword.

        """
        for option, default in kwargs.items():
            self._defaults[option] = default

    def create_parser(self, protocol_name):
        """Instantiate the parser registered for ``protocol_name``.

        The class is looked up in ``PROTOCOL_PARSERS`` and constructed with
        the defaults accumulated through ``set_parser_defaults``.
        """
        return PROTOCOL_PARSERS[protocol_name](**self._defaults)
166
167
def create_parser(protocol):
    """Return a parser for ``protocol`` built by a fresh, default-free factory."""
    factory = ResponseParserFactory()
    return factory.create_parser(protocol)
170
171
172def _text_content(func):
173 # This decorator hides the difference between
174 # an XML node with text or a plain string. It's used
175 # to ensure that scalar processing operates only on text
176 # strings, which allows the same scalar handlers to be used
177 # for XML nodes from the body and HTTP headers.
178 def _get_text_content(self, shape, node_or_string):
179 if hasattr(node_or_string, 'text'):
180 text = node_or_string.text
181 if text is None:
182 # If an XML node is empty <foo></foo>,
183 # we want to parse that as an empty string,
184 # not as a null/None value.
185 text = ''
186 else:
187 text = node_or_string
188 return func(self, shape, text)
189
190 return _get_text_content
191
192
class ResponseParserError(Exception):
    """Raised when a service response cannot be parsed."""
195
196
class ResponseParser:
    """Base class for response parsing.

    This class represents the interface that all ResponseParsers for the
    various protocols must implement.

    This class will take an HTTP response and a model shape and parse the
    HTTP response into a dictionary.

    There is a single public method exposed: ``parse``.  See the ``parse``
    docstring for more info.

    """

    DEFAULT_ENCODING = 'utf-8'
    # Subclasses set this to a parser class used to decode event streams.
    EVENT_STREAM_PARSER_CLS = None
    # This is a list of known values for the 'location' key in the
    # serialization dict.  The location key tells us where in the response
    # to parse the value.  Members with locations that aren't in this list
    # will be parsed from the body.
    KNOWN_LOCATIONS = ('header', 'headers', 'statusCode')

    def __init__(self, timestamp_parser=None, blob_parser=None):
        """Store the scalar parsers and build the event stream parser.

        :param timestamp_parser: Callable used for timestamp values;
            defaults to ``DEFAULT_TIMESTAMP_PARSER``.
        :param blob_parser: Callable used for blob values; defaults to
            base64 decoding.
        """
        if timestamp_parser is None:
            timestamp_parser = DEFAULT_TIMESTAMP_PARSER
        self._timestamp_parser = timestamp_parser
        if blob_parser is None:
            blob_parser = self._default_blob_parser
        self._blob_parser = blob_parser
        self._event_stream_parser = None
        if self.EVENT_STREAM_PARSER_CLS is not None:
            # The nested parser shares the same scalar parsers.
            self._event_stream_parser = self.EVENT_STREAM_PARSER_CLS(
                timestamp_parser, blob_parser
            )

    def _default_blob_parser(self, value):
        # Blobs are always returned as bytes type (this matters on python3).
        # We don't decode this to a str because it's entirely possible that the
        # blob contains binary data that actually can't be decoded.
        return base64.b64decode(value)

    def parse(self, response, shape):
        """Parse the HTTP response given a shape.

        :param response: The HTTP response dictionary.  This is a dictionary
            that represents the HTTP response.  The dictionary must have the
            following keys, ``body``, ``headers``, and ``status_code``.

        :param shape: The model shape describing the expected output.
        :return: Returns a dictionary representing the parsed response
            described by the model.  In addition to the shape described from
            the model, each response will also have a ``ResponseMetadata``
            which contains metadata about the response, which contains at least
            two keys containing ``RequestId`` and ``HTTPStatusCode``.  Some
            responses may populate additional keys, but ``RequestId`` will
            always be present.

        """
        LOG.debug('Response headers: %r', response['headers'])
        LOG.debug('Response body:\n%r', response['body'])
        # NOTE(review): status 300 takes the success path here; confirm
        # that the 301 threshold is intended.
        if response['status_code'] >= 301:
            if self._is_generic_error_response(response):
                parsed = self._do_generic_error_parse(response)
            elif self._is_modeled_error_shape(shape):
                # We don't want to decorate the modeled fields with metadata
                return self._do_modeled_error_parse(response, shape)
            else:
                parsed = self._do_error_parse(response, shape)
        else:
            parsed = self._do_parse(response, shape)

        # We don't want to decorate event stream responses with metadata
        if shape and shape.serialization.get('eventstream'):
            return parsed

        # Add ResponseMetadata if it doesn't exist and inject the HTTP
        # status code and headers from the response.
        if isinstance(parsed, dict):
            response_metadata = parsed.get('ResponseMetadata', {})
            response_metadata['HTTPStatusCode'] = response['status_code']
            # Ensure that the http header keys are all lower cased.  Older
            # versions of urllib3 (< 1.11) would unintentionally do this for us
            # (see urllib3#633).  We need to do this conversion manually now.
            headers = response['headers']
            response_metadata['HTTPHeaders'] = lowercase_dict(headers)
            parsed['ResponseMetadata'] = response_metadata
            self._add_checksum_response_metadata(response, response_metadata)
        return parsed

    def _add_checksum_response_metadata(self, response, response_metadata):
        """Copy the response checksum algorithm, if any, into the metadata."""
        checksum_context = response.get('context', {}).get('checksum', {})
        algorithm = checksum_context.get('response_algorithm')
        if algorithm:
            response_metadata['ChecksumAlgorithm'] = algorithm

    def _is_modeled_error_shape(self, shape):
        """Return a truthy value when ``shape`` is flagged as an exception."""
        return shape is not None and shape.metadata.get('exception', False)

    def _is_generic_error_response(self, response):
        """Return True when a 5xx response has no protocol-specific body.

        There are times when a service will respond with a generic
        error response such as:
        '<html><body><b>Http/1.1 Service Unavailable</b></body></html>'

        This can also happen if you're going through a proxy.
        In this case the protocol specific _do_error_parse will either
        fail to parse the response (in the best case) or silently succeed
        and treat the HTML above as an XML response and return
        non sensical parsed data.
        To prevent this case from happening we first need to check
        whether or not this response looks like the generic response.
        """
        if response['status_code'] < 500:
            return False
        body = response.get('body')
        if body is None:
            return True
        body = body.strip()
        return body.startswith(b'<html>') or not body

    def _do_generic_error_parse(self, response):
        # There's not really much we can do when we get a generic
        # html response.
        LOG.debug(
            "Received a non protocol specific error response from the "
            "service, unable to populate error code and message."
        )
        status_code = response['status_code']
        return {
            'Error': {
                'Code': str(status_code),
                # Fall back to an empty message for unknown status codes.
                'Message': http.client.responses.get(status_code, ''),
            },
            'ResponseMetadata': {},
        }

    def _do_parse(self, response, shape):
        """Parse a successful response; must be provided by subclasses."""
        raise NotImplementedError(f"{self.__class__.__name__}._do_parse")

    def _do_error_parse(self, response, shape):
        """Parse an unmodeled error; must be provided by subclasses."""
        raise NotImplementedError(f"{self.__class__.__name__}._do_error_parse")

    def _do_modeled_error_parse(self, response, shape, parsed=None):
        """Parse a modeled error; must be provided by subclasses.

        ``parsed`` is optional because ``parse`` invokes this hook with only
        ``response`` and ``shape``; without the default an unimplemented
        subclass would fail with TypeError instead of NotImplementedError.
        """
        raise NotImplementedError(
            f"{self.__class__.__name__}._do_modeled_error_parse"
        )

    def _parse_shape(self, shape, node):
        """Dispatch to ``_handle_<type_name>``, or pass the value through."""
        handler = getattr(
            self, f'_handle_{shape.type_name}', self._default_handle
        )
        return handler(shape, node)

    def _handle_list(self, shape, node):
        # Enough implementations share list serialization that it's moved
        # up here in the base class.
        member_shape = shape.member
        return [self._parse_shape(member_shape, item) for item in node]

    def _default_handle(self, shape, value):
        """Return ``value`` unchanged for types without a dedicated handler."""
        return value

    def _create_event_stream(self, response, shape):
        """Wrap the response body in an EventStream for ``shape``."""
        parser = self._event_stream_parser
        name = response['context'].get('operation_name')
        return EventStream(response['body'], shape, parser, name)

    def _get_first_key(self, value):
        """Return the first key yielded when iterating ``value``."""
        return list(value)[0]

    def _has_unknown_tagged_union_member(self, shape, value):
        """Report whether a tagged union carries a member this client lacks.

        :return: False for non-union shapes and for unions whose single set
            member is modeled; True when the set member is not modeled.
        :raises ResponseParserError: If the union does not have exactly one
            member set (ignoring ``__type`` and None values).
        """
        if not shape.is_tagged_union:
            return False
        set_members = {
            k: v for k, v in value.items() if k != "__type" and v is not None
        }
        if len(set_members) != 1:
            error_msg = (
                "Invalid service response: %s must have one and only "
                "one member set."
            )
            raise ResponseParserError(error_msg % shape.name)
        tag = self._get_first_key(set_members)
        serialized_member_names = [
            shape.members[member].serialization.get('name', member)
            for member in shape.members
        ]
        if tag in serialized_member_names:
            return False
        LOG.info(
            "Received a tagged union response with member unknown to client: %s. "
            "Please upgrade SDK for full response support.",
            tag,
        )
        return True

    def _handle_unknown_tagged_union_member(self, tag):
        """Build the placeholder returned for an unmodeled union member."""
        return {'SDK_UNKNOWN_MEMBER': {'name': tag}}

    def _do_query_compatible_error_parse(self, code, headers, error):
        """
        Error response may contain an x-amzn-query-error header to translate
        errors codes from former `query` services into other protocols. We use this
        to do our lookup in the errorfactory for modeled errors.

        :return: The query error code from the header when it has the form
            ``<code>;<type>`` with a non-empty code, otherwise ``code``.
        """
        query_error = headers['x-amzn-query-error']
        query_error_components = query_error.split(';')

        if len(query_error_components) == 2 and query_error_components[0]:
            error['Error']['QueryErrorCode'] = code
            error['Error']['Type'] = query_error_components[1]
            return query_error_components[0]
        return code
413
414
class BaseXMLResponseParser(ResponseParser):
    """Shared logic for parsing XML response bodies into Python values."""

    def __init__(self, timestamp_parser=None, blob_parser=None):
        super().__init__(timestamp_parser, blob_parser)
        # Matches the "{namespace}" prefix ElementTree puts on tag names.
        self._namespace_re = re.compile(r'{.*}')

    def _handle_map(self, shape, node):
        """Parse a map from ``<entry>``-style nodes holding a key and a value.

        :raises ResponseParserError: If an entry contains an unexpected tag
            or is missing its key or value element.
        """
        parsed = {}
        key_shape = shape.key
        value_shape = shape.value
        key_location_name = key_shape.serialization.get('name') or 'key'
        value_location_name = value_shape.serialization.get('name') or 'value'
        if shape.serialization.get('flattened') and not isinstance(node, list):
            node = [node]
        missing = object()
        for keyval_node in node:
            # Reset per entry so an incomplete entry can never silently
            # reuse the key or value parsed from the previous one.
            key_name = val_name = missing
            for single_pair in keyval_node:
                # Within each <entry> there's a <key> and a <value>
                tag_name = self._node_tag(single_pair)
                if tag_name == key_location_name:
                    key_name = self._parse_shape(key_shape, single_pair)
                elif tag_name == value_location_name:
                    val_name = self._parse_shape(value_shape, single_pair)
                else:
                    raise ResponseParserError(f"Unknown tag: {tag_name}")
            if key_name is missing or val_name is missing:
                raise ResponseParserError(
                    f"Invalid map entry for {shape.name}: expected both "
                    f"<{key_location_name}> and <{value_location_name}>"
                )
            parsed[key_name] = val_name
        return parsed

    def _node_tag(self, node):
        """Return the node's tag with any namespace prefix removed."""
        return self._namespace_re.sub('', node.tag)

    def _handle_list(self, shape, node):
        # When we use _build_name_to_xml_node, repeated elements are aggregated
        # into a list.  However, we can't tell the difference between a scalar
        # value and a single element flattened list.  So before calling the
        # real _handle_list, we know that "node" should actually be a list if
        # it's flattened, and if it's not, then we make it a one element list.
        if shape.serialization.get('flattened') and not isinstance(node, list):
            node = [node]
        return super()._handle_list(shape, node)

    def _handle_structure(self, shape, node):
        """Parse the body members of a structure from ``node``.

        Members located in headers or the status code, and event headers,
        are skipped here.  Members flagged ``xmlAttribute`` are read from
        the node's attributes when no child element matches.
        """
        parsed = {}
        members = shape.members
        if shape.metadata.get('exception', False):
            node = self._get_error_root(node)
        xml_dict = self._build_name_to_xml_node(node)
        if self._has_unknown_tagged_union_member(shape, xml_dict):
            tag = self._get_first_key(xml_dict)
            return self._handle_unknown_tagged_union_member(tag)
        for member_name in members:
            member_shape = members[member_name]
            serialization = member_shape.serialization
            if (
                serialization.get('location') in self.KNOWN_LOCATIONS
                or serialization.get('eventheader')
            ):
                # All members with known locations have already been handled,
                # so we don't need to parse these members.
                continue
            xml_name = self._member_key_name(member_shape, member_name)
            member_node = xml_dict.get(xml_name)
            if member_node is not None:
                parsed[member_name] = self._parse_shape(
                    member_shape, member_node
                )
            elif serialization.get('xmlAttribute'):
                location_name = serialization['name']
                # Swap the "{namespace}" on each attribute key for the
                # prefix used in the modeled name so the two can be compared.
                prefix = location_name.split(':')[0] + ':'
                attribs = {
                    self._namespace_re.sub(prefix, key): value
                    for key, value in node.attrib.items()
                }
                if location_name in attribs:
                    parsed[member_name] = attribs[location_name]
        return parsed

    def _get_error_root(self, original_root):
        """Return the ``Error`` child of an ``ErrorResponse`` root, if any."""
        if self._node_tag(original_root) == 'ErrorResponse':
            for child in original_root:
                if self._node_tag(child) == 'Error':
                    return child
        return original_root

    def _member_key_name(self, shape, member_name):
        # This method is needed because we have to special case flattened list
        # with a serialization name.  If this is the case we use the
        # locationName from the list's member shape as the key name for the
        # surrounding structure.
        if shape.type_name == 'list' and shape.serialization.get('flattened'):
            list_member_serialized_name = shape.member.serialization.get(
                'name'
            )
            if list_member_serialized_name is not None:
                return list_member_serialized_name
        serialized_name = shape.serialization.get('name')
        if serialized_name is not None:
            return serialized_name
        return member_name

    def _build_name_to_xml_node(self, parent_node):
        """Map each child tag name to its node, or to a list when repeated."""
        # If the parent node is actually a list.  We should not be trying
        # to serialize it to a dictionary.  Instead, return the first element
        # in the list.
        if isinstance(parent_node, list):
            return self._build_name_to_xml_node(parent_node[0])
        xml_dict = {}
        for item in parent_node:
            key = self._node_tag(item)
            if key not in xml_dict:
                xml_dict[key] = item
            elif isinstance(xml_dict[key], list):
                # If the key already exists, the most natural
                # way to handle this is to aggregate repeated
                # keys into a single list.
                # <foo>1</foo><foo>2</foo> -> {'foo': [Node(1), Node(2)]}
                xml_dict[key].append(item)
            else:
                # Convert from a scalar to a list.
                xml_dict[key] = [xml_dict[key], item]
        return xml_dict

    def _parse_xml_string_to_dom(self, xml_string):
        """Parse ``xml_string`` and return the root element.

        :raises ResponseParserError: If the XML is invalid; the underlying
            parse error is attached as the cause.
        """
        try:
            parser = ETree.XMLParser(
                target=ETree.TreeBuilder(), encoding=self.DEFAULT_ENCODING
            )
            parser.feed(xml_string)
            root = parser.close()
        except XMLParseError as e:
            raise ResponseParserError(
                f"Unable to parse response ({e}), "
                f"invalid XML received. Further retries may succeed:\n{xml_string}"
            ) from e
        return root

    def _replace_nodes(self, parsed):
        """Recursively replace nodes in ``parsed`` with dicts or their text."""
        for key, value in parsed.items():
            if list(value):
                # The node has children, so descend into them.
                sub_dict = self._build_name_to_xml_node(value)
                parsed[key] = self._replace_nodes(sub_dict)
            else:
                parsed[key] = value.text
        return parsed

    @_text_content
    def _handle_boolean(self, shape, text):
        # Only the exact string 'true' is treated as True.
        return text == 'true'

    @_text_content
    def _handle_float(self, shape, text):
        return float(text)

    @_text_content
    def _handle_timestamp(self, shape, text):
        return self._timestamp_parser(text)

    @_text_content
    def _handle_integer(self, shape, text):
        return int(text)

    @_text_content
    def _handle_string(self, shape, text):
        return text

    @_text_content
    def _handle_blob(self, shape, text):
        return self._blob_parser(text)

    _handle_character = _handle_string
    _handle_double = _handle_float
    _handle_long = _handle_integer
590
591
class QueryParser(BaseXMLResponseParser):
    """Response parser for XML bodies with an optional result wrapper."""

    def _do_error_parse(self, response, shape):
        """Convert an XML error body into a plain dict."""
        root = self._parse_xml_string_to_dom(response['body'])
        error = self._build_name_to_xml_node(root)
        self._replace_nodes(error)
        # Once we've converted xml->dict, we need to make one or two
        # more adjustments to extract nested errors and to be consistent
        # with ResponseMetadata for non-error responses:
        # 1. {"Errors": {"Error": {...}}} -> {"Error": {...}}
        # 2. {"RequestId": "id"} -> {"ResponseMetadata": {"RequestId": "id"}}
        if 'Errors' in error:
            nested_errors = error.pop('Errors')
            error.update(nested_errors)
        if 'RequestId' in error:
            request_id = error.pop('RequestId')
            error['ResponseMetadata'] = {'RequestId': request_id}
        return error

    def _do_modeled_error_parse(self, response, shape):
        """Parse a modeled error body without injecting response metadata."""
        return self._parse_body_as_xml(response, shape, inject_metadata=False)

    def _do_parse(self, response, shape):
        """Parse a successful body and inject response metadata."""
        return self._parse_body_as_xml(response, shape, inject_metadata=True)

    def _parse_body_as_xml(self, response, shape, inject_metadata=True):
        """Parse the XML body according to ``shape``.

        When ``shape`` is None the result starts as an empty dict.  When
        the shape names a ``resultWrapper``, parsing starts from that child
        of the root rather than from the root itself.
        """
        root = self._parse_xml_string_to_dom(response['body'])
        if shape is None:
            parsed = {}
        else:
            serialization = shape.serialization
            if 'resultWrapper' in serialization:
                start = self._find_result_wrapped_shape(
                    serialization['resultWrapper'], root
                )
            else:
                start = root
            parsed = self._parse_shape(shape, start)
        if inject_metadata:
            self._inject_response_metadata(root, parsed)
        return parsed

    def _find_result_wrapped_shape(self, element_name, xml_root_node):
        """Return the child of the root whose tag is ``element_name``."""
        children = self._build_name_to_xml_node(xml_root_node)
        return children[element_name]

    def _inject_response_metadata(self, node, inject_into):
        """Copy the text of each ``ResponseMetadata`` child into the result."""
        metadata_node = self._build_name_to_xml_node(node).get(
            'ResponseMetadata'
        )
        if metadata_node is None:
            return
        entries = self._build_name_to_xml_node(metadata_node)
        inject_into['ResponseMetadata'] = {
            name: child.text for name, child in entries.items()
        }
642
643
class EC2QueryParser(QueryParser):
    """QueryParser variant for EC2's request id and error layout."""

    def _inject_response_metadata(self, node, inject_into):
        """Expose the text of a top-level ``requestId`` as ``RequestId``."""
        request_id_node = self._build_name_to_xml_node(node).get('requestId')
        if request_id_node is None:
            return
        inject_into['ResponseMetadata'] = {'RequestId': request_id_node.text}

    def _do_error_parse(self, response, shape):
        # EC2 errors look like:
        # <Response>
        #   <Errors>
        #     <Error>
        #       <Code>InvalidInstanceID.Malformed</Code>
        #       <Message>Invalid id: "1343124"</Message>
        #     </Error>
        #   </Errors>
        #   <RequestID>12345</RequestID>
        # </Response>
        # This is different from QueryParser in that it's RequestID,
        # not RequestId
        error = super()._do_error_parse(response, shape)
        if 'RequestID' in error:
            request_id = error.pop('RequestID')
            error['ResponseMetadata'] = {'RequestId': request_id}
        return error

    def _get_error_root(self, original_root):
        """Return the first ``Errors/Error`` node, else the original root."""
        for child in original_root:
            if self._node_tag(child) != 'Errors':
                continue
            for candidate in child:
                if self._node_tag(candidate) == 'Error':
                    return candidate
        return original_root
678
679
class BaseJSONParser(ResponseParser):
    """Shared logic for parsing JSON response bodies into Python values."""

    def _handle_structure(self, shape, value):
        """Parse a JSON object into a dict keyed by modeled member names.

        Document types are returned untouched.  A JSON ``null`` is returned
        as None rather than as an empty dict.  Members whose raw value is
        None are left out of the result.
        """
        if shape.is_document_type:
            return value
        if value is None:
            # If the comes across the wire as "null" (None in python),
            # we should be returning this unchanged, instead of as an
            # empty dict.
            return None
        if self._has_unknown_tagged_union_member(shape, value):
            # Report the member that is actually set: skip "__type" and
            # null-valued keys, which may precede it in the raw object.
            tag = next(
                (
                    k
                    for k, v in value.items()
                    if k != '__type' and v is not None
                ),
                None,
            )
            if tag is None:
                tag = self._get_first_key(value)
            return self._handle_unknown_tagged_union_member(tag)
        member_shapes = shape.members
        final_parsed = {}
        for member_name in member_shapes:
            member_shape = member_shapes[member_name]
            json_name = member_shape.serialization.get('name', member_name)
            raw_value = value.get(json_name)
            if raw_value is not None:
                final_parsed[member_name] = self._parse_shape(
                    member_shape, raw_value
                )
        return final_parsed

    def _handle_map(self, shape, value):
        """Parse every key and value of a JSON object with the map's shapes."""
        key_shape = shape.key
        value_shape = shape.value
        parsed = {}
        for raw_key, raw_value in value.items():
            actual_key = self._parse_shape(key_shape, raw_key)
            actual_value = self._parse_shape(value_shape, raw_value)
            parsed[actual_key] = actual_value
        return parsed

    def _handle_blob(self, shape, value):
        return self._blob_parser(value)

    def _handle_timestamp(self, shape, value):
        return self._timestamp_parser(value)

    def _do_error_parse(self, response, shape):
        """Build the ``Error``/``ResponseMetadata`` dict for an error body."""
        body = self._parse_body_as_json(response['body'])
        error = {"Error": {"Message": '', "Code": ''}, "ResponseMetadata": {}}
        headers = response['headers']
        # Error responses can have slightly different structures for json.
        # The basic structure is:
        #
        # {"__type":"ConnectClientException",
        #  "message":"The error message."}

        # The error message can either come in the 'message' or 'Message' key
        # so we need to check for both.
        error['Error']['Message'] = body.get(
            'message', body.get('Message', '')
        )
        # if the message did not contain an error code
        # include the response status code
        response_code = response.get('status_code')

        code = body.get('__type', response_code and str(response_code))
        if code is not None:
            # code has a couple forms as well:
            # * "com.aws.dynamodb.vAPI#ProvisionedThroughputExceededException"
            # * "ResourceNotFoundException"
            if ':' in code:
                code = code.split(':', 1)[0]
            if '#' in code:
                code = code.rsplit('#', 1)[1]
            if 'x-amzn-query-error' in headers:
                code = self._do_query_compatible_error_parse(
                    code, headers, error
                )
            error['Error']['Code'] = code
        self._inject_response_metadata(error, headers)
        return error

    def _inject_response_metadata(self, parsed, headers):
        """Copy the ``x-amzn-requestid`` header, if present, as RequestId."""
        if 'x-amzn-requestid' in headers:
            parsed.setdefault('ResponseMetadata', {})['RequestId'] = headers[
                'x-amzn-requestid'
            ]

    def _parse_body_as_json(self, body_contents):
        """Decode and parse a JSON body.

        :return: ``{}`` for an empty body, the decoded JSON value when it
            parses, otherwise ``{'message': <decoded body>}``.
        """
        if not body_contents:
            return {}
        body = body_contents.decode(self.DEFAULT_ENCODING)
        try:
            return json.loads(body)
        except ValueError:
            # if the body cannot be parsed, include
            # the literal string as the message
            return {'message': body}
775
776
777class BaseCBORParser(ResponseParser):
778 INDEFINITE_ITEM_ADDITIONAL_INFO = 31
779 BREAK_CODE = 0xFF
780
781 @CachedProperty
782 def major_type_to_parsing_method_map(self):
783 return {
784 0: self._parse_unsigned_integer,
785 1: self._parse_negative_integer,
786 2: self._parse_byte_string,
787 3: self._parse_text_string,
788 4: self._parse_array,
789 5: self._parse_map,
790 6: self._parse_tag,
791 7: self._parse_simple_and_float,
792 }
793
794 def get_peekable_stream_from_bytes(self, bytes):
795 return io.BufferedReader(io.BytesIO(bytes))
796
797 def parse_data_item(self, stream):
798 # CBOR data is divided into "data items", and each data item starts
799 # with an initial byte that describes how the following bytes should be parsed
800 initial_byte = self._read_bytes_as_int(stream, 1)
801 # The highest order three bits of the initial byte describe the CBOR major type
802 major_type = initial_byte >> 5
803 # The lowest order 5 bits of the initial byte tells us more information about
804 # how the bytes should be parsed that will be used
805 additional_info = initial_byte & 0b00011111
806
807 if major_type in self.major_type_to_parsing_method_map:
808 method = self.major_type_to_parsing_method_map[major_type]
809 return method(stream, additional_info)
810 else:
811 raise ResponseParserError(
812 f"Unsupported inital byte found for data item- "
813 f"Major type:{major_type}, Additional info: "
814 f"{additional_info}"
815 )
816
817 # Major type 0 - unsigned integers
818 def _parse_unsigned_integer(self, stream, additional_info):
819 additional_info_to_num_bytes = {
820 24: 1,
821 25: 2,
822 26: 4,
823 27: 8,
824 }
825 # Values under 24 don't need a full byte to be stored; their values are
826 # instead stored as the "additional info" in the initial byte
827 if additional_info < 24:
828 return additional_info
829 elif additional_info in additional_info_to_num_bytes:
830 num_bytes = additional_info_to_num_bytes[additional_info]
831 return self._read_bytes_as_int(stream, num_bytes)
832 else:
833 raise ResponseParserError(
834 "Invalid CBOR integer returned from the service; unparsable "
835 f"additional info found for major type 0 or 1: {additional_info}"
836 )
837
838 # Major type 1 - negative integers
839 def _parse_negative_integer(self, stream, additional_info):
840 return -1 - self._parse_unsigned_integer(stream, additional_info)
841
842 # Major type 2 - byte string
843 def _parse_byte_string(self, stream, additional_info):
844 if additional_info != self.INDEFINITE_ITEM_ADDITIONAL_INFO:
845 length = self._parse_unsigned_integer(stream, additional_info)
846 return self._read_from_stream(stream, length)
847 else:
848 chunks = []
849 while True:
850 if self._handle_break_code(stream):
851 break
852 initial_byte = self._read_bytes_as_int(stream, 1)
853 additional_info = initial_byte & 0b00011111
854 length = self._parse_unsigned_integer(stream, additional_info)
855 chunks.append(self._read_from_stream(stream, length))
856 return b''.join(chunks)
857
858 # Major type 3 - text string
859 def _parse_text_string(self, stream, additional_info):
860 return self._parse_byte_string(stream, additional_info).decode('utf-8')
861
862 # Major type 4 - lists
863 def _parse_array(self, stream, additional_info):
864 if additional_info != self.INDEFINITE_ITEM_ADDITIONAL_INFO:
865 length = self._parse_unsigned_integer(stream, additional_info)
866 return [self.parse_data_item(stream) for _ in range(length)]
867 else:
868 items = []
869 while not self._handle_break_code(stream):
870 items.append(self.parse_data_item(stream))
871 return items
872
873 # Major type 5 - maps
874 def _parse_map(self, stream, additional_info):
875 items = {}
876 if additional_info != self.INDEFINITE_ITEM_ADDITIONAL_INFO:
877 length = self._parse_unsigned_integer(stream, additional_info)
878 for _ in range(length):
879 self._parse_key_value_pair(stream, items)
880 return items
881
882 else:
883 while not self._handle_break_code(stream):
884 self._parse_key_value_pair(stream, items)
885 return items
886
887 def _parse_key_value_pair(self, stream, items):
888 key = self.parse_data_item(stream)
889 value = self.parse_data_item(stream)
890 if value is not None:
891 items[key] = value
892
893 # Major type 6 is tags. The only tag we currently support is tag 1 for unix
894 # timestamps
895 def _parse_tag(self, stream, additional_info):
896 tag = self._parse_unsigned_integer(stream, additional_info)
897 value = self.parse_data_item(stream)
898 if tag == 1: # Epoch-based date/time in milliseconds
899 return self._parse_datetime(value)
900 else:
901 raise ResponseParserError(
902 f"Found CBOR tag not supported by botocore: {tag}"
903 )
904
905 def _parse_datetime(self, value):
906 if isinstance(value, (int, float)):
907 return self._timestamp_parser(value)
908 else:
909 raise ResponseParserError(
910 f"Unable to parse datetime value: {value}"
911 )
912
913 # Major type 7 includes floats and "simple" types. Supported simple types are
914 # currently boolean values, CBOR's null, and CBOR's undefined type. All other
915 # values are either floats or invalid.
916 def _parse_simple_and_float(self, stream, additional_info):
917 # For major type 7, values 20-23 correspond to CBOR "simple" values
918 additional_info_simple_values = {
919 20: False, # CBOR false
920 21: True, # CBOR true
921 22: None, # CBOR null
922 23: None, # CBOR undefined
923 }
924 # First we check if the additional info corresponds to a supported simple value
925 if additional_info in additional_info_simple_values:
926 return additional_info_simple_values[additional_info]
927
928 # If it's not a simple value, we need to parse it into the correct format and
929 # number fo bytes
930 float_formats = {
931 25: ('>e', 2),
932 26: ('>f', 4),
933 27: ('>d', 8),
934 }
935
936 if additional_info in float_formats:
937 float_format, num_bytes = float_formats[additional_info]
938 return struct.unpack(
939 float_format, self._read_from_stream(stream, num_bytes)
940 )[0]
941 raise ResponseParserError(
942 f"Invalid additional info found for major type 7: {additional_info}. "
943 f"This indicates an unsupported simple type or an indefinite float value"
944 )
945
946 # This helper method is intended for use when parsing indefinite length items.
947 # It does nothing if the next byte is not the break code. If the next byte is
948 # the break code, it advances past that byte and returns True so the calling
949 # method knows to stop parsing that data item.
950 def _handle_break_code(self, stream):
951 if int.from_bytes(stream.peek(1)[:1], 'big') == self.BREAK_CODE:
952 stream.seek(1, os.SEEK_CUR)
953 return True
954
955 def _read_bytes_as_int(self, stream, num_bytes):
956 byte = self._read_from_stream(stream, num_bytes)
957 return int.from_bytes(byte, 'big')
958
959 def _read_from_stream(self, stream, num_bytes):
960 value = stream.read(num_bytes)
961 if len(value) != num_bytes:
962 raise ResponseParserError(
963 "End of stream reached; this indicates a "
964 "malformed CBOR response from the server or an "
965 "issue in botocore"
966 )
967 return value
968
969
class BaseEventStreamParser(ResponseParser):
    """Base parser for a single event stream message.

    Subclasses supply ``_initial_body_parse`` to turn the raw payload
    into the intermediate form that ``_parse_shape`` walks.
    """

    def _do_parse(self, response, shape):
        """Parse one event according to ``shape`` and return a dict.

        When ``shape`` is the event stream itself, the event is looked up
        by the ``:event-type`` header and parsed under that member name;
        an unknown event type yields an empty dict. Otherwise the shape
        is parsed as a single event (headers first, then payload).
        """
        parsed_event = {}
        if not shape.serialization.get('eventstream'):
            members = shape.members
            self._parse_non_payload_attrs(
                response, shape, members, parsed_event
            )
            self._parse_payload(response, shape, members, parsed_event)
            return parsed_event

        event_type = response['headers'].get(':event-type')
        event_shape = shape.members.get(event_type)
        if event_shape:
            parsed_event[event_type] = self._do_parse(response, event_shape)
        return parsed_event

    def _do_error_parse(self, response, shape):
        """Build an ``{'Error': {...}}`` dict for an exception event.

        A modeled exception (one whose ``:exception-type`` header names a
        member of ``shape``) takes its message from the parsed body;
        otherwise the code and message come from the ``:error-code`` and
        ``:error-message`` headers.
        """
        headers = response['headers']
        exception_type = headers.get(':exception-type')
        exception_shape = shape.members.get(exception_type)
        if exception_shape is None:
            return {
                'Error': {
                    'Code': headers.get(':error-code', ''),
                    'Message': headers.get(':error-message', ''),
                }
            }

        raw_body = self._initial_body_parse(response['body'])
        modeled_body = self._parse_shape(exception_shape, raw_body)
        message = modeled_body.get('Message', modeled_body.get('message', ''))
        return {'Error': {'Code': exception_type, 'Message': message}}

    def _parse_payload(self, response, shape, member_shapes, final_parsed):
        """Populate ``final_parsed`` from the event body.

        Nothing is done unless ``shape`` is marked as an event. The first
        member flagged ``eventpayload`` receives the body: blobs as-is,
        strings decoded with ``DEFAULT_ENCODING``, anything else parsed
        against the member shape. Without such a member the body is
        parsed against ``shape`` itself and merged into ``final_parsed``.
        """
        if not shape.serialization.get('event'):
            return

        for name in member_shapes:
            member_shape = member_shapes[name]
            if not member_shape.serialization.get('eventpayload'):
                continue
            body = response['body']
            type_name = member_shape.type_name
            if type_name == 'blob':
                final_parsed[name] = body
            elif type_name == 'string':
                final_parsed[name] = body.decode(self.DEFAULT_ENCODING)
            else:
                final_parsed[name] = self._parse_shape(
                    member_shape, self._initial_body_parse(body)
                )
            return

        # No explicit payload member: the body describes the shape itself.
        whole_body = self._initial_body_parse(response['body'])
        final_parsed.update(self._parse_shape(shape, whole_body))

    def _parse_non_payload_attrs(
        self, response, shape, member_shapes, final_parsed
    ):
        """Copy ``eventheader`` members from the headers to ``final_parsed``.

        Only members whose name is present in the headers are copied.
        """
        headers = response['headers']
        for name in member_shapes:
            member_shape = member_shapes[name]
            if not member_shape.serialization.get('eventheader'):
                continue
            if name not in headers:
                continue
            value = headers[name]
            if member_shape.type_name == 'timestamp':
                # Event stream timestamps are in milliseconds, so divide
                # by 1000 to convert to seconds.
                value = self._timestamp_parser(value / 1000.0)
            final_parsed[name] = value

    def _initial_body_parse(self, body_contents):
        """Do the first-pass (xml/json/...) parse of the raw body.

        The result still has to be walked to convert types; subclasses
        must override this.
        """
        raise NotImplementedError("_initial_body_parse")
1051
1052
class EventStreamJSONParser(BaseEventStreamParser, BaseJSONParser):
    """Event stream parser whose event bodies are parsed as JSON."""

    def _initial_body_parse(self, body_contents):
        """Return the raw event body parsed as JSON."""
        parsed_json = self._parse_body_as_json(body_contents)
        return parsed_json
1056
1057
class EventStreamXMLParser(BaseEventStreamParser, BaseXMLResponseParser):
    """Event stream parser whose event bodies are parsed as XML."""

    def _initial_body_parse(self, xml_string):
        """Return the body as an XML DOM; an empty body gives an empty element."""
        if xml_string:
            return self._parse_xml_string_to_dom(xml_string)
        return ETree.Element('')
1063
1064
class EventStreamCBORParser(BaseEventStreamParser, BaseCBORParser):
    """Event stream parser whose event bodies are parsed as CBOR."""

    def _initial_body_parse(self, body_contents):
        """Return the body parsed as one CBOR data item (``{}`` if empty)."""
        if body_contents != b'':
            stream = self.get_peekable_stream_from_bytes(body_contents)
            return self.parse_data_item(stream)
        return {}
1072
1073
class JSONParser(BaseJSONParser):
    """Response parser for the "json" protocol."""

    # NOTE(review): presumably consumed by ``_create_event_stream`` (defined
    # outside this class body) to parse individual events - confirm.
    EVENT_STREAM_PARSER_CLS = EventStreamJSONParser

    def _do_parse(self, response, shape):
        """Parse a non-error response into a dict.

        :param response: Dict with at least ``'body'`` and ``'headers'``.
        :param shape: Output shape, or ``None`` when nothing is modeled.
        :return: The parsed dict, with response metadata injected from
            the headers.
        """
        parsed = {}
        if shape is not None:
            event_name = shape.event_stream_name
            if event_name:
                parsed = self._handle_event_stream(response, shape, event_name)
            else:
                parsed = self._handle_json_body(response['body'], shape)
        self._inject_response_metadata(parsed, response['headers'])
        return parsed

    def _do_modeled_error_parse(self, response, shape):
        """Parse the body of a modeled error against ``shape``."""
        return self._handle_json_body(response['body'], shape)

    def _handle_event_stream(self, response, shape, event_name):
        """Parse an event stream response.

        The payload of the initial-response event is parsed against
        ``shape`` and the event stream object itself is stored under
        ``event_name``.

        :raises ResponseParserError: If the first event is not an
            initial-response event.
        """
        event_stream_shape = shape.members[event_name]
        event_stream = self._create_event_stream(response, event_stream_shape)
        try:
            event = event_stream.get_initial_response()
        except NoInitialResponseError:
            error_msg = 'First event was not of type initial-response'
            raise ResponseParserError(error_msg)
        parsed = self._handle_json_body(event.payload, shape)
        parsed[event_name] = event_stream
        return parsed

    def _handle_json_body(self, raw_body, shape):
        """Parse ``raw_body`` as JSON and convert it according to ``shape``."""
        # The json.loads() gives us the primitive JSON types,
        # but we need to traverse the parsed JSON data to convert
        # to richer types (blobs, timestamps, etc.).
        parsed_json = self._parse_body_as_json(raw_body)
        return self._parse_shape(shape, parsed_json)
1111
1112
class BaseRestParser(ResponseParser):
    """Shared parsing for protocols that split a response across the
    status code, the headers and the body.

    Subclasses supply ``_initial_body_parse`` for the body format.
    """

    def _do_parse(self, response, shape):
        """Parse a non-error response into a dict with ``ResponseMetadata``."""
        final_parsed = {
            'ResponseMetadata': self._populate_response_metadata(response)
        }
        self._add_modeled_parse(response, shape, final_parsed)
        return final_parsed

    def _add_modeled_parse(self, response, shape, final_parsed):
        """Fill ``final_parsed`` with the members modeled by ``shape``.

        With no shape, ``final_parsed`` is returned untouched.
        """
        if shape is None:
            return final_parsed
        members = shape.members
        self._parse_non_payload_attrs(response, shape, members, final_parsed)
        self._parse_payload(response, shape, members, final_parsed)

    def _do_modeled_error_parse(self, response, shape):
        """Parse a modeled error into a fresh dict (no response metadata)."""
        modeled = {}
        self._add_modeled_parse(response, shape, modeled)
        return modeled

    def _populate_response_metadata(self, response):
        """Extract the request id (and possibly host id) from the headers.

        ``x-amzn-requestid`` takes precedence. When only
        ``x-amz-request-id`` is present, ``HostId`` is also filled in
        from ``x-amz-id-2`` (empty string if absent).
        """
        headers = response['headers']
        if 'x-amzn-requestid' in headers:
            return {'RequestId': headers['x-amzn-requestid']}
        if 'x-amz-request-id' in headers:
            # The value is named HostId when it appears in an XML response
            # body, so the same name is used here for consistency.
            return {
                'RequestId': headers['x-amz-request-id'],
                'HostId': headers.get('x-amz-id-2', ''),
            }
        return {}

    def _parse_payload(self, response, shape, member_shapes, final_parsed):
        """Populate ``final_parsed`` from the response body.

        Without a ``payload`` entry in the shape's serialization, the
        whole body is parsed against ``shape``. Otherwise only the named
        payload member is set: as an event stream, as the (decoded) raw
        body for string/blob members, or as a parsed sub-document.
        """
        if 'payload' not in shape.serialization:
            whole_body = self._initial_body_parse(response['body'])
            final_parsed.update(self._parse_shape(shape, whole_body))
            return

        # A declared payload member is the only thing read from the body.
        payload_member_name = shape.serialization['payload']
        body_shape = member_shapes[payload_member_name]
        if body_shape.serialization.get('eventstream'):
            final_parsed[payload_member_name] = self._create_event_stream(
                response, body_shape
            )
        elif body_shape.type_name in ['string', 'blob']:
            # The body is passed through; bytes are decoded first.
            body = response['body']
            if isinstance(body, bytes):
                body = body.decode(self.DEFAULT_ENCODING)
            final_parsed[payload_member_name] = body
        else:
            payload_body = self._initial_body_parse(response['body'])
            final_parsed[payload_member_name] = self._parse_shape(
                body_shape, payload_body
            )

    def _parse_non_payload_attrs(
        self, response, shape, member_shapes, final_parsed
    ):
        """Populate members located in the status code or the headers."""
        headers = response['headers']
        for name in member_shapes:
            member_shape = member_shapes[name]
            location = member_shape.serialization.get('location')
            if location == 'statusCode':
                final_parsed[name] = self._parse_shape(
                    member_shape, response['status_code']
                )
            elif location == 'headers':
                final_parsed[name] = self._parse_header_map(
                    member_shape, headers
                )
            elif location == 'header':
                header_name = member_shape.serialization.get('name', name)
                if header_name in headers:
                    final_parsed[name] = self._parse_shape(
                        member_shape, headers[header_name]
                    )

    def _parse_header_map(self, shape, headers):
        """Collect every header starting with the shape's prefix.

        The match is case-insensitive and the prefix is stripped from
        the keys of the returned dict.
        """
        prefix = shape.serialization.get('name', '').lower()
        prefix_length = len(prefix)
        return {
            header_name[prefix_length:]: headers[header_name]
            for header_name in headers
            if header_name.lower().startswith(prefix)
        }

    def _initial_body_parse(self, body_contents):
        """Do the first-pass (xml/json) parse of the raw body.

        The result still has to be walked to convert types; subclasses
        must override this.
        """
        raise NotImplementedError("_initial_body_parse")

    def _handle_string(self, shape, value):
        """Return ``value``; JSON-value headers are base64-decoded and
        loaded as JSON first."""
        if not is_json_value_header(shape):
            return value
        decoded = base64.b64decode(value).decode(self.DEFAULT_ENCODING)
        return json.loads(decoded)

    def _handle_list(self, shape, node):
        """Parse a list, splitting comma-separated header values first."""
        from_header = shape.serialization.get('location') == 'header'
        if from_header and not isinstance(node, list):
            # List in headers may be a comma separated string as per RFC7230
            node = [piece.strip() for piece in node.split(',')]
        return super()._handle_list(shape, node)
1231
1232
class BaseRpcV2Parser(ResponseParser):
    """Shared parsing for RPC v2 style protocols, where every modeled
    member is read from the body.

    Subclasses supply ``_initial_body_parse`` for the body format.
    """

    def _do_parse(self, response, shape):
        """Parse a non-error response; an unmodeled one gives ``{}``."""
        if shape is None:
            return {}
        event_stream_name = shape.event_stream_name
        if event_stream_name:
            parsed = self._handle_event_stream(
                response, shape, event_stream_name
            )
        else:
            parsed = {}
            self._parse_payload(response, shape, parsed)
        parsed['ResponseMetadata'] = self._populate_response_metadata(
            response
        )
        return parsed

    def _add_modeled_parse(self, response, shape, final_parsed):
        """Fill ``final_parsed`` from the body; a no-op without a shape."""
        if shape is None:
            return final_parsed
        self._parse_payload(response, shape, final_parsed)

    def _do_modeled_error_parse(self, response, shape):
        """Parse a modeled error body into a fresh dict."""
        modeled = {}
        self._add_modeled_parse(response, shape, modeled)
        return modeled

    def _populate_response_metadata(self, response):
        """Return ``{'RequestId': ...}`` from ``x-amzn-requestid`` if present."""
        headers = response['headers']
        if 'x-amzn-requestid' not in headers:
            return {}
        return {'RequestId': headers['x-amzn-requestid']}

    def _handle_structure(self, shape, node):
        """Parse the modeled members of a structure out of ``node``.

        Members that are missing or ``None`` in ``node`` are skipped.

        :raises ResponseParserError: If ``shape`` is a tagged union and
            ``node`` does not have exactly one non-``None`` member
            (ignoring ``__type``).
        """
        members = shape.members
        if shape.is_tagged_union:
            set_members = {
                key: value
                for key, value in node.items()
                if key != "__type" and value is not None
            }
            if len(set_members) != 1:
                error_msg = (
                    "Invalid service response: %s must have one and only "
                    "one member set."
                )
                raise ResponseParserError(error_msg % shape.name)

        parsed = {}
        for member_name in members:
            member_node = node.get(member_name)
            if member_node is None:
                continue
            parsed[member_name] = self._parse_shape(
                members[member_name], member_node
            )
        return parsed

    def _parse_payload(self, response, shape, final_parsed):
        """Parse the whole body against ``shape`` into ``final_parsed``."""
        raw_body = self._initial_body_parse(response['body'])
        final_parsed.update(self._parse_shape(shape, raw_body))

    def _initial_body_parse(self, body_contents):
        """Do the first-pass parse of the raw body.

        The result still has to be walked to convert types; subclasses
        must override this.
        """
        raise NotImplementedError("_initial_body_parse")
1302
1303
class RestJSONParser(BaseRestParser, BaseJSONParser):
    """REST-style response parser whose bodies are JSON documents."""

    EVENT_STREAM_PARSER_CLS = EventStreamJSONParser

    def _initial_body_parse(self, body_contents):
        """Return the raw body parsed as JSON."""
        return self._parse_body_as_json(body_contents)

    def _do_error_parse(self, response, shape):
        """Parse the error via the parent classes, then set its code."""
        parsed_error = super()._do_error_parse(response, shape)
        self._inject_error_code(parsed_error, response)
        return parsed_error

    def _inject_error_code(self, error, response):
        """Set ``error['Error']['Code']`` from the header or the body.

        The ``x-amzn-errortype`` header wins over a ``code``/``Code`` key
        in the JSON body. String codes are reduced to the part before the
        first ``:`` and after the last ``#``. Nothing is changed when no
        code is found.
        """
        body = self._initial_body_parse(response['body'])
        headers = response['headers']
        if 'x-amzn-errortype' in headers:
            code = headers['x-amzn-errortype']
        elif 'code' in body or 'Code' in body:
            code = body.get('code', body.get('Code', ''))
        else:
            return
        if code is None:
            return
        if isinstance(code, str):
            before_colon = code.split(':', 1)[0]
            code = before_colon.rsplit('#', 1)[-1]
        error['Error']['Code'] = code

    def _handle_boolean(self, shape, value):
        """Coerce ``value`` with ``ensure_boolean``."""
        return ensure_boolean(value)

    def _handle_integer(self, shape, value):
        """Coerce ``value`` to ``int``."""
        return int(value)

    def _handle_float(self, shape, value):
        """Coerce ``value`` to ``float``."""
        return float(value)

    # Longs and doubles share the integer and float handlers.
    _handle_long = _handle_integer
    _handle_double = _handle_float
1341
1342
class RpcV2CBORParser(BaseRpcV2Parser, BaseCBORParser):
    """Response parser for the ``smithy-rpc-v2-cbor`` protocol."""

    EVENT_STREAM_PARSER_CLS = EventStreamCBORParser

    def _initial_body_parse(self, body_contents):
        """Parse the raw body as a single CBOR data item.

        An empty body yields an empty dict (the same convention as
        ``EventStreamCBORParser``), so callers in this class can use
        ``.get`` and item assignment on the result without special-casing
        empty responses.
        """
        if not body_contents:
            return {}
        body_contents_stream = self.get_peekable_stream_from_bytes(
            body_contents
        )
        return self.parse_data_item(body_contents_stream)

    def _do_error_parse(self, response, shape):
        """Build the error dict for an error response.

        The message comes from the body's ``message``/``Message`` key.
        The code comes from the body's ``__type`` key, falling back to
        the HTTP status code; it is reduced to the part before the first
        ``:`` and after the last ``#``. When the ``x-amzn-query-error``
        header is present, the code is passed through
        ``_do_query_compatible_error_parse``. The request id is taken
        from ``x-amzn-requestid`` when available.
        """
        body = self._initial_body_parse(response['body'])
        error = {
            "Error": {
                "Message": body.get('message', body.get('Message', '')),
                "Code": '',
            },
            "ResponseMetadata": {},
        }
        headers = response['headers']

        code = body.get('__type')
        if code is None:
            response_code = response.get('status_code')
            if response_code is not None:
                code = str(response_code)
        if code is not None:
            if ':' in code:
                code = code.split(':', 1)[0]
            if '#' in code:
                code = code.rsplit('#', 1)[1]
            if 'x-amzn-query-error' in headers:
                code = self._do_query_compatible_error_parse(
                    code, headers, error
                )
            error['Error']['Code'] = code
        if 'x-amzn-requestid' in headers:
            error.setdefault('ResponseMetadata', {})['RequestId'] = headers[
                'x-amzn-requestid'
            ]
        return error

    def _handle_event_stream(self, response, shape, event_name):
        """Parse an event stream response.

        The payload of the initial-response event becomes the parsed
        dict and the event stream object is stored under ``event_name``.

        :raises ResponseParserError: If the first event is not an
            initial-response event.
        """
        event_stream_shape = shape.members[event_name]
        event_stream = self._create_event_stream(response, event_stream_shape)
        try:
            event = event_stream.get_initial_response()
        except NoInitialResponseError:
            error_msg = 'First event was not of type initial-response'
            raise ResponseParserError(error_msg)
        parsed = self._initial_body_parse(event.payload)
        parsed[event_name] = event_stream
        return parsed
1397
1398
class RestXMLParser(BaseRestParser, BaseXMLResponseParser):
    """REST-style response parser whose bodies are XML documents."""

    EVENT_STREAM_PARSER_CLS = EventStreamXMLParser

    def _initial_body_parse(self, xml_string):
        """Return the body as an XML DOM; an empty body gives an empty element."""
        if xml_string:
            return self._parse_xml_string_to_dom(xml_string)
        return ETree.Element('')

    def _do_error_parse(self, response, shape):
        """Parse an error response, preferring the XML body.

        Two body layouts are handled by ``_parse_error_from_body``: the
        S3 form, whose root element is ``<Error>``, and the form used by
        other rest-xml services::

            <ErrorResponse xmlns="...">
              <Error>
                <Type>Sender</Type>
                <Code>InvalidInput</Code>
                <Message>Invalid resource type: foo</Message>
              </Error>
              <RequestId>request-id</RequestId>
            </ErrorResponse>

        If the body is empty, or cannot be parsed, the error is derived
        from the HTTP status code instead.
        """
        if not response['body']:
            return self._parse_error_from_http_status(response)
        try:
            return self._parse_error_from_body(response)
        except ResponseParserError:
            # Invalid XML should not blow up the parser; fall back to
            # other sources of information such as the status code.
            LOG.debug(
                'Exception caught when parsing error response body:',
                exc_info=True,
            )
        return self._parse_error_from_http_status(response)

    def _parse_error_from_http_status(self, response):
        """Build an error dict from the status code and the headers."""
        status_code = response['status_code']
        error_details = {
            'Code': str(status_code),
            'Message': http.client.responses.get(status_code, ''),
        }
        headers = response['headers']
        metadata = {
            'RequestId': headers.get('x-amz-request-id', ''),
            'HostId': headers.get('x-amz-id-2', ''),
        }
        return {'Error': error_details, 'ResponseMetadata': metadata}

    def _parse_error_from_body(self, response):
        """Build an error dict from the XML body of the response."""
        root = self._parse_xml_string_to_dom(response['body'])
        parsed = self._build_name_to_xml_node(root)
        self._replace_nodes(parsed)
        if root.tag == 'Error':
            # S3 error response: the metadata comes from the headers.
            metadata = self._populate_response_metadata(response)
            # RequestId and HostId are duplicated in the XML body; they
            # only need to live in ResponseMetadata, so drop them here.
            for duplicated_key in ('RequestId', 'HostId'):
                parsed.pop(duplicated_key, '')
            return {'Error': parsed, 'ResponseMetadata': metadata}
        if 'RequestId' in parsed:
            # Other rest-xml services:
            parsed['ResponseMetadata'] = {'RequestId': parsed.pop('RequestId')}
        default = {'Error': {'Message': '', 'Code': ''}}
        merge_dicts(default, parsed)
        return default

    @_text_content
    def _handle_string(self, shape, text):
        """Delegate string handling to the parent classes."""
        return super()._handle_string(shape, text)
1475
1476
# Maps each supported protocol name to the response parser class used
# for that protocol.
PROTOCOL_PARSERS = {
    'ec2': EC2QueryParser,
    'query': QueryParser,
    'json': JSONParser,
    'rest-json': RestJSONParser,
    'rest-xml': RestXMLParser,
    'smithy-rpc-v2-cbor': RpcV2CBORParser,
}