1# Copyright 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License"). You
4# may not use this file except in compliance with the License. A copy of
5# the License is located at
6#
7# http://aws.amazon.com/apache2.0/
8#
9# or in the "license" file accompanying this file. This file is
10# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11# ANY KIND, either express or implied. See the License for the specific
12# language governing permissions and limitations under the License.
13"""Response parsers for the various protocol types.
14
15The module contains classes that can take an HTTP response, and given
16an output shape, parse the response into a dict according to the
17rules in the output shape.
18
19There are many similarities amongst the different protocols with regard
20to response parsing, and the code is structured in a way to avoid
code duplication when possible. The diagram below shows the
inheritance hierarchy of the response parser classes.
23
24::
25
26
27 +-------------------+
28 | ResponseParser |
29 +-------------------+
30 ^ ^ ^ ^ ^
31 | | | | |
32 | | | | +--------------------------------------------+
33 | | | +-----------------------------+ |
34 | | | | |
35 +--------------------+ | +----------------+ | |
36 | | | | |
37+----------+----------+ +------+-------+ +-------+------+ +------+-------+ +------+--------+
38|BaseXMLResponseParser| |BaseRestParser| |BaseJSONParser| |BaseCBORParser| |BaseRpcV2Parser|
39+---------------------+ +--------------+ +--------------+ +----------+---+ +-+-------------+
40 ^ ^ ^ ^ ^ ^ ^ ^
41 | | | | | | | |
42 | | | | | | | |
43 | ++----------+-+ +-+--------+---+ | +---+---------+-+
44 | |RestXMLParser| |RestJSONParser| | |RpcV2CBORParser|
45 +-----+-----+ +-------------+ +--------------+ | +---+---------+-+
46 |QueryParser| |
47 +-----------+ +----+-----+
48 |JSONParser|
49 +----------+
50
51The diagram above shows that there is a base class, ``ResponseParser`` that
52contains logic that is similar amongst all the different protocols (``query``,
53``json``, ``rest-json``, ``rest-xml``, ``smithy-rpc-v2-cbor``). Amongst the various services
54there is shared logic that can be grouped several ways:
55
56* The ``query`` and ``rest-xml`` both have XML bodies that are parsed in the
57 same way.
58* The ``json`` and ``rest-json`` protocols both have JSON bodies that are
59 parsed in the same way.
60* The ``rest-json`` and ``rest-xml`` protocols have additional attributes
61 besides body parameters that are parsed the same (headers, query string,
62 status code).
63
64This is reflected in the class diagram above. The ``BaseXMLResponseParser``
65and the BaseJSONParser contain logic for parsing the XML/JSON body,
66and the BaseRestParser contains logic for parsing out attributes that
67come from other parts of the HTTP response. Classes like the
68``RestXMLParser`` inherit from the ``BaseXMLResponseParser`` to get the
69XML body parsing logic and the ``BaseRestParser`` to get the HTTP
70header/status code/query string parsing.
71
72Additionally, there are event stream parsers that are used by the other parsers
73to wrap streaming bodies that represent a stream of events. The
74BaseEventStreamParser extends from ResponseParser and defines the logic for
75parsing values from the headers and payload of a message from the underlying
76binary encoding protocol. Currently, event streams support parsing bodies
77encoded as JSON and XML through the following hierarchy.
78
79
80 +--------------+
81 |ResponseParser|
82 +--------------+
83 ^ ^ ^
84 +--------------------+ | +------------------+
85 | | |
86 +----------+----------+ +----------+----------+ +-------+------+
87 |BaseXMLResponseParser| |BaseEventStreamParser| |BaseJSONParser|
88 +---------------------+ +---------------------+ +--------------+
89 ^ ^ ^ ^
90 | | | |
91 | | | |
92 +-+----------------+-+ +-+-----------------+-+
93 |EventStreamXMLParser| |EventStreamJSONParser|
94 +--------------------+ +---------------------+
95
96Return Values
97=============
98
Each call to ``parse()`` returns a dict that has this form::
100
101 Standard Response
102
103 {
104 "ResponseMetadata": {"RequestId": <requestid>}
105 <response keys>
106 }
107
108 Error response
109
110 {
111 "ResponseMetadata": {"RequestId": <requestid>}
112 "Error": {
113 "Code": <string>,
114 "Message": <string>,
115 "Type": <string>,
116 <additional keys>
117 }
118 }
119
120"""
121
import base64
import functools
import http.client
import io
import json
import logging
import os
import re
import struct

from botocore.compat import ETree, XMLParseError
from botocore.eventstream import EventStream, NoInitialResponseError
from botocore.utils import (
    CachedProperty,
    ensure_boolean,
    is_json_value_header,
    lowercase_dict,
    merge_dicts,
    parse_timestamp,
)
141
142LOG = logging.getLogger(__name__)
143
144DEFAULT_TIMESTAMP_PARSER = parse_timestamp
145
146
class ResponseParserFactory:
    """Create protocol-specific parsers that share a set of default kwargs."""

    def __init__(self):
        # Keyword arguments handed to every parser built by this factory.
        self._defaults = dict()

    def set_parser_defaults(self, **kwargs):
        """Set default arguments when a parser instance is created.

        You can specify any kwargs that are allowed by a ResponseParser
        class.  There are currently two arguments:

            * timestamp_parser - A callable that can parse a timestamp string
            * blob_parser - A callable that can parse a blob type

        Later calls overwrite earlier values for the same keyword.
        """
        for option, setting in kwargs.items():
            self._defaults[option] = setting

    def create_parser(self, protocol_name):
        """Return a new parser for ``protocol_name``.

        The parser class is looked up in ``PROTOCOL_PARSERS`` and is
        constructed with the defaults recorded so far.
        """
        parser_options = self._defaults
        return PROTOCOL_PARSERS[protocol_name](**parser_options)
166
167
def create_parser(protocol):
    """Return a parser for ``protocol`` built by a fresh factory.

    The factory is created here, so no parser defaults are applied.
    """
    factory = ResponseParserFactory()
    return factory.create_parser(protocol)
170
171
172def _text_content(func):
173 # This decorator hides the difference between
174 # an XML node with text or a plain string. It's used
175 # to ensure that scalar processing operates only on text
176 # strings, which allows the same scalar handlers to be used
177 # for XML nodes from the body and HTTP headers.
178 def _get_text_content(self, shape, node_or_string):
179 if hasattr(node_or_string, 'text'):
180 text = node_or_string.text
181 if text is None:
182 # If an XML node is empty <foo></foo>,
183 # we want to parse that as an empty string,
184 # not as a null/None value.
185 text = ''
186 else:
187 text = node_or_string
188 return func(self, shape, text)
189
190 return _get_text_content
191
192
class ResponseParserError(Exception):
    """Raised by the parsers in this module when a response cannot be parsed."""
195
196
class ResponseParser:
    """Base class for response parsing.

    This class represents the interface that all ResponseParsers for the
    various protocols must implement.

    This class will take an HTTP response and a model shape and parse the
    HTTP response into a dictionary.

    There is a single public method exposed: ``parse``.  See the ``parse``
    docstring for more info.

    """

    DEFAULT_ENCODING = 'utf-8'
    # Subclasses may set this to a parser class; when set, an instance is
    # created in ``__init__`` and used by ``_create_event_stream``.
    EVENT_STREAM_PARSER_CLS = None

    def __init__(self, timestamp_parser=None, blob_parser=None):
        """Configure the callables used for timestamp and blob values.

        :param timestamp_parser: Callable used to parse timestamps.
            Defaults to ``DEFAULT_TIMESTAMP_PARSER``.
        :param blob_parser: Callable used to parse blobs.  Defaults to
            base64 decoding (``_default_blob_parser``).
        """
        if timestamp_parser is None:
            timestamp_parser = DEFAULT_TIMESTAMP_PARSER
        self._timestamp_parser = timestamp_parser
        if blob_parser is None:
            blob_parser = self._default_blob_parser
        self._blob_parser = blob_parser
        self._event_stream_parser = None
        if self.EVENT_STREAM_PARSER_CLS is not None:
            # The event stream parser shares the resolved value parsers.
            self._event_stream_parser = self.EVENT_STREAM_PARSER_CLS(
                timestamp_parser, blob_parser
            )

    def _default_blob_parser(self, value):
        # Blobs are always returned as bytes type (this matters on python3).
        # We don't decode this to a str because it's entirely possible that the
        # blob contains binary data that actually can't be decoded.
        return base64.b64decode(value)

    def parse(self, response, shape):
        """Parse the HTTP response given a shape.

        :param response: The HTTP response dictionary.  This is a dictionary
            that represents the HTTP response.  The dictionary must have the
            following keys, ``body``, ``headers``, and ``status_code``.

        :param shape: The model shape describing the expected output.
        :return: Returns a dictionary representing the parsed response
            described by the model.  In addition to the shape described from
            the model, each response will also have a ``ResponseMetadata``
            which contains metadata about the response, which contains at least
            two keys containing ``RequestId`` and ``HTTPStatusCode``.  Some
            responses may populate additional keys, but ``RequestId`` will
            always be present.

        """
        LOG.debug('Response headers: %r', response['headers'])
        LOG.debug('Response body:\n%r', response['body'])
        if response['status_code'] >= 301:
            if self._is_generic_error_response(response):
                parsed = self._do_generic_error_parse(response)
            elif self._is_modeled_error_shape(shape):
                parsed = self._do_modeled_error_parse(response, shape)
                # We don't want to decorate the modeled fields with metadata
                return parsed
            else:
                parsed = self._do_error_parse(response, shape)
        else:
            parsed = self._do_parse(response, shape)

        # We don't want to decorate event stream responses with metadata
        if shape and shape.serialization.get('eventstream'):
            return parsed

        # Add ResponseMetadata if it doesn't exist and inject the HTTP
        # status code and headers from the response.
        if isinstance(parsed, dict):
            response_metadata = parsed.get('ResponseMetadata', {})
            response_metadata['HTTPStatusCode'] = response['status_code']
            # Ensure that the http header keys are all lower cased. Older
            # versions of urllib3 (< 1.11) would unintentionally do this for us
            # (see urllib3#633). We need to do this conversion manually now.
            headers = response['headers']
            response_metadata['HTTPHeaders'] = lowercase_dict(headers)
            parsed['ResponseMetadata'] = response_metadata
            self._add_checksum_response_metadata(response, response_metadata)
        return parsed

    def _add_checksum_response_metadata(self, response, response_metadata):
        """Copy the response checksum algorithm, if any, into the metadata."""
        checksum_context = response.get('context', {}).get('checksum', {})
        algorithm = checksum_context.get('response_algorithm')
        if algorithm:
            response_metadata['ChecksumAlgorithm'] = algorithm

    def _is_modeled_error_shape(self, shape):
        """Return a truthy value when ``shape`` is marked as an exception."""
        return shape is not None and shape.metadata.get('exception', False)

    def _is_generic_error_response(self, response):
        """Return True when a 5xx response has no protocol-specific body.

        :return: ``True`` for a status code of 500 or above whose body is
            missing, ``None``, empty, or starts with ``<html>``; ``False``
            otherwise.
        """
        # There are times when a service will respond with a generic
        # error response such as:
        # '<html><body><b>Http/1.1 Service Unavailable</b></body></html>'
        #
        # This can also happen if you're going through a proxy.
        # In this case the protocol specific _do_error_parse will either
        # fail to parse the response (in the best case) or silently succeed
        # and treat the HTML above as an XML response and return
        # non sensical parsed data.
        # To prevent this case from happening we first need to check
        # whether or not this response looks like the generic response.
        if response['status_code'] >= 500:
            if 'body' not in response or response['body'] is None:
                return True

            body = response['body'].strip()
            return body.startswith(b'<html>') or not body
        return False

    def _do_generic_error_parse(self, response):
        """Build an error dict from the status code alone."""
        # There's not really much we can do when we get a generic
        # html response.
        LOG.debug(
            "Received a non protocol specific error response from the "
            "service, unable to populate error code and message."
        )
        return {
            'Error': {
                'Code': str(response['status_code']),
                'Message': http.client.responses.get(
                    response['status_code'], ''
                ),
            },
            'ResponseMetadata': {},
        }

    def _do_parse(self, response, shape):
        raise NotImplementedError(f"{self.__class__.__name__}._do_parse")

    def _do_error_parse(self, response, shape):
        raise NotImplementedError(f"{self.__class__.__name__}._do_error_parse")

    def _do_modeled_error_parse(self, response, shape, parsed=None):
        # ``parse`` calls this hook with (response, shape) only, so
        # ``parsed`` must be optional; otherwise a subclass that does not
        # override the hook fails with TypeError rather than the intended
        # NotImplementedError.
        raise NotImplementedError(
            f"{self.__class__.__name__}._do_modeled_error_parse"
        )

    def _parse_shape(self, shape, node):
        """Dispatch to ``_handle_<type_name>``, or to ``_default_handle``."""
        handler = getattr(
            self, f'_handle_{shape.type_name}', self._default_handle
        )
        return handler(shape, node)

    def _handle_list(self, shape, node):
        # Enough implementations share list serialization that it's moved
        # up here in the base class.
        member_shape = shape.member
        return [self._parse_shape(member_shape, item) for item in node]

    def _default_handle(self, shape, value):
        """Return ``value`` unchanged."""
        return value

    def _create_event_stream(self, response, shape):
        """Wrap the response body in an ``EventStream``."""
        parser = self._event_stream_parser
        name = response['context'].get('operation_name')
        return EventStream(response['body'], shape, parser, name)

    def _get_first_key(self, value):
        """Return the first key of ``value`` in iteration order."""
        return list(value)[0]

    def _has_unknown_tagged_union_member(self, shape, value):
        """Report whether a tagged union holds a member this client lacks.

        :return: ``True`` when ``shape`` is a tagged union and its single
            set member is not one of the shape's serialized member names;
            ``False`` otherwise.
        :raises ResponseParserError: If the tagged union does not have
            exactly one non-``None`` member set (ignoring ``__type``).
        """
        if shape.is_tagged_union:
            cleaned_value = value.copy()
            cleaned_value.pop("__type", None)
            cleaned_value = {
                k: v for k, v in cleaned_value.items() if v is not None
            }
            if len(cleaned_value) != 1:
                error_msg = (
                    "Invalid service response: %s must have one and only "
                    "one member set."
                )
                raise ResponseParserError(error_msg % shape.name)
            tag = self._get_first_key(cleaned_value)
            serialized_member_names = [
                shape.members[member].serialization.get('name', member)
                for member in shape.members
            ]
            if tag not in serialized_member_names:
                msg = (
                    "Received a tagged union response with member "
                    "unknown to client: %s. Please upgrade SDK for full "
                    "response support."
                )
                LOG.info(msg, tag)
                return True
        return False

    def _handle_unknown_tagged_union_member(self, tag):
        """Return the placeholder dict used for an unknown union member."""
        return {'SDK_UNKNOWN_MEMBER': {'name': tag}}

    def _do_query_compatible_error_parse(self, code, headers, error):
        """
        Error response may contain an x-amzn-query-error header to translate
        errors codes from former `query` services into other protocols. We use this
        to do our lookup in the errorfactory for modeled errors.

        :return: The first ``;``-separated component of the header when the
            header has exactly two components and the first is non-empty;
            otherwise ``code`` unchanged.  In the former case ``error`` is
            updated in place with ``QueryErrorCode`` and ``Type``.
        """
        query_error = headers['x-amzn-query-error']
        query_error_components = query_error.split(';')

        if len(query_error_components) == 2 and query_error_components[0]:
            error['Error']['QueryErrorCode'] = code
            error['Error']['Type'] = query_error_components[1]
            return query_error_components[0]
        return code
409
410
class BaseXMLResponseParser(ResponseParser):
    """Shared shape handlers for protocols whose bodies are XML."""

    def __init__(self, timestamp_parser=None, blob_parser=None):
        super().__init__(timestamp_parser, blob_parser)
        # Matches the "{...}" namespace prefix of an element tag.
        self._namespace_re = re.compile('{.*}')

    def _handle_map(self, shape, node):
        """Parse a sequence of entry nodes into a dict."""
        key_shape, value_shape = shape.key, shape.value
        key_tag = key_shape.serialization.get('name') or 'key'
        value_tag = value_shape.serialization.get('name') or 'value'
        if shape.serialization.get('flattened') and not isinstance(node, list):
            entries = [node]
        else:
            entries = node
        result = {}
        for entry in entries:
            # Within each <entry> there's a <key> and a <value>
            for child in entry:
                child_tag = self._node_tag(child)
                if child_tag == key_tag:
                    key_name = self._parse_shape(key_shape, child)
                elif child_tag == value_tag:
                    val_name = self._parse_shape(value_shape, child)
                else:
                    raise ResponseParserError(f"Unknown tag: {child_tag}")
            result[key_name] = val_name
        return result

    def _node_tag(self, node):
        """Return the node's tag with any namespace prefix removed."""
        raw_tag = node.tag
        return self._namespace_re.sub('', raw_tag)

    def _handle_list(self, shape, node):
        # _build_name_to_xml_node aggregates repeated elements into a list,
        # but a single-element flattened list is indistinguishable from a
        # scalar.  For a flattened shape, a non-list node is therefore
        # wrapped into a one element list before delegating to the base
        # implementation.
        flattened = shape.serialization.get('flattened')
        if flattened and not isinstance(node, list):
            node = [node]
        return super()._handle_list(shape, node)

    def _handle_structure(self, shape, node):
        """Parse the children (and XML attributes) of ``node`` into a dict."""
        result = {}
        members = shape.members
        if shape.metadata.get('exception', False):
            node = self._get_error_root(node)
        children_by_tag = self._build_name_to_xml_node(node)
        if self._has_unknown_tagged_union_member(shape, children_by_tag):
            return self._handle_unknown_tagged_union_member(
                self._get_first_key(children_by_tag)
            )
        for member_name in members:
            member_shape = members[member_name]
            if (
                'location' in member_shape.serialization
                or member_shape.serialization.get('eventheader')
            ):
                # All members with locations have already been handled,
                # so we don't need to parse these members.
                continue
            xml_name = self._member_key_name(member_shape, member_name)
            member_node = children_by_tag.get(xml_name)
            if member_node is not None:
                result[member_name] = self._parse_shape(
                    member_shape, member_node
                )
                continue
            if not member_shape.serialization.get('xmlAttribute'):
                continue
            location_name = member_shape.serialization['name']
            # Rewrite each attribute's "{namespace}" prefix to the prefix
            # used by the modeled location name, then look the name up.
            attribs = {}
            for attr_name, attr_value in node.attrib.items():
                prefix = location_name.split(':')[0] + ':'
                attribs[self._namespace_re.sub(prefix, attr_name)] = attr_value
            if location_name in attribs:
                result[member_name] = attribs[location_name]
        return result

    def _get_error_root(self, original_root):
        """Return the ``Error`` child of an ``ErrorResponse`` root, if any."""
        if self._node_tag(original_root) != 'ErrorResponse':
            return original_root
        for candidate in original_root:
            if self._node_tag(candidate) == 'Error':
                return candidate
        return original_root

    def _member_key_name(self, shape, member_name):
        # Flattened lists with a serialization name are a special case: the
        # locationName of the list's member shape is used as the key name
        # for the surrounding structure.
        serialization = shape.serialization
        if shape.type_name == 'list' and serialization.get('flattened'):
            inner_name = shape.member.serialization.get('name')
            if inner_name is not None:
                return inner_name
        own_name = shape.serialization.get('name')
        return member_name if own_name is None else own_name

    def _build_name_to_xml_node(self, parent_node):
        """Map each child tag to its node, or to a list of repeated nodes."""
        # If the parent node is actually a list. We should not be trying
        # to serialize it to a dictionary. Instead, return the first element
        # in the list.
        if isinstance(parent_node, list):
            return self._build_name_to_xml_node(parent_node[0])
        nodes_by_tag = {}
        for child in parent_node:
            tag = self._node_tag(child)
            if tag not in nodes_by_tag:
                nodes_by_tag[tag] = child
                continue
            # Repeated keys are aggregated into a single list:
            # <foo>1</foo><foo>2</foo> -> {'foo': [Node(1), Node(2)]}
            existing = nodes_by_tag[tag]
            if isinstance(existing, list):
                existing.append(child)
            else:
                # Convert from a scalar to a list.
                nodes_by_tag[tag] = [existing, child]
        return nodes_by_tag

    def _parse_xml_string_to_dom(self, xml_string):
        """Parse ``xml_string`` and return the root element.

        :raises ResponseParserError: If the XML cannot be parsed.
        """
        try:
            builder = ETree.TreeBuilder()
            parser = ETree.XMLParser(
                target=builder, encoding=self.DEFAULT_ENCODING
            )
            parser.feed(xml_string)
            return parser.close()
        except XMLParseError as e:
            raise ResponseParserError(
                f"Unable to parse response ({e}), "
                f"invalid XML received. Further retries may succeed:\n{xml_string}"
            )

    def _replace_nodes(self, parsed):
        """Recursively replace nodes in ``parsed`` with dicts or text."""
        for name, element in parsed.items():
            if list(element):
                nested = self._build_name_to_xml_node(element)
                parsed[name] = self._replace_nodes(nested)
            else:
                parsed[name] = element.text
        return parsed

    @_text_content
    def _handle_boolean(self, shape, text):
        # Only the literal string 'true' is treated as True.
        return text == 'true'

    @_text_content
    def _handle_float(self, shape, text):
        return float(text)

    @_text_content
    def _handle_timestamp(self, shape, text):
        return self._timestamp_parser(text)

    @_text_content
    def _handle_integer(self, shape, text):
        return int(text)

    @_text_content
    def _handle_string(self, shape, text):
        return text

    @_text_content
    def _handle_blob(self, shape, text):
        return self._blob_parser(text)

    # These types share the parsing of the handlers above.
    _handle_character = _handle_string
    _handle_double = _handle_float
    _handle_long = _handle_integer
585
586
class QueryParser(BaseXMLResponseParser):
    """Parser for responses whose body is an XML document."""

    def _do_error_parse(self, response, shape):
        """Convert an XML error body into a nested dict."""
        root = self._parse_xml_string_to_dom(response['body'])
        error = self._build_name_to_xml_node(root)
        self._replace_nodes(error)
        # Once we've converted xml->dict, we need to make one or two
        # more adjustments to extract nested errors and to be consistent
        # with ResponseMetadata for non-error responses:
        # 1. {"Errors": {"Error": {...}}} -> {"Error": {...}}
        # 2. {"RequestId": "id"} -> {"ResponseMetadata": {"RequestId": "id"}}
        if 'Errors' in error:
            nested_errors = error.pop('Errors')
            error.update(nested_errors)
        if 'RequestId' in error:
            request_id = error.pop('RequestId')
            error['ResponseMetadata'] = {'RequestId': request_id}
        return error

    def _do_modeled_error_parse(self, response, shape):
        # Modeled errors are returned without ResponseMetadata.
        return self._parse_body_as_xml(response, shape, inject_metadata=False)

    def _do_parse(self, response, shape):
        return self._parse_body_as_xml(response, shape, inject_metadata=True)

    def _parse_body_as_xml(self, response, shape, inject_metadata=True):
        """Parse the XML body according to ``shape``.

        When ``shape`` is ``None`` the result starts as an empty dict.
        ``ResponseMetadata`` is injected only if ``inject_metadata`` is true.
        """
        root = self._parse_xml_string_to_dom(response['body'])
        if shape is None:
            result = {}
        else:
            if 'resultWrapper' in shape.serialization:
                start = self._find_result_wrapped_shape(
                    shape.serialization['resultWrapper'], root
                )
            else:
                start = root
            result = self._parse_shape(shape, start)
        if inject_metadata:
            self._inject_response_metadata(root, result)
        return result

    def _find_result_wrapped_shape(self, element_name, xml_root_node):
        """Return the child of the root whose tag is ``element_name``."""
        children_by_tag = self._build_name_to_xml_node(xml_root_node)
        return children_by_tag[element_name]

    def _inject_response_metadata(self, node, inject_into):
        """Copy the text of each ``ResponseMetadata`` child into the result."""
        metadata_node = self._build_name_to_xml_node(node).get(
            'ResponseMetadata'
        )
        if metadata_node is None:
            return
        children = self._build_name_to_xml_node(metadata_node)
        inject_into['ResponseMetadata'] = {
            tag: child.text for tag, child in children.items()
        }
637
638
class EC2QueryParser(QueryParser):
    """QueryParser variant for the EC2 response layout."""

    def _inject_response_metadata(self, node, inject_into):
        """Read the request id from a top-level ``requestId`` element."""
        request_id_node = self._build_name_to_xml_node(node).get('requestId')
        if request_id_node is None:
            return
        inject_into['ResponseMetadata'] = {'RequestId': request_id_node.text}

    def _do_error_parse(self, response, shape):
        # EC2 errors look like:
        # <Response>
        #   <Errors>
        #     <Error>
        #       <Code>InvalidInstanceID.Malformed</Code>
        #       <Message>Invalid id: "1343124"</Message>
        #     </Error>
        #   </Errors>
        #   <RequestID>12345</RequestID>
        # </Response>
        # This is different from QueryParser in that it's RequestID,
        # not RequestId
        error = super()._do_error_parse(response, shape)
        if 'RequestID' in error:
            request_id = error.pop('RequestID')
            error['ResponseMetadata'] = {'RequestId': request_id}
        return error

    def _get_error_root(self, original_root):
        """Return the first ``Errors/Error`` node, else the original root."""
        for child in original_root:
            if self._node_tag(child) != 'Errors':
                continue
            for grandchild in child:
                if self._node_tag(grandchild) == 'Error':
                    return grandchild
        return original_root
673
674
class BaseJSONParser(ResponseParser):
    """Shared shape handlers for protocols whose bodies are JSON."""

    def _handle_structure(self, shape, value):
        """Parse a JSON object according to the members of ``shape``."""
        # Document types are passed through untouched.
        if shape.is_document_type:
            return value
        member_shapes = shape.members
        if value is None:
            # A value that comes across the wire as "null" (None in
            # python) is returned unchanged instead of as an empty dict.
            return None
        if self._has_unknown_tagged_union_member(shape, value):
            return self._handle_unknown_tagged_union_member(
                self._get_first_key(value)
            )
        parsed = {}
        for member_name in member_shapes:
            member_shape = member_shapes[member_name]
            json_name = member_shape.serialization.get('name', member_name)
            raw_value = value.get(json_name)
            if raw_value is None:
                continue
            parsed[member_name] = self._parse_shape(
                member_shapes[member_name], raw_value
            )
        return parsed

    def _handle_map(self, shape, value):
        """Parse every key and value of a JSON object with the map's shapes."""
        key_shape, value_shape = shape.key, shape.value
        result = {}
        for raw_key, raw_value in value.items():
            parsed_key = self._parse_shape(key_shape, raw_key)
            result[parsed_key] = self._parse_shape(value_shape, raw_value)
        return result

    def _handle_blob(self, shape, value):
        return self._blob_parser(value)

    def _handle_timestamp(self, shape, value):
        return self._timestamp_parser(value)

    def _do_error_parse(self, response, shape):
        """Build an error dict from a JSON error body and the headers."""
        body = self._parse_body_as_json(response['body'])
        error = {"Error": {"Message": '', "Code": ''}, "ResponseMetadata": {}}
        headers = response['headers']
        # Error responses can have slightly different structures for json.
        # The basic structure is:
        #
        # {"__type":"ConnectClientException",
        #  "message":"The error message."}

        # The error message can either come in the 'message' or 'Message' key
        # so we need to check for both.
        fallback_message = body.get('Message', '')
        error['Error']['Message'] = body.get('message', fallback_message)
        # if the message did not contain an error code
        # include the response status code
        response_code = response.get('status_code')
        default_code = response_code and str(response_code)

        code = body.get('__type', default_code)
        if code is not None:
            # code has a couple forms as well:
            # * "com.aws.dynamodb.vAPI#ProvisionedThroughputExceededException"
            # * "ResourceNotFoundException"
            if ':' in code:
                code = code.split(':', 1)[0]
            if '#' in code:
                code = code.rsplit('#', 1)[1]
            if 'x-amzn-query-error' in headers:
                code = self._do_query_compatible_error_parse(
                    code, headers, error
                )
            error['Error']['Code'] = code
        self._inject_response_metadata(error, response['headers'])
        return error

    def _inject_response_metadata(self, parsed, headers):
        """Copy the ``x-amzn-requestid`` header into ``ResponseMetadata``."""
        if 'x-amzn-requestid' not in headers:
            return
        request_id = headers['x-amzn-requestid']
        parsed.setdefault('ResponseMetadata', {})['RequestId'] = request_id

    def _parse_body_as_json(self, body_contents):
        """Decode and load a JSON body; an empty body yields ``{}``."""
        if not body_contents:
            return {}
        body = body_contents.decode(self.DEFAULT_ENCODING)
        try:
            return json.loads(body)
        except ValueError:
            # if the body cannot be parsed, include
            # the literal string as the message
            return {'message': body}
770
771
772class BaseCBORParser(ResponseParser):
773 INDEFINITE_ITEM_ADDITIONAL_INFO = 31
774 BREAK_CODE = 0xFF
775
776 @CachedProperty
777 def major_type_to_parsing_method_map(self):
778 return {
779 0: self._parse_unsigned_integer,
780 1: self._parse_negative_integer,
781 2: self._parse_byte_string,
782 3: self._parse_text_string,
783 4: self._parse_array,
784 5: self._parse_map,
785 6: self._parse_tag,
786 7: self._parse_simple_and_float,
787 }
788
789 def get_peekable_stream_from_bytes(self, bytes):
790 return io.BufferedReader(io.BytesIO(bytes))
791
792 def parse_data_item(self, stream):
793 # CBOR data is divided into "data items", and each data item starts
794 # with an initial byte that describes how the following bytes should be parsed
795 initial_byte = self._read_bytes_as_int(stream, 1)
796 # The highest order three bits of the initial byte describe the CBOR major type
797 major_type = initial_byte >> 5
798 # The lowest order 5 bits of the initial byte tells us more information about
799 # how the bytes should be parsed that will be used
800 additional_info = initial_byte & 0b00011111
801
802 if major_type in self.major_type_to_parsing_method_map:
803 method = self.major_type_to_parsing_method_map[major_type]
804 return method(stream, additional_info)
805 else:
806 raise ResponseParserError(
807 f"Unsupported inital byte found for data item- "
808 f"Major type:{major_type}, Additional info: "
809 f"{additional_info}"
810 )
811
812 # Major type 0 - unsigned integers
813 def _parse_unsigned_integer(self, stream, additional_info):
814 additional_info_to_num_bytes = {
815 24: 1,
816 25: 2,
817 26: 4,
818 27: 8,
819 }
820 # Values under 24 don't need a full byte to be stored; their values are
821 # instead stored as the "additional info" in the initial byte
822 if additional_info < 24:
823 return additional_info
824 elif additional_info in additional_info_to_num_bytes:
825 num_bytes = additional_info_to_num_bytes[additional_info]
826 return self._read_bytes_as_int(stream, num_bytes)
827 else:
828 raise ResponseParserError(
829 "Invalid CBOR integer returned from the service; unparsable "
830 f"additional info found for major type 0 or 1: {additional_info}"
831 )
832
833 # Major type 1 - negative integers
834 def _parse_negative_integer(self, stream, additional_info):
835 return -1 - self._parse_unsigned_integer(stream, additional_info)
836
837 # Major type 2 - byte string
838 def _parse_byte_string(self, stream, additional_info):
839 if additional_info != self.INDEFINITE_ITEM_ADDITIONAL_INFO:
840 length = self._parse_unsigned_integer(stream, additional_info)
841 return self._read_from_stream(stream, length)
842 else:
843 chunks = []
844 while True:
845 if self._handle_break_code(stream):
846 break
847 initial_byte = self._read_bytes_as_int(stream, 1)
848 additional_info = initial_byte & 0b00011111
849 length = self._parse_unsigned_integer(stream, additional_info)
850 chunks.append(self._read_from_stream(stream, length))
851 return b''.join(chunks)
852
853 # Major type 3 - text string
854 def _parse_text_string(self, stream, additional_info):
855 return self._parse_byte_string(stream, additional_info).decode('utf-8')
856
857 # Major type 4 - lists
858 def _parse_array(self, stream, additional_info):
859 if additional_info != self.INDEFINITE_ITEM_ADDITIONAL_INFO:
860 length = self._parse_unsigned_integer(stream, additional_info)
861 return [self.parse_data_item(stream) for _ in range(length)]
862 else:
863 items = []
864 while not self._handle_break_code(stream):
865 items.append(self.parse_data_item(stream))
866 return items
867
868 # Major type 5 - maps
869 def _parse_map(self, stream, additional_info):
870 items = {}
871 if additional_info != self.INDEFINITE_ITEM_ADDITIONAL_INFO:
872 length = self._parse_unsigned_integer(stream, additional_info)
873 for _ in range(length):
874 self._parse_key_value_pair(stream, items)
875 return items
876
877 else:
878 while not self._handle_break_code(stream):
879 self._parse_key_value_pair(stream, items)
880 return items
881
882 def _parse_key_value_pair(self, stream, items):
883 key = self.parse_data_item(stream)
884 value = self.parse_data_item(stream)
885 if value is not None:
886 items[key] = value
887
888 # Major type 6 is tags. The only tag we currently support is tag 1 for unix
889 # timestamps
890 def _parse_tag(self, stream, additional_info):
891 tag = self._parse_unsigned_integer(stream, additional_info)
892 value = self.parse_data_item(stream)
893 if tag == 1: # Epoch-based date/time in milliseconds
894 return self._parse_datetime(value)
895 else:
896 raise ResponseParserError(
897 f"Found CBOR tag not supported by botocore: {tag}"
898 )
899
900 def _parse_datetime(self, value):
901 if isinstance(value, (int, float)):
902 return self._timestamp_parser(value)
903 else:
904 raise ResponseParserError(
905 f"Unable to parse datetime value: {value}"
906 )
907
908 # Major type 7 includes floats and "simple" types. Supported simple types are
909 # currently boolean values, CBOR's null, and CBOR's undefined type. All other
910 # values are either floats or invalid.
911 def _parse_simple_and_float(self, stream, additional_info):
912 # For major type 7, values 20-23 correspond to CBOR "simple" values
913 additional_info_simple_values = {
914 20: False, # CBOR false
915 21: True, # CBOR true
916 22: None, # CBOR null
917 23: None, # CBOR undefined
918 }
919 # First we check if the additional info corresponds to a supported simple value
920 if additional_info in additional_info_simple_values:
921 return additional_info_simple_values[additional_info]
922
923 # If it's not a simple value, we need to parse it into the correct format and
924 # number fo bytes
925 float_formats = {
926 25: ('>e', 2),
927 26: ('>f', 4),
928 27: ('>d', 8),
929 }
930
931 if additional_info in float_formats:
932 float_format, num_bytes = float_formats[additional_info]
933 return struct.unpack(
934 float_format, self._read_from_stream(stream, num_bytes)
935 )[0]
936 raise ResponseParserError(
937 f"Invalid additional info found for major type 7: {additional_info}. "
938 f"This indicates an unsupported simple type or an indefinite float value"
939 )
940
941 # This helper method is intended for use when parsing indefinite length items.
942 # It does nothing if the next byte is not the break code. If the next byte is
943 # the break code, it advances past that byte and returns True so the calling
944 # method knows to stop parsing that data item.
945 def _handle_break_code(self, stream):
946 if int.from_bytes(stream.peek(1)[:1], 'big') == self.BREAK_CODE:
947 stream.seek(1, os.SEEK_CUR)
948 return True
949
950 def _read_bytes_as_int(self, stream, num_bytes):
951 byte = self._read_from_stream(stream, num_bytes)
952 return int.from_bytes(byte, 'big')
953
954 def _read_from_stream(self, stream, num_bytes):
955 value = stream.read(num_bytes)
956 if len(value) != num_bytes:
957 raise ResponseParserError(
958 "End of stream reached; this indicates a "
959 "malformed CBOR response from the server or an "
960 "issue in botocore"
961 )
962 return value
963
964
class BaseEventStreamParser(ResponseParser):
    """Shared parsing logic for individual event stream messages.

    Subclasses supply ``_initial_body_parse`` to perform the first,
    format-specific decode of a message body.
    """

    def _do_parse(self, response, shape):
        """Parse one event message according to ``shape``.

        For an event stream shape, the ``:event-type`` header selects the
        member shape and the result is nested under that event name. An
        event type with no matching member yields an empty dict.
        """
        result = {}
        if not shape.serialization.get('eventstream'):
            self._parse_non_payload_attrs(
                response, shape, shape.members, result
            )
            self._parse_payload(response, shape, shape.members, result)
            return result

        event_type = response['headers'].get(':event-type')
        event_shape = shape.members.get(event_type)
        if event_shape:
            result[event_type] = self._do_parse(response, event_shape)
        return result

    def _do_error_parse(self, response, shape):
        """Build an ``{'Error': {...}}`` dict from an error message.

        A modeled exception (``:exception-type`` matches a member) takes
        its message from the parsed body; otherwise the code and message
        come from the ``:error-code`` / ``:error-message`` headers.
        """
        headers = response['headers']
        exception_type = headers.get(':exception-type')
        exception_shape = shape.members.get(exception_type)
        if exception_shape is None:
            code = headers.get(':error-code', '')
            message = headers.get(':error-message', '')
        else:
            raw_body = self._initial_body_parse(response['body'])
            body = self._parse_shape(exception_shape, raw_body)
            code = exception_type
            message = body.get('Message', body.get('message', ''))
        return {'Error': {'Code': code, 'Message': message}}

    def _parse_payload(self, response, shape, member_shapes, final_parsed):
        """Populate ``final_parsed`` from the message body.

        If ``shape`` is an event with a member marked ``eventpayload``,
        only that member receives the body. Otherwise the whole body is
        parsed against ``shape`` and merged in.
        """
        if shape.serialization.get('event'):
            for name in member_shapes:
                member_shape = member_shapes[name]
                if not member_shape.serialization.get('eventpayload'):
                    continue
                body = response['body']
                payload_type = member_shape.type_name
                if payload_type == 'blob':
                    # Blobs are returned as the raw body.
                    payload = body
                elif payload_type == 'string':
                    payload = body.decode(self.DEFAULT_ENCODING)
                else:
                    payload = self._parse_shape(
                        member_shape, self._initial_body_parse(body)
                    )
                final_parsed[name] = payload
                return
        # No explicit payload member was found, so use the current shape.
        raw_body = self._initial_body_parse(response['body'])
        final_parsed.update(self._parse_shape(shape, raw_body))

    def _parse_non_payload_attrs(
        self, response, shape, member_shapes, final_parsed
    ):
        """Copy members marked ``eventheader`` from the message headers."""
        headers = response['headers']
        for name in member_shapes:
            member_shape = member_shapes[name]
            if not member_shape.serialization.get('eventheader'):
                continue
            if name not in headers:
                continue
            header_value = headers[name]
            if member_shape.type_name == 'timestamp':
                # Event stream timestamps are in milliseconds, so we
                # divide by 1000 to convert to seconds.
                header_value = self._timestamp_parser(header_value / 1000.0)
            final_parsed[name] = header_value

    def _initial_body_parse(self, body_contents):
        """Perform the first round of xml/json parsing of the body.

        The parsed body still has to be walked to convert types; this
        hook only does the initial decode and must be overridden.
        """
        raise NotImplementedError("_initial_body_parse")
1046
1047
class EventStreamJSONParser(BaseEventStreamParser, BaseJSONParser):
    """Event stream message parser that decodes bodies as JSON."""

    def _initial_body_parse(self, body_contents):
        """Decode the raw message body with the shared JSON helper."""
        parsed_json = self._parse_body_as_json(body_contents)
        return parsed_json
1051
1052
class EventStreamXMLParser(BaseEventStreamParser, BaseXMLResponseParser):
    """Event stream message parser that decodes bodies as XML."""

    def _initial_body_parse(self, xml_string):
        """Parse ``xml_string`` into a DOM node.

        A falsy body yields an empty placeholder element instead of
        being handed to the XML parser.
        """
        if xml_string:
            return self._parse_xml_string_to_dom(xml_string)
        return ETree.Element('')
1058
1059
class EventStreamCBORParser(BaseEventStreamParser, BaseCBORParser):
    """Event stream message parser that decodes bodies as CBOR."""

    def _initial_body_parse(self, body_contents):
        """Decode a CBOR message body; an empty body yields ``{}``."""
        if body_contents != b'':
            body_stream = self.get_peekable_stream_from_bytes(body_contents)
            return self.parse_data_item(body_stream)
        return {}
1067
1068
class JSONParser(BaseJSONParser):
    """Response parser for the "json" protocol."""

    # The docstring must be the first statement in the class body;
    # placed after this attribute it would not be stored as ``__doc__``.
    EVENT_STREAM_PARSER_CLS = EventStreamJSONParser

    def _do_parse(self, response, shape):
        """Parse a successful response into a dict.

        With no output shape the result starts empty. A shape that
        exposes an event stream is handled via the stream's initial
        response; otherwise the body is parsed as JSON. Response metadata
        from the headers is injected in every case.
        """
        parsed = {}
        if shape is not None:
            event_name = shape.event_stream_name
            if event_name:
                parsed = self._handle_event_stream(response, shape, event_name)
            else:
                parsed = self._handle_json_body(response['body'], shape)
        self._inject_response_metadata(parsed, response['headers'])
        return parsed

    def _do_modeled_error_parse(self, response, shape):
        """Parse the body of a modeled error against its shape."""
        return self._handle_json_body(response['body'], shape)

    def _handle_event_stream(self, response, shape, event_name):
        """Parse the initial response and attach the event stream.

        :raises ResponseParserError: If the first event is not an
            initial-response event.
        """
        event_stream_shape = shape.members[event_name]
        event_stream = self._create_event_stream(response, event_stream_shape)
        try:
            event = event_stream.get_initial_response()
        except NoInitialResponseError:
            error_msg = 'First event was not of type initial-response'
            raise ResponseParserError(error_msg)
        parsed = self._handle_json_body(event.payload, shape)
        parsed[event_name] = event_stream
        return parsed

    def _handle_json_body(self, raw_body, shape):
        """Decode ``raw_body`` as JSON and convert it using ``shape``."""
        # Decoding gives us the primitive JSON types, but we need to
        # traverse the parsed JSON data to convert to richer types
        # (blobs, timestamps, etc.).
        parsed_json = self._parse_body_as_json(raw_body)
        return self._parse_shape(shape, parsed_json)
1106
1107
class BaseRestParser(ResponseParser):
    """Shared parsing logic for the REST-style protocols.

    Members are read from the status code, the headers, or the body
    according to each member's ``location``. Subclasses supply
    ``_initial_body_parse`` for the format-specific body decode.
    """

    def _do_parse(self, response, shape):
        """Parse a successful response, including ``ResponseMetadata``."""
        final_parsed = {
            'ResponseMetadata': self._populate_response_metadata(response)
        }
        self._add_modeled_parse(response, shape, final_parsed)
        return final_parsed

    def _add_modeled_parse(self, response, shape, final_parsed):
        """Fill ``final_parsed`` with the members modeled by ``shape``.

        Returns ``final_parsed`` unchanged when ``shape`` is ``None``;
        otherwise it is mutated in place and nothing is returned.
        """
        if shape is None:
            return final_parsed
        members = shape.members
        self._parse_non_payload_attrs(response, shape, members, final_parsed)
        self._parse_payload(response, shape, members, final_parsed)

    def _do_modeled_error_parse(self, response, shape):
        """Parse a modeled error response into a fresh dict."""
        error_parsed = {}
        self._add_modeled_parse(response, shape, error_parsed)
        return error_parsed

    def _populate_response_metadata(self, response):
        """Build the metadata dict from the request id and host id headers."""
        headers = response['headers']
        metadata = {}
        # The first request id header that is present wins.
        for request_id_header in ('x-amzn-requestid', 'x-amz-request-id'):
            if request_id_header in headers:
                metadata['RequestId'] = headers[request_id_header]
                break
        # HostId is what it's called whenever this value is returned
        # in an XML response body, so to be consistent, we'll always
        # call it HostId.
        metadata['HostId'] = headers.get('x-amz-id-2', '')
        return metadata

    def _parse_payload(self, response, shape, member_shapes, final_parsed):
        """Populate ``final_parsed`` from the response body."""
        serialization = shape.serialization
        if 'payload' not in serialization:
            # No explicit payload member: the body maps onto the shape.
            raw_body = self._initial_body_parse(response['body'])
            final_parsed.update(self._parse_shape(shape, raw_body))
            return

        # If a payload is specified in the output shape, then only that
        # shape is used for the body payload.
        payload_member_name = serialization['payload']
        body_shape = member_shapes[payload_member_name]
        if body_shape.serialization.get('eventstream'):
            final_parsed[payload_member_name] = self._create_event_stream(
                response, body_shape
            )
        elif body_shape.type_name in ('string', 'blob'):
            # This is a stream
            payload = response['body']
            if isinstance(payload, bytes):
                payload = payload.decode(self.DEFAULT_ENCODING)
            final_parsed[payload_member_name] = payload
        else:
            raw_body = self._initial_body_parse(response['body'])
            final_parsed[payload_member_name] = self._parse_shape(
                body_shape, raw_body
            )

    def _parse_non_payload_attrs(
        self, response, shape, member_shapes, final_parsed
    ):
        """Populate members located in the status code or the headers."""
        headers = response['headers']
        for name in member_shapes:
            member_shape = member_shapes[name]
            member_serialization = member_shape.serialization
            location = member_serialization.get('location')
            if location is None:
                continue
            if location == 'statusCode':
                final_parsed[name] = self._parse_shape(
                    member_shape, response['status_code']
                )
            elif location == 'headers':
                final_parsed[name] = self._parse_header_map(
                    member_shape, headers
                )
            elif location == 'header':
                header_name = member_serialization.get('name', name)
                if header_name in headers:
                    final_parsed[name] = self._parse_shape(
                        member_shape, headers[header_name]
                    )

    def _parse_header_map(self, shape, headers):
        """Collect all headers whose name starts with the shape's prefix.

        The prefix is stripped from each key of the returned dict.
        """
        # Note that headers are case insensitive, so we .lower()
        # all header names and header prefixes.
        prefix = shape.serialization.get('name', '').lower()
        prefix_length = len(prefix)
        return {
            header_name[prefix_length:]: headers[header_name]
            for header_name in headers
            if header_name.lower().startswith(prefix)
        }

    def _initial_body_parse(self, body_contents):
        """Perform the first round of xml/json parsing of the body.

        The parsed body still has to be walked to convert types; this
        hook only does the initial decode and must be overridden.
        """
        raise NotImplementedError("_initial_body_parse")

    def _handle_string(self, shape, value):
        """Return ``value``, decoding base64-wrapped JSON header values."""
        if not is_json_value_header(shape):
            return value
        json_text = base64.b64decode(value).decode(self.DEFAULT_ENCODING)
        return json.loads(json_text)

    def _handle_list(self, shape, node):
        """Parse a list, splitting comma separated header values first."""
        in_header = shape.serialization.get('location') == 'header'
        if in_header and not isinstance(node, list):
            # List in headers may be a comma separated string as per RFC7230
            node = [element.strip() for element in node.split(',')]
        return super()._handle_list(shape, node)
1226
1227
class BaseRpcV2Parser(ResponseParser):
    """Shared parsing logic for the RPC v2 protocols.

    Subclasses supply ``_initial_body_parse`` for the format-specific
    body decode.
    """

    def _do_parse(self, response, shape):
        """Parse a successful response.

        Returns an empty dict when there is no output shape. Otherwise
        the body (or the event stream's initial response) is parsed and
        ``ResponseMetadata`` is added.
        """
        if shape is None:
            return {}
        stream_member = shape.event_stream_name
        if stream_member:
            parsed = self._handle_event_stream(response, shape, stream_member)
        else:
            parsed = {}
            self._parse_payload(response, shape, parsed)
        parsed['ResponseMetadata'] = self._populate_response_metadata(
            response
        )
        return parsed

    def _add_modeled_parse(self, response, shape, final_parsed):
        """Fill ``final_parsed`` from the body when a shape is given.

        Returns ``final_parsed`` unchanged when ``shape`` is ``None``;
        otherwise it is mutated in place and nothing is returned.
        """
        if shape is None:
            return final_parsed
        self._parse_payload(response, shape, final_parsed)

    def _do_modeled_error_parse(self, response, shape):
        """Parse a modeled error response into a fresh dict."""
        error_parsed = {}
        self._add_modeled_parse(response, shape, error_parsed)
        return error_parsed

    def _populate_response_metadata(self, response):
        """Return metadata holding the request id header, if present."""
        headers = response['headers']
        if 'x-amzn-requestid' not in headers:
            return {}
        return {'RequestId': headers['x-amzn-requestid']}

    def _handle_structure(self, shape, node):
        """Parse the modeled members of a structure out of ``node``.

        :raises ResponseParserError: If ``shape`` is a tagged union and
            ``node`` does not have exactly one non-``None`` member set
            (ignoring ``__type``).
        """
        members = shape.members
        if shape.is_tagged_union:
            candidates = node.copy()
            candidates.pop("__type", None)
            set_members = [
                key for key, value in candidates.items() if value is not None
            ]
            if len(set_members) != 1:
                error_msg = (
                    "Invalid service response: %s must have one and only "
                    "one member set."
                )
                raise ResponseParserError(error_msg % shape.name)

        parsed = {}
        for member_name in members:
            member_node = node.get(member_name)
            if member_node is None:
                continue
            parsed[member_name] = self._parse_shape(
                members[member_name], member_node
            )
        return parsed

    def _parse_payload(self, response, shape, final_parsed):
        """Decode the body, convert it using ``shape`` and merge it in."""
        raw_body = self._initial_body_parse(response['body'])
        final_parsed.update(self._parse_shape(shape, raw_body))

    def _initial_body_parse(self, body_contents):
        """Perform the initial parsing of the body.

        The parsed body still has to be walked to convert types; this
        hook only does the first round of parsing and must be overridden.
        """
        raise NotImplementedError("_initial_body_parse")
1297
1298
class RestJSONParser(BaseRestParser, BaseJSONParser):
    """Response parser that combines REST handling with JSON bodies."""

    EVENT_STREAM_PARSER_CLS = EventStreamJSONParser

    def _initial_body_parse(self, body_contents):
        """Decode the raw response body with the shared JSON helper."""
        parsed_json = self._parse_body_as_json(body_contents)
        return parsed_json

    def _do_error_parse(self, response, shape):
        """Parse an error response, then resolve its error code."""
        parsed_error = super()._do_error_parse(response, shape)
        self._inject_error_code(parsed_error, response)
        return parsed_error

    def _inject_error_code(self, error, response):
        """Set ``error['Error']['Code']`` when a code can be found.

        The "Code" value can come from either the ``x-amzn-errortype``
        response header or a ``code``/``Code`` value in the JSON body;
        the header takes precedence.
        """
        body = self._initial_body_parse(response['body'])
        headers = response['headers']
        if 'x-amzn-errortype' in headers:
            code = headers['x-amzn-errortype']
        elif 'code' in body or 'Code' in body:
            code = body.get('code', body.get('Code', ''))
        else:
            code = None

        if code is None:
            return
        if isinstance(code, str):
            # Keep what precedes the first ':' and follows the last '#'.
            before_colon = code.split(':', 1)[0]
            code = before_colon.rsplit('#', 1)[-1]
        error['Error']['Code'] = code

    def _handle_boolean(self, shape, value):
        """Coerce ``value`` to a boolean."""
        return ensure_boolean(value)

    def _handle_integer(self, shape, value):
        """Coerce ``value`` to an ``int``."""
        return int(value)

    def _handle_float(self, shape, value):
        """Coerce ``value`` to a ``float``."""
        return float(value)

    # Longs and doubles are handled exactly like integers and floats.
    _handle_long = _handle_integer
    _handle_double = _handle_float
1336
1337
class RpcV2CBORParser(BaseRpcV2Parser, BaseCBORParser):
    """Response parser for the "smithy-rpc-v2-cbor" protocol."""

    EVENT_STREAM_PARSER_CLS = EventStreamCBORParser

    def _initial_body_parse(self, body_contents):
        """Decode a CBOR response body.

        :param body_contents: The raw body bytes.
        :returns: The decoded data item, or ``{}`` for an empty body.
        """
        if body_contents == b'':
            # Return an empty mapping, not the empty bytes: every caller
            # treats the result as a dict (``.get`` lookups and item
            # assignment), which would fail on ``bytes``.
            return {}
        body_contents_stream = self.get_peekable_stream_from_bytes(
            body_contents
        )
        return self.parse_data_item(body_contents_stream)

    def _do_error_parse(self, response, shape):
        """Build the error dict for an unmodeled error response.

        The code comes from the body's ``__type`` and falls back to the
        HTTP status code. The message comes from ``message``/``Message``.
        """
        body = self._initial_body_parse(response['body'])
        error = {
            "Error": {
                "Message": body.get('message', body.get('Message', '')),
                "Code": '',
            },
            "ResponseMetadata": {},
        }
        headers = response['headers']

        code = body.get('__type')
        if code is None:
            response_code = response.get('status_code')
            if response_code is not None:
                code = str(response_code)
        if code is not None:
            # Keep what precedes the first ':' and follows the last '#'.
            if ':' in code:
                code = code.split(':', 1)[0]
            if '#' in code:
                code = code.rsplit('#', 1)[1]
            if 'x-amzn-query-error' in headers:
                code = self._do_query_compatible_error_parse(
                    code, headers, error
                )
            error['Error']['Code'] = code
        if 'x-amzn-requestid' in headers:
            error.setdefault('ResponseMetadata', {})['RequestId'] = headers[
                'x-amzn-requestid'
            ]
        return error

    def _handle_event_stream(self, response, shape, event_name):
        """Decode the initial response and attach the event stream.

        :raises ResponseParserError: If the first event is not an
            initial-response event.
        """
        event_stream_shape = shape.members[event_name]
        event_stream = self._create_event_stream(response, event_stream_shape)
        try:
            event = event_stream.get_initial_response()
        except NoInitialResponseError:
            error_msg = 'First event was not of type initial-response'
            raise ResponseParserError(error_msg)
        parsed = self._initial_body_parse(event.payload)
        parsed[event_name] = event_stream
        return parsed
1392
1393
class RestXMLParser(BaseRestParser, BaseXMLResponseParser):
    """Response parser that combines REST handling with XML bodies."""

    EVENT_STREAM_PARSER_CLS = EventStreamXMLParser

    def _initial_body_parse(self, xml_string):
        """Parse ``xml_string`` into a DOM node.

        A falsy body yields an empty placeholder element instead of
        being handed to the XML parser.
        """
        if xml_string:
            return self._parse_xml_string_to_dom(xml_string)
        return ETree.Element('')

    def _do_error_parse(self, response, shape):
        """Parse an error from the body, falling back to the HTTP status."""
        # We're trying to be service agnostic here, but S3 does have a
        # slightly different response structure for its errors compared to
        # other rest-xml services (route53/cloudfront). We handle this by
        # just trying to parse both forms.
        # First:
        # <ErrorResponse xmlns="...">
        #   <Error>
        #     <Type>Sender</Type>
        #     <Code>InvalidInput</Code>
        #     <Message>Invalid resource type: foo</Message>
        #   </Error>
        #   <RequestId>request-id</RequestId>
        # </ErrorResponse>
        if not response['body']:
            return self._parse_error_from_http_status(response)
        # If the body ends up being invalid xml, the xml parser should not
        # blow up. It should at least try to pull information about the
        # error response from other sources like the HTTP status code.
        try:
            return self._parse_error_from_body(response)
        except ResponseParserError:
            LOG.debug(
                'Exception caught when parsing error response body:',
                exc_info=True,
            )
        return self._parse_error_from_http_status(response)

    def _parse_error_from_http_status(self, response):
        """Build an error dict from the status code and headers alone."""
        status_code = response['status_code']
        error_details = {
            'Code': str(status_code),
            'Message': http.client.responses.get(status_code, ''),
        }
        headers = response['headers']
        metadata = {
            'RequestId': headers.get('x-amz-request-id', ''),
            'HostId': headers.get('x-amz-id-2', ''),
        }
        return {'Error': error_details, 'ResponseMetadata': metadata}

    def _parse_error_from_body(self, response):
        """Build an error dict from the XML error body."""
        root = self._parse_xml_string_to_dom(response['body'])
        parsed = self._build_name_to_xml_node(root)
        self._replace_nodes(parsed)
        if root.tag == 'Error':
            # This is an S3 error response. First we'll populate the
            # response metadata.
            metadata = self._populate_response_metadata(response)
            # The RequestId and the HostId are already in the
            # ResponseMetadata, but are also duplicated in the XML
            # body. We don't need these values in both places,
            # we'll just remove them from the parsed XML body.
            for duplicated_key in ('RequestId', 'HostId'):
                parsed.pop(duplicated_key, '')
            return {'Error': parsed, 'ResponseMetadata': metadata}
        if 'RequestId' in parsed:
            # Other rest-xml services:
            request_id = parsed.pop('RequestId')
            parsed['ResponseMetadata'] = {'RequestId': request_id}
        # Start from empty defaults and merge the parsed body into them.
        merged = {'Error': {'Message': '', 'Code': ''}}
        merge_dicts(merged, parsed)
        return merged

    @_text_content
    def _handle_string(self, shape, text):
        """Parse a string member from its text content."""
        return super()._handle_string(shape, text)
1470
1471
# Maps each protocol name to the parser class used for its responses.
PROTOCOL_PARSERS = {
    'ec2': EC2QueryParser,
    'query': QueryParser,
    'json': JSONParser,
    'rest-json': RestJSONParser,
    'rest-xml': RestXMLParser,
    'smithy-rpc-v2-cbor': RpcV2CBORParser,
}