1# Copyright 2015 Google LLC 
    2# 
    3# Licensed under the Apache License, Version 2.0 (the "License"); 
    4# you may not use this file except in compliance with the License. 
    5# You may obtain a copy of the License at 
    6# 
    7#     http://www.apache.org/licenses/LICENSE-2.0 
    8# 
    9# Unless required by applicable law or agreed to in writing, software 
    10# distributed under the License is distributed on an "AS IS" BASIS, 
    11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
    12# See the License for the specific language governing permissions and 
    13# limitations under the License. 
    14 
    15"""Iterators for paging through paged API methods. 
    16 
    17These iterators simplify the process of paging through API responses 
    18where the request takes a page token and the response is a list of results with 
    19a token for the next page. See `list pagination`_ in the Google API Style Guide 
    20for more details. 
    21 
    22.. _list pagination: 
    23    https://cloud.google.com/apis/design/design_patterns#list_pagination 
    24 
    25API clients that have methods that follow the list pagination pattern can 
    26return an :class:`.Iterator`. You can use this iterator to get **all** of 
    27the results across all pages:: 
    28 
    29    >>> results_iterator = client.list_resources() 
    30    >>> list(results_iterator)  # Convert to a list (consumes all values). 
    31 
    32Or you can walk your way through items and call off the search early if 
    33you find what you're looking for (resulting in possibly fewer requests):: 
    34 
    35    >>> for resource in results_iterator: 
    36    ...     print(resource.name) 
    37    ...     if not resource.is_valid: 
    38    ...         break 
    39 
    40At any point, you may check the number of items consumed by referencing the 
    41``num_results`` property of the iterator:: 
    42 
    43    >>> for my_item in results_iterator: 
    44    ...     if results_iterator.num_results >= 10: 
    45    ...         break 
    46 
    47When iterating, not every new item will send a request to the server. 
    48To iterate based on each page of items (where a page corresponds to 
    49a request):: 
    50 
    >>> for page in results_iterator.pages:
    ...     print('=' * 20)
    ...     print('    Page number: {:d}'.format(results_iterator.page_number))
    ...     print('  Items in page: {:d}'.format(page.num_items))
    ...     print('     First item: {!r}'.format(next(page)))
    ...     print('Items remaining: {:d}'.format(page.remaining))
    ...     print('Next page token: {}'.format(results_iterator.next_page_token))
    58    ==================== 
    59        Page number: 1 
    60      Items in page: 1 
    61         First item: <MyItemClass at 0x7f1d3cccf690> 
    62    Items remaining: 0 
    63    Next page token: eav1OzQB0OM8rLdGXOEsyQWSG 
    64    ==================== 
    65        Page number: 2 
    66      Items in page: 19 
    67         First item: <MyItemClass at 0x7f1d3cccffd0> 
    68    Items remaining: 18 
    69    Next page token: None 
    70 
    71Then, for each page you can get all the resources on that page by iterating 
    72through it or using :func:`list`:: 
    73 
    74    >>> list(page) 
    75    [ 
    76        <MyItemClass at 0x7fd64a098ad0>, 
    77        <MyItemClass at 0x7fd64a098ed0>, 
    78        <MyItemClass at 0x7fd64a098e90>, 
    79    ] 
    80""" 
    81 
    82import abc 
    83 
    84 
    85class Page(object): 
    86    """Single page of results in an iterator. 
    87 
    88    Args: 
    89        parent (google.api_core.page_iterator.Iterator): The iterator that owns 
    90            the current page. 
    91        items (Sequence[Any]): An iterable (that also defines __len__) of items 
    92            from a raw API response. 
    93        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]): 
    94            Callable to convert an item from the type in the raw API response 
    95            into the native object. Will be called with the iterator and a 
    96            single item. 
    97        raw_page Optional[google.protobuf.message.Message]: 
    98            The raw page response. 
    99    """ 
    100 
    101    def __init__(self, parent, items, item_to_value, raw_page=None): 
    102        self._parent = parent 
    103        self._num_items = len(items) 
    104        self._remaining = self._num_items 
    105        self._item_iter = iter(items) 
    106        self._item_to_value = item_to_value 
    107        self._raw_page = raw_page 
    108 
    109    @property 
    110    def raw_page(self): 
    111        """google.protobuf.message.Message""" 
    112        return self._raw_page 
    113 
    114    @property 
    115    def num_items(self): 
    116        """int: Total items in the page.""" 
    117        return self._num_items 
    118 
    119    @property 
    120    def remaining(self): 
    121        """int: Remaining items in the page.""" 
    122        return self._remaining 
    123 
    124    def __iter__(self): 
    125        """The :class:`Page` is an iterator of items.""" 
    126        return self 
    127 
    128    def __next__(self): 
    129        """Get the next value in the page.""" 
    130        item = next(self._item_iter) 
    131        result = self._item_to_value(self._parent, item) 
    132        # Since we've successfully got the next value from the 
    133        # iterator, we update the number of remaining. 
    134        self._remaining -= 1 
    135        return result 
    136 
    137 
    138def _item_to_value_identity(iterator, item): 
    139    """An item to value transformer that returns the item un-changed.""" 
    140    # pylint: disable=unused-argument 
    141    # We are conforming to the interface defined by Iterator. 
    142    return item 
    143 
    144 
    145class Iterator(object, metaclass=abc.ABCMeta): 
    146    """A generic class for iterating through API list responses. 
    147 
    148    Args: 
    149        client(google.cloud.client.Client): The API client. 
    150        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]): 
    151            Callable to convert an item from the type in the raw API response 
    152            into the native object. Will be called with the iterator and a 
    153            single item. 
    154        page_token (str): A token identifying a page in a result set to start 
    155            fetching results from. 
    156        max_results (int): The maximum number of results to fetch. 
    157    """ 
    158 
    159    def __init__( 
    160        self, 
    161        client, 
    162        item_to_value=_item_to_value_identity, 
    163        page_token=None, 
    164        max_results=None, 
    165    ): 
    166        self._started = False 
    167        self.__active_iterator = None 
    168 
    169        self.client = client 
    170        """Optional[Any]: The client that created this iterator.""" 
    171        self.item_to_value = item_to_value 
    172        """Callable[Iterator, Any]: Callable to convert an item from the type 
    173            in the raw API response into the native object. Will be called with 
    174            the iterator and a 
    175            single item. 
    176        """ 
    177        self.max_results = max_results 
    178        """int: The maximum number of results to fetch""" 
    179 
    180        # The attributes below will change over the life of the iterator. 
    181        self.page_number = 0 
    182        """int: The current page of results.""" 
    183        self.next_page_token = page_token 
    184        """str: The token for the next page of results. If this is set before 
    185            the iterator starts, it effectively offsets the iterator to a 
    186            specific starting point.""" 
    187        self.num_results = 0 
    188        """int: The total number of results fetched so far.""" 
    189 
    190    @property 
    191    def pages(self): 
    192        """Iterator of pages in the response. 
    193 
    194        returns: 
    195            types.GeneratorType[google.api_core.page_iterator.Page]: A 
    196                generator of page instances. 
    197 
    198        raises: 
    199            ValueError: If the iterator has already been started. 
    200        """ 
    201        if self._started: 
    202            raise ValueError("Iterator has already started", self) 
    203        self._started = True 
    204        return self._page_iter(increment=True) 
    205 
    206    def _items_iter(self): 
    207        """Iterator for each item returned.""" 
    208        for page in self._page_iter(increment=False): 
    209            for item in page: 
    210                self.num_results += 1 
    211                yield item 
    212 
    213    def __iter__(self): 
    214        """Iterator for each item returned. 
    215 
    216        Returns: 
    217            types.GeneratorType[Any]: A generator of items from the API. 
    218 
    219        Raises: 
    220            ValueError: If the iterator has already been started. 
    221        """ 
    222        if self._started: 
    223            raise ValueError("Iterator has already started", self) 
    224        self._started = True 
    225        return self._items_iter() 
    226 
    227    def __next__(self): 
    228        if self.__active_iterator is None: 
    229            self.__active_iterator = iter(self) 
    230        return next(self.__active_iterator) 
    231 
    232    def _page_iter(self, increment): 
    233        """Generator of pages of API responses. 
    234 
    235        Args: 
    236            increment (bool): Flag indicating if the total number of results 
    237                should be incremented on each page. This is useful since a page 
    238                iterator will want to increment by results per page while an 
    239                items iterator will want to increment per item. 
    240 
    241        Yields: 
    242            Page: each page of items from the API. 
    243        """ 
    244        page = self._next_page() 
    245        while page is not None: 
    246            self.page_number += 1 
    247            if increment: 
    248                self.num_results += page.num_items 
    249            yield page 
    250            page = self._next_page() 
    251 
    252    @abc.abstractmethod 
    253    def _next_page(self): 
    254        """Get the next page in the iterator. 
    255 
    256        This does nothing and is intended to be over-ridden by subclasses 
    257        to return the next :class:`Page`. 
    258 
    259        Raises: 
    260            NotImplementedError: Always, this method is abstract. 
    261        """ 
    262        raise NotImplementedError 
    263 
    264 
    265def _do_nothing_page_start(iterator, page, response): 
    266    """Helper to provide custom behavior after a :class:`Page` is started. 
    267 
    268    This is a do-nothing stand-in as the default value. 
    269 
    270    Args: 
    271        iterator (Iterator): An iterator that holds some request info. 
    272        page (Page): The page that was just created. 
    273        response (Any): The API response for a page. 
    274    """ 
    275    # pylint: disable=unused-argument 
    276    pass 
    277 
    278 
    279class HTTPIterator(Iterator): 
    280    """A generic class for iterating through HTTP/JSON API list responses. 
    281 
    282    To make an iterator work, you'll need to provide a way to convert a JSON 
    283    item returned from the API into the object of your choice (via 
    284    ``item_to_value``). You also may need to specify a custom ``items_key`` so 
    285    that a given response (containing a page of results) can be parsed into an 
    286    iterable page of the actual objects you want. 
    287 
    288    Args: 
    289        client (google.cloud.client.Client): The API client. 
    290        api_request (Callable): The function to use to make API requests. 
    291            Generally, this will be 
    292            :meth:`google.cloud._http.JSONConnection.api_request`. 
    293        path (str): The method path to query for the list of items. 
    294        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]): 
    295            Callable to convert an item from the type in the JSON response into 
    296            a native object. Will be called with the iterator and a single 
    297            item. 
    298        items_key (str): The key in the API response where the list of items 
    299            can be found. 
    300        page_token (str): A token identifying a page in a result set to start 
    301            fetching results from. 
    302        page_size (int): The maximum number of results to fetch per page 
    303        max_results (int): The maximum number of results to fetch 
    304        extra_params (dict): Extra query string parameters for the 
    305            API call. 
    306        page_start (Callable[ 
    307            google.api_core.page_iterator.Iterator, 
    308            google.api_core.page_iterator.Page, dict]): Callable to provide 
    309            any special behavior after a new page has been created. Assumed 
    310            signature takes the :class:`.Iterator` that started the page, 
    311            the :class:`.Page` that was started and the dictionary containing 
    312            the page response. 
    313        next_token (str): The name of the field used in the response for page 
    314            tokens. 
    315 
    316    .. autoattribute:: pages 
    317    """ 
    318 
    319    _DEFAULT_ITEMS_KEY = "items" 
    320    _PAGE_TOKEN = "pageToken" 
    321    _MAX_RESULTS = "maxResults" 
    322    _NEXT_TOKEN = "nextPageToken" 
    323    _RESERVED_PARAMS = frozenset([_PAGE_TOKEN]) 
    324    _HTTP_METHOD = "GET" 
    325 
    326    def __init__( 
    327        self, 
    328        client, 
    329        api_request, 
    330        path, 
    331        item_to_value, 
    332        items_key=_DEFAULT_ITEMS_KEY, 
    333        page_token=None, 
    334        page_size=None, 
    335        max_results=None, 
    336        extra_params=None, 
    337        page_start=_do_nothing_page_start, 
    338        next_token=_NEXT_TOKEN, 
    339    ): 
    340        super(HTTPIterator, self).__init__( 
    341            client, item_to_value, page_token=page_token, max_results=max_results 
    342        ) 
    343        self.api_request = api_request 
    344        self.path = path 
    345        self._items_key = items_key 
    346        self.extra_params = extra_params 
    347        self._page_size = page_size 
    348        self._page_start = page_start 
    349        self._next_token = next_token 
    350        # Verify inputs / provide defaults. 
    351        if self.extra_params is None: 
    352            self.extra_params = {} 
    353        self._verify_params() 
    354 
    355    def _verify_params(self): 
    356        """Verifies the parameters don't use any reserved parameter. 
    357 
    358        Raises: 
    359            ValueError: If a reserved parameter is used. 
    360        """ 
    361        reserved_in_use = self._RESERVED_PARAMS.intersection(self.extra_params) 
    362        if reserved_in_use: 
    363            raise ValueError("Using a reserved parameter", reserved_in_use) 
    364 
    365    def _next_page(self): 
    366        """Get the next page in the iterator. 
    367 
    368        Returns: 
    369            Optional[Page]: The next page in the iterator or :data:`None` if 
    370                there are no pages left. 
    371        """ 
    372        if self._has_next_page(): 
    373            response = self._get_next_page_response() 
    374            items = response.get(self._items_key, ()) 
    375            page = Page(self, items, self.item_to_value, raw_page=response) 
    376            self._page_start(self, page, response) 
    377            self.next_page_token = response.get(self._next_token) 
    378            return page 
    379        else: 
    380            return None 
    381 
    382    def _has_next_page(self): 
    383        """Determines whether or not there are more pages with results. 
    384 
    385        Returns: 
    386            bool: Whether the iterator has more pages. 
    387        """ 
    388        if self.page_number == 0: 
    389            return True 
    390 
    391        if self.max_results is not None: 
    392            if self.num_results >= self.max_results: 
    393                return False 
    394 
    395        return self.next_page_token is not None 
    396 
    397    def _get_query_params(self): 
    398        """Getter for query parameters for the next request. 
    399 
    400        Returns: 
    401            dict: A dictionary of query parameters. 
    402        """ 
    403        result = {} 
    404        if self.next_page_token is not None: 
    405            result[self._PAGE_TOKEN] = self.next_page_token 
    406 
    407        page_size = None 
    408        if self.max_results is not None: 
    409            page_size = self.max_results - self.num_results 
    410            if self._page_size is not None: 
    411                page_size = min(page_size, self._page_size) 
    412        elif self._page_size is not None: 
    413            page_size = self._page_size 
    414 
    415        if page_size is not None: 
    416            result[self._MAX_RESULTS] = page_size 
    417 
    418        result.update(self.extra_params) 
    419        return result 
    420 
    421    def _get_next_page_response(self): 
    422        """Requests the next page from the path provided. 
    423 
    424        Returns: 
    425            dict: The parsed JSON response of the next page's contents. 
    426 
    427        Raises: 
    428            ValueError: If the HTTP method is not ``GET`` or ``POST``. 
    429        """ 
    430        params = self._get_query_params() 
    431        if self._HTTP_METHOD == "GET": 
    432            return self.api_request( 
    433                method=self._HTTP_METHOD, path=self.path, query_params=params 
    434            ) 
    435        elif self._HTTP_METHOD == "POST": 
    436            return self.api_request( 
    437                method=self._HTTP_METHOD, path=self.path, data=params 
    438            ) 
    439        else: 
    440            raise ValueError("Unexpected HTTP method", self._HTTP_METHOD) 
    441 
    442 
    443class _GAXIterator(Iterator): 
    444    """A generic class for iterating through Cloud gRPC APIs list responses. 
    445 
    446    Any: 
    447        client (google.cloud.client.Client): The API client. 
    448        page_iter (google.gax.PageIterator): A GAX page iterator to be wrapped 
    449            to conform to the :class:`Iterator` interface. 
    450        item_to_value (Callable[Iterator, Any]): Callable to convert an item 
    451            from the protobuf response into a native object. Will 
    452            be called with the iterator and a single item. 
    453        max_results (int): The maximum number of results to fetch. 
    454 
    455    .. autoattribute:: pages 
    456    """ 
    457 
    458    def __init__(self, client, page_iter, item_to_value, max_results=None): 
    459        super(_GAXIterator, self).__init__( 
    460            client, 
    461            item_to_value, 
    462            page_token=page_iter.page_token, 
    463            max_results=max_results, 
    464        ) 
    465        self._gax_page_iter = page_iter 
    466 
    467    def _next_page(self): 
    468        """Get the next page in the iterator. 
    469 
    470        Wraps the response from the :class:`~google.gax.PageIterator` in a 
    471        :class:`Page` instance and captures some state at each page. 
    472 
    473        Returns: 
    474            Optional[Page]: The next page in the iterator or :data:`None` if 
    475                  there are no pages left. 
    476        """ 
    477        try: 
    478            items = next(self._gax_page_iter) 
    479            page = Page(self, items, self.item_to_value) 
    480            self.next_page_token = self._gax_page_iter.page_token or None 
    481            return page 
    482        except StopIteration: 
    483            return None 
    484 
    485 
    486class GRPCIterator(Iterator): 
    487    """A generic class for iterating through gRPC list responses. 
    488 
    489    .. note:: The class does not take a ``page_token`` argument because it can 
    490        just be specified in the ``request``. 
    491 
    492    Args: 
    493        client (google.cloud.client.Client): The API client. This unused by 
    494            this class, but kept to satisfy the :class:`Iterator` interface. 
    495        method (Callable[protobuf.Message]): A bound gRPC method that should 
    496            take a single message for the request. 
    497        request (protobuf.Message): The request message. 
    498        items_field (str): The field in the response message that has the 
    499            items for the page. 
    500        item_to_value (Callable[GRPCIterator, Any]): Callable to convert an 
    501            item from the type in the JSON response into a native object. Will 
    502            be called with the iterator and a single item. 
    503        request_token_field (str): The field in the request message used to 
    504            specify the page token. 
    505        response_token_field (str): The field in the response message that has 
    506            the token for the next page. 
    507        max_results (int): The maximum number of results to fetch. 
    508 
    509    .. autoattribute:: pages 
    510    """ 
    511 
    512    _DEFAULT_REQUEST_TOKEN_FIELD = "page_token" 
    513    _DEFAULT_RESPONSE_TOKEN_FIELD = "next_page_token" 
    514 
    515    def __init__( 
    516        self, 
    517        client, 
    518        method, 
    519        request, 
    520        items_field, 
    521        item_to_value=_item_to_value_identity, 
    522        request_token_field=_DEFAULT_REQUEST_TOKEN_FIELD, 
    523        response_token_field=_DEFAULT_RESPONSE_TOKEN_FIELD, 
    524        max_results=None, 
    525    ): 
    526        super(GRPCIterator, self).__init__( 
    527            client, item_to_value, max_results=max_results 
    528        ) 
    529        self._method = method 
    530        self._request = request 
    531        self._items_field = items_field 
    532        self._request_token_field = request_token_field 
    533        self._response_token_field = response_token_field 
    534 
    535    def _next_page(self): 
    536        """Get the next page in the iterator. 
    537 
    538        Returns: 
    539            Page: The next page in the iterator or :data:`None` if 
    540                there are no pages left. 
    541        """ 
    542        if not self._has_next_page(): 
    543            return None 
    544 
    545        if self.next_page_token is not None: 
    546            setattr(self._request, self._request_token_field, self.next_page_token) 
    547 
    548        response = self._method(self._request) 
    549 
    550        self.next_page_token = getattr(response, self._response_token_field) 
    551        items = getattr(response, self._items_field) 
    552        page = Page(self, items, self.item_to_value, raw_page=response) 
    553 
    554        return page 
    555 
    556    def _has_next_page(self): 
    557        """Determines whether or not there are more pages with results. 
    558 
    559        Returns: 
    560            bool: Whether the iterator has more pages. 
    561        """ 
    562        if self.page_number == 0: 
    563            return True 
    564 
    565        if self.max_results is not None: 
    566            if self.num_results >= self.max_results: 
    567                return False 
    568 
    569        # Note: intentionally a falsy check instead of a None check. The RPC 
    570        # can return an empty string indicating no more pages. 
    571        return True if self.next_page_token else False