1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Iterators for paging through paged API methods.
16
17These iterators simplify the process of paging through API responses
18where the request takes a page token and the response is a list of results with
19a token for the next page. See `list pagination`_ in the Google API Style Guide
20for more details.
21
22.. _list pagination:
23 https://cloud.google.com/apis/design/design_patterns#list_pagination
24
25API clients that have methods that follow the list pagination pattern can
26return an :class:`.Iterator`. You can use this iterator to get **all** of
27the results across all pages::
28
29 >>> results_iterator = client.list_resources()
30 >>> list(results_iterator) # Convert to a list (consumes all values).
31
32Or you can walk your way through items and call off the search early if
33you find what you're looking for (resulting in possibly fewer requests)::
34
35 >>> for resource in results_iterator:
36 ... print(resource.name)
37 ... if not resource.is_valid:
38 ... break
39
40At any point, you may check the number of items consumed by referencing the
41``num_results`` property of the iterator::
42
43 >>> for my_item in results_iterator:
44 ... if results_iterator.num_results >= 10:
45 ... break
46
47When iterating, not every new item will send a request to the server.
48To iterate based on each page of items (where a page corresponds to
49a request)::
50
51 >>> for page in results_iterator.pages:
52 ... print('=' * 20)
53 ... print(' Page number: {:d}'.format(results_iterator.page_number))
54 ... print(' Items in page: {:d}'.format(page.num_items))
55 ... print(' First item: {!r}'.format(next(page)))
56 ... print('Items remaining: {:d}'.format(page.remaining))
57 ... print('Next page token: {}'.format(results_iterator.next_page_token))
58 ====================
59 Page number: 1
60 Items in page: 1
61 First item: <MyItemClass at 0x7f1d3cccf690>
62 Items remaining: 0
63 Next page token: eav1OzQB0OM8rLdGXOEsyQWSG
64 ====================
65 Page number: 2
66 Items in page: 19
67 First item: <MyItemClass at 0x7f1d3cccffd0>
68 Items remaining: 18
69 Next page token: None
70
71Then, for each page you can get all the resources on that page by iterating
72through it or using :func:`list`::
73
74 >>> list(page)
75 [
76 <MyItemClass at 0x7fd64a098ad0>,
77 <MyItemClass at 0x7fd64a098ed0>,
78 <MyItemClass at 0x7fd64a098e90>,
79 ]
80"""
81
82import abc
83
84
class Page(object):
    """One page of results handed out by a paging iterator.

    A page is itself an iterator: each call to :func:`next` pulls one raw
    item, converts it with ``item_to_value`` and returns the converted value.

    Args:
        parent (google.api_core.page_iterator.Iterator): The iterator that
            owns this page. It is passed as the first argument to
            ``item_to_value``.
        items (Sequence[Any]): Items from a raw API response. Must be
            iterable and must support :func:`len`.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Callable that converts a raw item into the native object. It is
            called with the parent iterator and a single item.
        raw_page (Optional[google.protobuf.message.Message]):
            The raw page response.
    """

    def __init__(self, parent, items, item_to_value, raw_page=None):
        self._parent = parent
        # The size is taken up front so ``num_items`` stays fixed while
        # ``remaining`` counts down as values are consumed.
        total = len(items)
        self._num_items = total
        self._remaining = total
        self._item_iter = iter(items)
        self._item_to_value = item_to_value
        self._raw_page = raw_page

    @property
    def raw_page(self):
        """google.protobuf.message.Message: The raw response given at
        construction time (``None`` when none was supplied)."""
        return self._raw_page

    @property
    def num_items(self):
        """int: Number of items the page was created with."""
        return self._num_items

    @property
    def remaining(self):
        """int: Number of items not yet returned by :meth:`__next__`."""
        return self._remaining

    def __iter__(self):
        """Return ``self``; a :class:`Page` is its own iterator."""
        return self

    def __next__(self):
        """Return the next converted value from the page.

        Raises:
            StopIteration: When the underlying items are exhausted.
        """
        raw_item = next(self._item_iter)
        value = self._item_to_value(self._parent, raw_item)
        # Only count the item as consumed once it has been fetched and
        # converted without raising.
        self._remaining -= 1
        return value
136
137
138def _item_to_value_identity(iterator, item):
139 """An item to value transformer that returns the item un-changed."""
140 # pylint: disable=unused-argument
141 # We are conforming to the interface defined by Iterator.
142 return item
143
144
class Iterator(object, metaclass=abc.ABCMeta):
    """Abstract base class for iterating through API list responses.

    Subclasses supply :meth:`_next_page`; this class turns the resulting
    stream of pages into either a page generator (:attr:`pages`) or an item
    generator (``iter(self)``). Only one of the two may be started.

    Args:
        client(google.cloud.client.Client): The API client.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Callable to convert an item from the type in the raw API response
            into the native object. Will be called with the iterator and a
            single item.
        page_token (str): A token identifying a page in a result set to start
            fetching results from.
        max_results (int): The maximum number of results to fetch.
    """

    def __init__(
        self,
        client,
        item_to_value=_item_to_value_identity,
        page_token=None,
        max_results=None,
    ):
        # Guards against starting both the page and the item generators.
        self._started = False
        # Lazily created item generator backing ``__next__``.
        self.__active_iterator = None

        self.client = client
        """Optional[Any]: The client that created this iterator."""
        self.item_to_value = item_to_value
        """Callable[Iterator, Any]: Callable to convert an item from the type
            in the raw API response into the native object. Will be called with
            the iterator and a
            single item.
        """
        self.max_results = max_results
        """int: The maximum number of results to fetch"""

        # Everything below is mutated as iteration progresses.
        self.page_number = 0
        """int: The current page of results."""
        self.next_page_token = page_token
        """str: The token for the next page of results. If this is set before
            the iterator starts, it effectively offsets the iterator to a
            specific starting point."""
        self.num_results = 0
        """int: The total number of results fetched so far."""

    @property
    def pages(self):
        """Iterator of pages in the response.

        Returns:
            types.GeneratorType[google.api_core.page_iterator.Page]: A
            generator of page instances.

        Raises:
            ValueError: If the iterator has already been started.
        """
        if self._started:
            raise ValueError("Iterator has already started", self)
        self._started = True
        # Page-level iteration counts results a whole page at a time.
        return self._page_iter(increment=True)

    def _items_iter(self):
        """Yield every item of every page, counting each one as it goes."""
        for current_page in self._page_iter(increment=False):
            for value in current_page:
                self.num_results += 1
                yield value

    def __iter__(self):
        """Iterator for each item returned.

        Returns:
            types.GeneratorType[Any]: A generator of items from the API.

        Raises:
            ValueError: If the iterator has already been started.
        """
        if self._started:
            raise ValueError("Iterator has already started", self)
        self._started = True
        return self._items_iter()

    def __next__(self):
        """Return the next item, creating the item generator on first use."""
        active = self.__active_iterator
        if active is None:
            active = self.__active_iterator = iter(self)
        return next(active)

    def _page_iter(self, increment):
        """Generator of pages of API responses.

        Args:
            increment (bool): Whether ``num_results`` should grow by the size
                of each page as it is yielded. The page generator wants this;
                the item generator counts per item instead and passes
                ``False``.

        Yields:
            Page: each page of items from the API.
        """
        while True:
            page = self._next_page()
            if page is None:
                return
            self.page_number += 1
            if increment:
                self.num_results += page.num_items
            yield page

    @abc.abstractmethod
    def _next_page(self):
        """Get the next page in the iterator.

        This base implementation does no work; subclasses override it to
        return the next :class:`Page`.

        Raises:
            NotImplementedError: Always, this method is abstract.
        """
        raise NotImplementedError
263
264
265def _do_nothing_page_start(iterator, page, response):
266 """Helper to provide custom behavior after a :class:`Page` is started.
267
268 This is a do-nothing stand-in as the default value.
269
270 Args:
271 iterator (Iterator): An iterator that holds some request info.
272 page (Page): The page that was just created.
273 response (Any): The API response for a page.
274 """
275 # pylint: disable=unused-argument
276 pass
277
278
class HTTPIterator(Iterator):
    """Iterator over HTTP/JSON API list responses.

    You supply ``item_to_value`` to turn each JSON item into the object of
    your choice and, if the API does not use the default, an ``items_key``
    naming the response field that holds the page's items.

    Args:
        client (google.cloud.client.Client): The API client.
        api_request (Callable): The function to use to make API requests.
            Generally, this will be
            :meth:`google.cloud._http.JSONConnection.api_request`.
        path (str): The method path to query for the list of items.
        item_to_value (Callable[google.api_core.page_iterator.Iterator, Any]):
            Callable to convert an item from the type in the JSON response into
            a native object. Will be called with the iterator and a single
            item.
        items_key (str): The key in the API response where the list of items
            can be found.
        page_token (str): A token identifying a page in a result set to start
            fetching results from.
        page_size (int): The maximum number of results to fetch per page
        max_results (int): The maximum number of results to fetch
        extra_params (dict): Extra query string parameters for the
            API call.
        page_start (Callable[
            google.api_core.page_iterator.Iterator,
            google.api_core.page_iterator.Page, dict]): Callable to provide
            any special behavior after a new page has been created. Assumed
            signature takes the :class:`.Iterator` that started the page,
            the :class:`.Page` that was started and the dictionary containing
            the page response.
        next_token (str): The name of the field used in the response for page
            tokens.

    .. autoattribute:: pages
    """

    _DEFAULT_ITEMS_KEY = "items"
    _PAGE_TOKEN = "pageToken"
    _MAX_RESULTS = "maxResults"
    _NEXT_TOKEN = "nextPageToken"
    _RESERVED_PARAMS = frozenset([_PAGE_TOKEN])
    _HTTP_METHOD = "GET"

    def __init__(
        self,
        client,
        api_request,
        path,
        item_to_value,
        items_key=_DEFAULT_ITEMS_KEY,
        page_token=None,
        page_size=None,
        max_results=None,
        extra_params=None,
        page_start=_do_nothing_page_start,
        next_token=_NEXT_TOKEN,
    ):
        super().__init__(
            client, item_to_value, page_token=page_token, max_results=max_results
        )
        self.api_request = api_request
        self.path = path
        self._items_key = items_key
        self._page_size = page_size
        self._page_start = page_start
        self._next_token = next_token
        # A missing ``extra_params`` becomes a fresh dict per instance.
        self.extra_params = {} if extra_params is None else extra_params
        self._verify_params()

    def _verify_params(self):
        """Reject ``extra_params`` that collide with a reserved parameter.

        Raises:
            ValueError: If a reserved parameter is used.
        """
        clashes = self._RESERVED_PARAMS.intersection(self.extra_params)
        if clashes:
            raise ValueError("Using a reserved parameter", clashes)

    def _next_page(self):
        """Get the next page in the iterator.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        if not self._has_next_page():
            return None

        response = self._get_next_page_response()
        raw_items = response.get(self._items_key, ())
        page = Page(self, raw_items, self.item_to_value, raw_page=response)
        # The hook runs before the token is advanced, so it still sees the
        # token that was used to request this page.
        self._page_start(self, page, response)
        self.next_page_token = response.get(self._next_token)
        return page

    def _has_next_page(self):
        """Determines whether or not there are more pages with results.

        Returns:
            bool: Whether the iterator has more pages.
        """
        # Nothing has been yielded yet, so the first request is always made.
        if self.page_number == 0:
            return True

        limit = self.max_results
        if limit is not None and self.num_results >= limit:
            return False

        return self.next_page_token is not None

    def _get_query_params(self):
        """Build the query parameters for the next request.

        Returns:
            dict: A dictionary of query parameters.
        """
        params = {}
        token = self.next_page_token
        if token is not None:
            params[self._PAGE_TOKEN] = token

        if self.max_results is None:
            # No overall cap: use the per-page size, which may be None.
            limit = self._page_size
        else:
            # Ask for no more than what is still needed to reach the cap.
            limit = self.max_results - self.num_results
            if self._page_size is not None:
                limit = min(limit, self._page_size)

        if limit is not None:
            params[self._MAX_RESULTS] = limit

        # Applied last, so caller-supplied parameters win on key collisions.
        params.update(self.extra_params)
        return params

    def _get_next_page_response(self):
        """Requests the next page from the path provided.

        The parameters go in the query string for ``GET`` and in the request
        data for ``POST``.

        Returns:
            dict: The parsed JSON response of the next page's contents.

        Raises:
            ValueError: If the HTTP method is not ``GET`` or ``POST``.
        """
        params = self._get_query_params()
        http_method = self._HTTP_METHOD
        if http_method == "GET":
            payload = {"query_params": params}
        elif http_method == "POST":
            payload = {"data": params}
        else:
            raise ValueError("Unexpected HTTP method", http_method)
        return self.api_request(method=http_method, path=self.path, **payload)
441
442
class _GAXIterator(Iterator):
    """A generic class for iterating through Cloud gRPC APIs list responses.

    Args:
        client (google.cloud.client.Client): The API client.
        page_iter (google.gax.PageIterator): A GAX page iterator to be wrapped
            to conform to the :class:`Iterator` interface.
        item_to_value (Callable[Iterator, Any]): Callable to convert an item
            from the protobuf response into a native object. Will
            be called with the iterator and a single item.
        max_results (int): The maximum number of results to fetch.

    .. autoattribute:: pages
    """

    def __init__(self, client, page_iter, item_to_value, max_results=None):
        super(_GAXIterator, self).__init__(
            client,
            item_to_value,
            page_token=page_iter.page_token,
            max_results=max_results,
        )
        self._gax_page_iter = page_iter

    def _next_page(self):
        """Get the next page in the iterator.

        Wraps the response from the :class:`~google.gax.PageIterator` in a
        :class:`Page` instance and captures some state at each page.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        # Only exhaustion of the wrapped iterator means "no more pages".
        # The try is limited to ``next()`` so that a StopIteration raised
        # while building the page or reading the token is not mistaken for
        # the end of the results.
        try:
            items = next(self._gax_page_iter)
        except StopIteration:
            return None

        page = Page(self, items, self.item_to_value)
        # A falsy token (such as an empty string) is stored as None.
        self.next_page_token = self._gax_page_iter.page_token or None
        return page
484
485
class GRPCIterator(Iterator):
    """Iterator over gRPC list responses.

    .. note:: The class does not take a ``page_token`` argument because it can
        just be specified in the ``request``.

    Args:
        client (google.cloud.client.Client): The API client. This is unused by
            this class, but kept to satisfy the :class:`Iterator` interface.
        method (Callable[protobuf.Message]): A bound gRPC method that should
            take a single message for the request.
        request (protobuf.Message): The request message. Its page token field
            is overwritten in place when later pages are requested.
        items_field (str): The field in the response message that has the
            items for the page.
        item_to_value (Callable[GRPCIterator, Any]): Callable to convert an
            item from the type in the response into a native object. Will
            be called with the iterator and a single item.
        request_token_field (str): The field in the request message used to
            specify the page token.
        response_token_field (str): The field in the response message that has
            the token for the next page.
        max_results (int): The maximum number of results to fetch.

    .. autoattribute:: pages
    """

    _DEFAULT_REQUEST_TOKEN_FIELD = "page_token"
    _DEFAULT_RESPONSE_TOKEN_FIELD = "next_page_token"

    def __init__(
        self,
        client,
        method,
        request,
        items_field,
        item_to_value=_item_to_value_identity,
        request_token_field=_DEFAULT_REQUEST_TOKEN_FIELD,
        response_token_field=_DEFAULT_RESPONSE_TOKEN_FIELD,
        max_results=None,
    ):
        super().__init__(client, item_to_value, max_results=max_results)
        self._method = method
        self._request = request
        self._items_field = items_field
        self._request_token_field = request_token_field
        self._response_token_field = response_token_field

    def _next_page(self):
        """Get the next page in the iterator.

        Returns:
            Optional[Page]: The next page in the iterator or :data:`None` if
                there are no pages left.
        """
        if not self._has_next_page():
            return None

        # Before the first response the token is None, so the request is
        # sent exactly as the caller built it.
        token = self.next_page_token
        if token is not None:
            setattr(self._request, self._request_token_field, token)

        response = self._method(self._request)

        self.next_page_token = getattr(response, self._response_token_field)
        page_items = getattr(response, self._items_field)
        return Page(self, page_items, self.item_to_value, raw_page=response)

    def _has_next_page(self):
        """Determines whether or not there are more pages with results.

        Returns:
            bool: Whether the iterator has more pages.
        """
        # Nothing has been yielded yet, so the first request is always made.
        if self.page_number == 0:
            return True

        limit = self.max_results
        if limit is not None and self.num_results >= limit:
            return False

        # Note: intentionally a falsy check instead of a None check. The RPC
        # can return an empty string indicating no more pages.
        return bool(self.next_page_token)