Skip to content

Commit c678d35

Browse files
segetsy and burnash authored
[fix/3358] add pagination stopping to JSONResponseCursorPaginator (#3374)
* [fix/3358] add pagination stopping to JSONResponseCursorPaginator
* [fix/3358] add some tests when there are more pages
* [fix/3358] fix naming
* [fix/3374] make stop_after_empty_page robust to data = None
* [fix/3358] align has_more handling with RangePaginator and add test cases
* Compile path in __init__; short-circuit on empty page before touching has_more

---------

Co-authored-by: Anton Burnashev <[email protected]>
1 parent c4515d7 commit c678d35

File tree

2 files changed

+97
-1
lines changed

2 files changed

+97
-1
lines changed

dlt/sources/helpers/rest_client/paginators.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,9 @@ def __init__(
846846
cursor_path: jsonpath.TJsonPath = "cursors.next",
847847
cursor_param: Optional[str] = None,
848848
cursor_body_path: Optional[str] = None,
849+
*,
850+
stop_after_empty_page: bool = False,
851+
has_more_path: Optional[jsonpath.TJsonPath] = None,
849852
):
850853
"""
851854
Args:
@@ -854,6 +857,10 @@ def __init__(
854857
cursor_param: The name of the query parameter to be used in
855858
the request to get the next page.
856859
cursor_body_path: The dot-separated path where to place the cursor in the request body.
860+
stop_after_empty_page: Whether pagination should stop when
861+
a page contains no result items. Defaults to `False`.
862+
has_more_path: The JSON path to a boolean value in the response
863+
indicating whether there are more items to fetch.
857864
"""
858865
super().__init__()
859866
self.cursor_path = jsonpath.compile_path(cursor_path)
@@ -869,12 +876,48 @@ def __init__(
869876

870877
self.cursor_param = cursor_param
871878
self.cursor_body_path = cursor_body_path
879+
self.stop_after_empty_page = stop_after_empty_page
880+
self.has_more_path = jsonpath.compile_path(has_more_path) if has_more_path else None
872881

873882
def update_state(self, response: Response, data: Optional[List[Any]] = None) -> None:
    """Refreshes the pagination state from a JSON response.

    Reads the next cursor found at `cursor_path` and, when `has_more_path`
    is configured, the "has more" flag found at that path.

    Args:
        response: The response whose JSON body is inspected.
        data: The items extracted from the page, if any. Only consulted
            when `stop_after_empty_page` is enabled.

    Raises:
        ValueError: Via the `_handle_*_has_more` hooks, when `has_more_path`
            is configured and the value is missing, is a string that
            `str2bool` rejects, or is neither a string nor a boolean.
    """
    payload = response.json()
    cursors = jsonpath.find_values(self.cursor_path, payload)
    # A missing cursor and a falsy one (e.g. an empty string) are treated alike.
    self._next_reference = cursors[0] if cursors and cursors[0] else None

    # An empty (or absent) page ends pagination before `has_more` is looked at.
    if self.stop_after_empty_page and not data:
        self._has_next_page = False
        return

    # Without a configured path there is no flag to evaluate.
    if not self.has_more_path:
        return

    found = jsonpath.find_values(self.has_more_path, payload)
    flag = found[0] if found else None
    if flag is None:
        self._handle_missing_has_more(payload)
    elif isinstance(flag, str):
        # String flags such as "false" are converted before being stored.
        try:
            flag = str2bool(flag)
        except ValueError:
            self._handle_invalid_has_more(flag)
    elif not isinstance(flag, bool):
        self._handle_invalid_has_more(flag)

    self._has_next_page = flag
907+
908+
def _handle_invalid_has_more(self, has_more: Any) -> None:
    """Reports a "has more" value that cannot be used as a boolean.

    Args:
        has_more: The offending value read from the response.

    Raises:
        ValueError: Always; the message names the configured path, the
            paginator class and the value that was received.
    """
    paginator_name = self.__class__.__name__
    message = (
        f"'{self.has_more_path}' is not a `bool` in the response in"
        f" `{paginator_name}`. Expected a boolean, got `{has_more}`"
    )
    raise ValueError(message)
913+
914+
def _handle_missing_has_more(self, response_json: Dict[str, Any]) -> None:
    """Reports a response that carries no "has more" value.

    Args:
        response_json: The decoded response body, echoed in the error
            message to help with debugging.

    Raises:
        ValueError: Always; the message names the paginator class, the
            configured path and the response that was received.
    """
    paginator_name = self.__class__.__name__
    message = (
        f"Has more value not found in the response in `{paginator_name}`. "
        f"Expected a response with a `{self.has_more_path}` key, got"
        f" `{response_json}`."
    )
    raise ValueError(message)
920+
878921
def update_request(self, request: Request) -> None:
879922
"""Updates the request with the cursor value either in query parameters
880923
or in the request JSON body."""

tests/sources/helpers/rest_client/test_paginators.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -953,6 +953,59 @@ def test_update_state_when_cursor_path_is_empty_string(self):
953953
paginator.update_state(response)
954954
assert paginator.has_next_page is False
955955

956+
def test_update_state_stop_after_empty_page(self):
    """An empty page stops pagination even though a cursor is returned."""
    payload = {"next_cursor": "cursor", "results": []}
    response = Mock(Response, json=lambda: payload)
    paginator = JSONResponseCursorPaginator(
        cursor_path="next_cursor",
        stop_after_empty_page=True,
    )

    paginator.update_state(response, [])

    assert paginator.has_next_page is False
963+
964+
def test_update_has_more_path(self):
    """A boolean `False` at `has_more_path` stops pagination."""
    payload = {"next_cursor": "cursor", "results": [], "has_more": False}
    response = Mock(Response, json=lambda: payload)
    paginator = JSONResponseCursorPaginator(
        cursor_path="next_cursor",
        has_more_path="has_more",
    )

    paginator.update_state(response)

    assert paginator.has_next_page is False
971+
972+
def test_update_has_more_path_string(self):
    """A string "false" at `has_more_path` stops pagination as well."""
    payload = {"next_cursor": "cursor", "results": [], "has_more": "false"}
    response = Mock(Response, json=lambda: payload)
    paginator = JSONResponseCursorPaginator(
        cursor_path="next_cursor",
        has_more_path="has_more",
    )

    paginator.update_state(response)

    assert paginator.has_next_page is False
979+
980+
def test_update_has_more_path_true_nonempty_result(self):
    """`has_more` set to `True` on a page with items keeps pagination going."""
    payload = {
        "next_cursor": "cursor",
        "results": [{"hello": "world"}],
        "has_more": True,
    }
    response = Mock(Response, json=lambda: payload)
    paginator = JSONResponseCursorPaginator(
        cursor_path="next_cursor",
        has_more_path="has_more",
    )

    paginator.update_state(response)

    assert paginator.has_next_page is True
992+
993+
def test_update_has_more_path_true(self):
    """`has_more` set to `True` keeps pagination going when `data` is not passed."""
    payload = {"next_cursor": "cursor", "results": [], "has_more": True}
    response = Mock(Response, json=lambda: payload)
    paginator = JSONResponseCursorPaginator(
        cursor_path="next_cursor",
        has_more_path="has_more",
    )

    paginator.update_state(response)

    assert paginator.has_next_page is True
1000+
1001+
def test_update_has_more_path_missing(self):
    """A response without the configured `has_more` key raises `ValueError`."""
    paginator = JSONResponseCursorPaginator(cursor_path="next_cursor", has_more_path="has_more")
    # The result item is a dict, like in the sibling tests; the earlier
    # `{"hello", "world"}` was a set literal, which JSON cannot represent.
    response = Mock(
        Response, json=lambda: {"next_cursor": "cursor", "results": [{"hello": "world"}]}
    )
    with pytest.raises(ValueError, match="Has more value not found in the response"):
        paginator.update_state(response)
1008+
9561009
def test_update_request_param(self):
9571010
paginator = JSONResponseCursorPaginator(cursor_path="next_cursor")
9581011
paginator._next_reference = "cursor-2"

0 commit comments

Comments
 (0)