@@ -615,7 +615,7 @@ def partitions_for_topic(self, topic):
         partitions = cluster.partitions_for_topic(topic)
         return partitions

-    def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
+    def poll(self, timeout_ms=0, positions_timeout_ms=float("inf"), max_records=None, update_offsets=True):
         """Fetch data from assigned topics / partitions.

         Records are fetched and returned in batches by topic-partition.
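As a quick illustration of the new parameter, a caller can now bound how long poll() may block while resolving unknown fetch positions, separately from the normal poll timeout. A minimal sketch assuming the updated KafkaConsumer above; the topic name, group id, and broker address are placeholders:

    from kafka import KafkaConsumer

    consumer = KafkaConsumer('demo-topic',                        # placeholder topic
                             bootstrap_servers='localhost:9092',  # placeholder broker
                             group_id='demo-group')

    # Wait up to 1 second for records, but cap position/offset lookups
    # at 5 seconds instead of potentially blocking indefinitely.
    batches = consumer.poll(timeout_ms=1000, positions_timeout_ms=5000, max_records=100)
    for tp, records in batches.items():
        print(tp, len(records))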
@@ -656,7 +656,7 @@ def poll(self, timeout_ms=0, max_records=None, update_offsets=True):
         start = time.time()
         remaining = timeout_ms
         while not self._closed:
-            records = self._poll_once(remaining, max_records, update_offsets=update_offsets)
+            records = self._poll_once(remaining, positions_timeout_ms, max_records, update_offsets=update_offsets)
             if records:
                 return records

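The retry loop above keeps calling _poll_once() with a shrinking budget. A standalone sketch of that elapsed/remaining pattern (not the library's code; the helper names are made up):

    import time

    def poll_with_budget(poll_once, timeout_ms):
        """Call poll_once() repeatedly until it returns data or the budget runs out."""
        start = time.time()
        remaining = timeout_ms
        while remaining > 0:
            records = poll_once(remaining)
            if records:
                return records
            elapsed_ms = (time.time() - start) * 1000
            remaining = timeout_ms - elapsed_ms
        return {}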
@@ -668,7 +668,7 @@ def poll(self, timeout_ms=0, max_records=None, update_offsets=True):

         return {}

-    def _poll_once(self, timeout_ms, max_records, update_offsets=True):
+    def _poll_once(self, timeout_ms, positions_timeout_ms, max_records, update_offsets=True):
         """Do one round of polling. In addition to checking for new data, this does
         any needed heart-beating, auto-commits, and offset updates.

@@ -683,7 +683,7 @@ def _poll_once(self, timeout_ms, max_records, update_offsets=True):
         # Fetch positions if we have partitions we're subscribed to that we
         # don't know the offset for
         if not self._subscription.has_all_fetch_positions():
-            self._update_fetch_positions(self._subscription.missing_fetch_positions())
+            self._update_fetch_positions(self._subscription.missing_fetch_positions(), positions_timeout_ms)

         # If data is available already, e.g. from a previous network client
         # poll() call to commit, then just return it immediately
@@ -714,7 +714,7 @@ def _poll_once(self, timeout_ms, max_records, update_offsets=True):
         records, _ = self._fetcher.fetched_records(max_records, update_offsets=update_offsets)
         return records

-    def position(self, partition):
+    def position(self, partition, timeout_ms=float("inf")):
         """Get the offset of the next record that will be fetched

         Arguments:
@@ -728,7 +728,7 @@ def position(self, partition):
         assert self._subscription.is_assigned(partition), 'Partition is not assigned'
         offset = self._subscription.assignment[partition].position
         if offset is None:
-            self._update_fetch_positions([partition])
+            self._update_fetch_positions([partition], timeout_ms)
             offset = self._subscription.assignment[partition].position
         return offset

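With the signature change above, position() accepts an upper bound on how long to wait for the offset lookup. A hedged usage sketch; the broker address and topic/partition are placeholders:

    from kafka import KafkaConsumer, TopicPartition

    consumer = KafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder broker
    tp = TopicPartition('demo-topic', 0)                          # placeholder partition
    consumer.assign([tp])

    # Bound the position lookup to roughly 5 seconds rather than the
    # default of waiting indefinitely (timeout_ms=float("inf")).
    next_offset = consumer.position(tp, timeout_ms=5000)
    print('next offset to fetch:', next_offset)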
@@ -1087,7 +1087,7 @@ def _use_consumer_group(self):
             return False
         return True

-    def _update_fetch_positions(self, partitions):
+    def _update_fetch_positions(self, partitions, timeout_ms):
         """Set the fetch position to the committed position (if there is one)
         or reset it using the offset reset policy the user has configured.

@@ -1099,12 +1099,13 @@ def _update_fetch_positions(self, partitions):
             NoOffsetForPartitionError: If no offset is stored for a given
                 partition and no offset reset policy is defined.
         """
+        end_time = time.time() + timeout_ms / 1000
         # Lookup any positions for partitions which are awaiting reset (which may be the
         # case if the user called :meth:`seek_to_beginning` or :meth:`seek_to_end`. We do
         # this check first to avoid an unnecessary lookup of committed offsets (which
         # typically occurs when the user is manually assigning partitions and managing
         # their own offsets).
-        self._fetcher.reset_offsets_if_needed(partitions)
+        self._fetcher.reset_offsets_if_needed(partitions, timeout_ms)

         if not self._subscription.has_all_fetch_positions():
             # if we still don't have offsets for all partitions, then we should either seek
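The end_time line added above turns the millisecond timeout into an absolute deadline, so each later blocking step only gets whatever budget remains. A small illustrative sketch of that arithmetic (the helper name is made up):

    import time

    def remaining_ms(end_time):
        """Milliseconds left until the deadline, clamped at zero."""
        return max(0.0, 1000 * (end_time - time.time()))

    timeout_ms = 30000                               # example budget
    end_time = time.time() + timeout_ms / 1000       # absolute deadline, in seconds

    first_step_budget = remaining_ms(end_time)       # nearly the full 30s
    time.sleep(1)                                    # pretend the first step took a second
    second_step_budget = remaining_ms(end_time)      # roughly 29s remain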
@@ -1115,7 +1116,8 @@ def _update_fetch_positions(self, partitions):
             self._coordinator.refresh_committed_offsets_if_needed()

             # Then, do any offset lookups in case some positions are not known
-            self._fetcher.update_fetch_positions(partitions)
+            update_timeout_ms = max(0.0, 1000 * (end_time - time.time()))
+            self._fetcher.update_fetch_positions(partitions, update_timeout_ms)

     def _message_generator_v2(self):
         timeout_ms = 1000 * (self._consumer_timeout - time.time())
@@ -1145,7 +1147,8 @@ def _message_generator(self):
            # Fetch offsets for any subscribed partitions that we arent tracking yet
            if not self._subscription.has_all_fetch_positions():
                partitions = self._subscription.missing_fetch_positions()
-                self._update_fetch_positions(partitions)
+                update_timeout_ms = max(0.0, 1000 * (self._consumer_timeout - time.time()))
+                self._update_fetch_positions(partitions, update_timeout_ms)

            poll_ms = min((1000 * (self._consumer_timeout - time.time())), self.config['retry_backoff_ms'])
            self._client.poll(timeout_ms=poll_ms)
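For completeness, a hedged end-to-end sketch of how these changes surface to iterator users: the generator above derives its position-lookup budget from the remaining consumer timeout, so a bounded iterator no longer risks hanging on offset lookups. Topic, group, and broker values are placeholders.

    from kafka import KafkaConsumer

    consumer = KafkaConsumer('demo-topic',                        # placeholder topic
                             bootstrap_servers='localhost:9092',  # placeholder broker
                             group_id='demo-group',
                             consumer_timeout_ms=10000)           # bound iteration to ~10s

    # Offset lookups inside the generator are now capped by the time
    # remaining on consumer_timeout_ms instead of blocking without limit.
    for message in consumer:
        print(message.topic, message.partition, message.offset, message.value)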