8
8
import time
9
9
10
10
import six
11
+ import sys
11
12
12
13
try :
13
14
from Queue import Empty , Queue
16
17
17
18
from kafka .common import (
18
19
FetchRequest , OffsetRequest ,
19
- ConsumerFetchSizeTooSmall , ConsumerNoMoreData
20
+ ConsumerFetchSizeTooSmall , ConsumerNoMoreData ,
21
+ UnknownTopicOrPartitionError , NotLeaderForPartitionError ,
22
+ OffsetOutOfRangeError , check_error
20
23
)
21
24
from .base import (
22
25
Consumer ,
@@ -94,6 +97,10 @@ class SimpleConsumer(Consumer):
94
97
message in the iterator before exiting. None means no
95
98
timeout, so it will wait forever.
96
99
100
+ auto_offset_reset: default largest. Reset partition offsets upon
101
+ OffsetOutOfRangeError. Valid values are largest and smallest.
102
+ Otherwise, do not reset the offsets and raise OffsetOutOfRangeError.
103
+
97
104
Auto commit details:
98
105
If both auto_commit_every_n and auto_commit_every_t are set, they will
99
106
reset one another when one is triggered. These triggers simply call the
@@ -106,7 +113,8 @@ def __init__(self, client, group, topic, auto_commit=True, partitions=None,
106
113
fetch_size_bytes = FETCH_MIN_BYTES ,
107
114
buffer_size = FETCH_BUFFER_SIZE_BYTES ,
108
115
max_buffer_size = MAX_FETCH_BUFFER_SIZE_BYTES ,
109
- iter_timeout = None ):
116
+ iter_timeout = None ,
117
+ auto_offset_reset = 'largest' ):
110
118
super (SimpleConsumer , self ).__init__ (
111
119
client , group , topic ,
112
120
partitions = partitions ,
@@ -125,12 +133,38 @@ def __init__(self, client, group, topic, auto_commit=True, partitions=None,
125
133
self .fetch_min_bytes = fetch_size_bytes
126
134
self .fetch_offsets = self .offsets .copy ()
127
135
self .iter_timeout = iter_timeout
136
+ self .auto_offset_reset = auto_offset_reset
128
137
self .queue = Queue ()
129
138
130
139
def __repr__(self):
    """
    Return a short debugging description of this consumer: its group,
    its topic, and the string form of the keys of ``self.offsets``.
    """
    partition_keys = str(self.offsets.keys())
    return '<SimpleConsumer group=%s, topic=%s, partitions=%s>' % (
        self.group, self.topic, partition_keys)
133
142
143
def reset_partition_offset(self, partition):
    """
    Reset the stored offsets of ``partition`` according to
    ``self.auto_offset_reset``.

    'largest' sends an OffsetRequest with the LATEST sentinel (-1) and
    'smallest' sends one with the EARLIEST sentinel (-2). The first offset
    in the response is written to both ``self.offsets`` and
    ``self.fetch_offsets`` for that partition.

    Any other setting means "do not reset": if an exception is currently
    being handled it is re-raised unchanged, otherwise an
    OffsetOutOfRangeError is raised. Errors reported in the offset
    response are raised by ``check_error``.
    """
    policy = self.auto_offset_reset
    if policy == 'largest':
        offset_time = -1  # LATEST
    elif policy == 'smallest':
        offset_time = -2  # EARLIEST
    else:
        # Called from inside an exception handler: re-raise the upstream
        # exception, because it typically carries additional data about
        # the request that triggered it and we do not want to drop that.
        if sys.exc_info() != (None, None, None):
            raise
        # Called outside of an exception context: a bare ``raise`` has
        # nothing to re-raise, so raise a reasonable exception type.
        raise OffsetOutOfRangeError('Cannot reset partition offsets without a '
                                    'valid auto_offset_reset setting '
                                    '(largest|smallest)')

    # Ask for a single offset for this one partition.
    request = OffsetRequest(self.topic, partition, offset_time, 1)
    (response, ) = self.client.send_offset_request([request])
    check_error(response)

    new_offset = response.offsets[0]
    self.offsets[partition] = new_offset
    self.fetch_offsets[partition] = new_offset
167
+
134
168
def provide_partition_info (self ):
135
169
"""
136
170
Indicates that partition info must be returned by the consumer
@@ -297,10 +331,27 @@ def _fetch(self):
297
331
responses = self .client .send_fetch_request (
298
332
requests ,
299
333
max_wait_time = int (self .fetch_max_wait_time ),
300
- min_bytes = self .fetch_min_bytes )
334
+ min_bytes = self .fetch_min_bytes ,
335
+ fail_on_error = False
336
+ )
301
337
302
338
retry_partitions = {}
303
339
for resp in responses :
340
+
341
+ try :
342
+ check_error (resp )
343
+ except (UnknownTopicOrPartitionError , NotLeaderForPartitionError ):
344
+ self .client .reset_topic_metadata (resp .topic )
345
+ raise
346
+ except OffsetOutOfRangeError :
347
+ log .warning ("OffsetOutOfRangeError for %s - %d. "
348
+ "Resetting partition offset..." ,
349
+ resp .topic , resp .partition )
350
+ self .reset_partition_offset (resp .partition )
351
+ # Retry this partition
352
+ retry_partitions [resp .partition ] = partitions [resp .partition ]
353
+ continue
354
+
304
355
partition = resp .partition
305
356
buffer_size = partitions [partition ]
306
357
try :
0 commit comments