26
26
)
27
27
from redis .asyncio .lock import Lock
28
28
from redis .asyncio .parser import CommandsParser
29
+ from redis .asyncio .retry import Retry
30
+ from redis .backoff import default_backoff
29
31
from redis .client import EMPTY_RESPONSE , NEVER_DECODE , AbstractRedis
30
32
from redis .cluster import (
31
33
PIPELINE_BLOCKED_COMMANDS ,
@@ -110,10 +112,10 @@ class RedisCluster(AbstractRedis, AbstractRedisCluster, AsyncRedisClusterCommand
110
112
:param startup_nodes:
111
113
| :class:`~.ClusterNode` to used as a startup node
112
114
:param require_full_coverage:
113
- | When set to ``False``: the client will not require a full coverage of the
114
- slots. However, if not all slots are covered, and at least one node has
115
- ``cluster-require-full-coverage`` set to ``yes``, the server will throw a
116
- :class:`~.ClusterDownError` for some key-based commands.
115
+ | When set to ``False``: the client will not require a full coverage of
116
+ the slots. However, if not all slots are covered, and at least one node
117
+ has ``cluster-require-full-coverage`` set to ``yes``, the server will throw
118
+ a :class:`~.ClusterDownError` for some key-based commands.
117
119
| When set to ``True``: all slots must be covered to construct the cluster
118
120
client. If not all slots are covered, :class:`~.RedisClusterException` will be
119
121
thrown.
@@ -136,7 +138,10 @@ class RedisCluster(AbstractRedis, AbstractRedisCluster, AsyncRedisClusterCommand
136
138
or :class:`~.ConnectionError` or :class:`~.ClusterDownError` are encountered
137
139
:param connection_error_retry_attempts:
138
140
| Number of times to retry before reinitializing when :class:`~.TimeoutError`
139
- or :class:`~.ConnectionError` are encountered
141
+ or :class:`~.ConnectionError` are encountered.
142
+ The default backoff strategy will be set if Retry object is not passed (see
143
+ default_backoff in backoff.py). To change it, pass a custom Retry object
144
+ using the "retry" keyword.
140
145
:param max_connections:
141
146
| Maximum number of connections per node. If there are no free connections & the
142
147
maximum number of connections are already created, a
@@ -214,9 +219,9 @@ def __init__(
214
219
startup_nodes : Optional [List ["ClusterNode" ]] = None ,
215
220
require_full_coverage : bool = True ,
216
221
read_from_replicas : bool = False ,
217
- reinitialize_steps : int = 10 ,
222
+ reinitialize_steps : int = 5 ,
218
223
cluster_error_retry_attempts : int = 3 ,
219
- connection_error_retry_attempts : int = 5 ,
224
+ connection_error_retry_attempts : int = 3 ,
220
225
max_connections : int = 2 ** 31 ,
221
226
# Client related kwargs
222
227
db : Union [str , int ] = 0 ,
@@ -235,6 +240,8 @@ def __init__(
235
240
socket_keepalive : bool = False ,
236
241
socket_keepalive_options : Optional [Mapping [int , Union [int , bytes ]]] = None ,
237
242
socket_timeout : Optional [float ] = None ,
243
+ retry : Optional ["Retry" ] = None ,
244
+ retry_on_error : Optional [List [Exception ]] = None ,
238
245
# SSL related kwargs
239
246
ssl : bool = False ,
240
247
ssl_ca_certs : Optional [str ] = None ,
@@ -282,6 +289,7 @@ def __init__(
282
289
"socket_keepalive" : socket_keepalive ,
283
290
"socket_keepalive_options" : socket_keepalive_options ,
284
291
"socket_timeout" : socket_timeout ,
292
+ "retry" : retry ,
285
293
}
286
294
287
295
if ssl :
@@ -302,6 +310,18 @@ def __init__(
302
310
# Call our on_connect function to configure READONLY mode
303
311
kwargs ["redis_connect_func" ] = self .on_connect
304
312
313
+ self .retry = retry
314
+ if retry or retry_on_error or connection_error_retry_attempts > 0 :
315
+ # Set a retry object for all cluster nodes
316
+ self .retry = retry or Retry (
317
+ default_backoff (), connection_error_retry_attempts
318
+ )
319
+ if not retry_on_error :
320
+ # Default errors for retrying
321
+ retry_on_error = [ConnectionError , TimeoutError ]
322
+ self .retry .update_supported_errors (retry_on_error )
323
+ kwargs .update ({"retry" : self .retry })
324
+
305
325
kwargs ["response_callbacks" ] = self .__class__ .RESPONSE_CALLBACKS .copy ()
306
326
self .connection_kwargs = kwargs
307
327
@@ -323,7 +343,6 @@ def __init__(
323
343
self .reinitialize_steps = reinitialize_steps
324
344
self .cluster_error_retry_attempts = cluster_error_retry_attempts
325
345
self .connection_error_retry_attempts = connection_error_retry_attempts
326
-
327
346
self .reinitialize_counter = 0
328
347
self .commands_parser = CommandsParser ()
329
348
self .node_flags = self .__class__ .NODE_FLAGS .copy ()
@@ -481,6 +500,16 @@ def get_connection_kwargs(self) -> Dict[str, Optional[Any]]:
481
500
"""Get the kwargs passed to :class:`~redis.asyncio.connection.Connection`."""
482
501
return self .connection_kwargs
483
502
503
+ def get_retry (self ) -> Optional ["Retry" ]:
504
+ return self .retry
505
+
506
+ def set_retry (self , retry : "Retry" ) -> None :
507
+ self .retry = retry
508
+ for node in self .get_nodes ():
509
+ node .connection_kwargs .update ({"retry" : retry })
510
+ for conn in node ._connections :
511
+ conn .retry = retry
512
+
484
513
def set_response_callback (self , command : str , callback : ResponseCallbackT ) -> None :
485
514
"""Set a custom response callback."""
486
515
self .response_callbacks [command ] = callback
@@ -618,9 +647,11 @@ async def execute_command(self, *args: EncodableT, **kwargs: Any) -> Any:
618
647
if passed_targets and not self ._is_node_flag (passed_targets ):
619
648
target_nodes = self ._parse_target_nodes (passed_targets )
620
649
target_nodes_specified = True
621
- retry_attempts = 1
650
+ retry_attempts = 0
622
651
623
- for _ in range (retry_attempts ):
652
+ # Add one for the first execution
653
+ execute_attempts = 1 + retry_attempts
654
+ for _ in range (execute_attempts ):
624
655
if self ._initialize :
625
656
await self .initialize ()
626
657
try :
@@ -658,25 +689,21 @@ async def execute_command(self, *args: EncodableT, **kwargs: Any) -> Any:
658
689
)
659
690
return dict (zip (keys , values ))
660
691
except Exception as e :
661
- if type (e ) in self .__class__ .ERRORS_ALLOW_RETRY :
662
- # The nodes and slots cache were reinitialized.
692
+ if retry_attempts > 0 and type (e ) in self .__class__ .ERRORS_ALLOW_RETRY :
693
+ # The nodes and slots cache were should be reinitialized.
663
694
# Try again with the new cluster setup.
664
- exception = e
695
+ retry_attempts -= 1
696
+ continue
665
697
else :
666
- # All other errors should be raised.
667
- raise
668
-
669
- # If it fails the configured number of times then raise exception back
670
- # to caller of this method
671
- raise exception
698
+ # raise the exception
699
+ raise e
672
700
673
701
async def _execute_command (
674
702
self , target_node : "ClusterNode" , * args : Union [KeyT , EncodableT ], ** kwargs : Any
675
703
) -> Any :
676
704
asking = moved = False
677
705
redirect_addr = None
678
706
ttl = self .RedisClusterRequestTTL
679
- connection_error_retry_counter = 0
680
707
681
708
while ttl > 0 :
682
709
ttl -= 1
@@ -695,25 +722,18 @@ async def _execute_command(
695
722
moved = False
696
723
697
724
return await target_node .execute_command (* args , ** kwargs )
698
- except BusyLoadingError :
725
+ except (BusyLoadingError , MaxConnectionsError ):
726
+ raise
727
+ except (ConnectionError , TimeoutError ):
728
+ # Connection retries are being handled in the node's
729
+ # Retry object.
730
+ # Remove the failed node from the startup nodes before we try
731
+ # to reinitialize the cluster
732
+ self .nodes_manager .startup_nodes .pop (target_node .name , None )
733
+ # Hard force of reinitialize of the node/slots setup
734
+ # and try again with the new setup
735
+ await self .close ()
699
736
raise
700
- except (ConnectionError , TimeoutError ) as e :
701
- # Give the node 0.25 seconds to get back up and retry again with the
702
- # same node and configuration. After the defined number of attempts, try
703
- # to reinitialize the cluster and try again.
704
- connection_error_retry_counter += 1
705
- if (
706
- connection_error_retry_counter
707
- < self .connection_error_retry_attempts
708
- ):
709
- await asyncio .sleep (0.25 )
710
- else :
711
- if isinstance (e , MaxConnectionsError ):
712
- raise
713
- # Hard force of reinitialize of the node/slots setup
714
- # and try again with the new setup
715
- await self .close ()
716
- raise
717
737
except ClusterDownError :
718
738
# ClusterDownError can occur during a failover and to get
719
739
# self-healed, we will try to reinitialize the cluster layout
@@ -1145,26 +1165,11 @@ async def initialize(self) -> None:
1145
1165
)
1146
1166
cluster_slots = await startup_node .execute_command ("CLUSTER SLOTS" )
1147
1167
startup_nodes_reachable = True
1148
- except (ConnectionError , TimeoutError ) as e :
1168
+ except Exception as e :
1169
+ # Try the next startup node.
1170
+ # The exception is saved and raised only if we have no more nodes.
1149
1171
exception = e
1150
1172
continue
1151
- except ResponseError as e :
1152
- # Isn't a cluster connection, so it won't parse these
1153
- # exceptions automatically
1154
- message = e .__str__ ()
1155
- if "CLUSTERDOWN" in message or "MASTERDOWN" in message :
1156
- continue
1157
- else :
1158
- raise RedisClusterException (
1159
- 'ERROR sending "cluster slots" command to redis '
1160
- f"server: { startup_node } . error: { message } "
1161
- )
1162
- except Exception as e :
1163
- message = e .__str__ ()
1164
- raise RedisClusterException (
1165
- 'ERROR sending "cluster slots" command to redis '
1166
- f"server { startup_node .name } . error: { message } "
1167
- )
1168
1173
1169
1174
# CLUSTER SLOTS command results in the following output:
1170
1175
# [[slot_section[from_slot,to_slot,master,replica1,...,replicaN]]]
@@ -1245,8 +1250,8 @@ async def initialize(self) -> None:
1245
1250
1246
1251
if not startup_nodes_reachable :
1247
1252
raise RedisClusterException (
1248
- "Redis Cluster cannot be connected. Please provide at least "
1249
- "one reachable node. "
1253
+ f "Redis Cluster cannot be connected. Please provide at least "
1254
+ f "one reachable node: { str ( exception ) } "
1250
1255
) from exception
1251
1256
1252
1257
# Check if the slots are not fully covered
0 commit comments