Skip to content

bugfix: fix infinite loop on KafkaAdminClient #2194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 50 additions & 15 deletions kafka/admin/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def _refresh_controller_id(self):
version = self._matching_api_version(MetadataRequest)
if 1 <= version <= 6:
request = MetadataRequest[version]()
future = self._send_request_to_node(self._client.least_loaded_node(), request)
future = self._send_request_to_least_loaded_node(request)

self._wait_for_futures([future])

Expand Down Expand Up @@ -310,7 +310,7 @@ def _find_coordinator_id_send_request(self, group_id):
raise NotImplementedError(
"Support for GroupCoordinatorRequest_v{} has not yet been added to KafkaAdminClient."
.format(version))
return self._send_request_to_node(self._client.least_loaded_node(), request)
return self._send_request_to_least_loaded_node(request)

def _find_coordinator_id_process_response(self, response):
"""Process a FindCoordinatorResponse.
Expand Down Expand Up @@ -355,9 +355,36 @@ def _find_coordinator_ids(self, group_ids):
}
return groups_coordinators

def _send_request_to_least_loaded_node(self, request):
    """Send a Kafka protocol message to the least loaded broker.

    The target broker is chosen with ``least_loaded_node()``. The client
    is polled until the connection to that broker reports ready; whenever
    the chosen broker is no longer present in the cluster metadata, a new
    broker is chosen before the readiness check is repeated.

    Returns a future that may be polled for status and results.

    :param request: The message to send.
    :return: A future object that may be polled for status and results.
    :exception: The exception if the message could not be sent.
    """
    broker_id = self._client.least_loaded_node()
    while True:
        if self._client.ready(broker_id):
            break

        # Keep polling until the broker connection is ready; otherwise
        # send() will fail with NodeNotReadyError.
        self._client.poll()

        # The selected broker is not part of the cluster anymore, so pick
        # a new broker to connect to.
        broker_is_gone = self._client.cluster.broker_metadata(broker_id) is None
        if broker_is_gone:
            broker_id = self._client.least_loaded_node()

    return self._client.send(broker_id, request)

def _send_request_to_node(self, node_id, request, wakeup=True):
"""Send a Kafka protocol message to a specific broker.

.. note::

This function will enter in an infinite loop if `node_id` is
removed from the cluster.

Returns a future that may be polled for status and results.

:param node_id: The broker id to which to send the message.
Expand All @@ -383,10 +410,23 @@ def _send_request_to_controller(self, request):
tries = 2 # in case our cached self._controller_id is outdated
while tries:
tries -= 1
future = self._send_request_to_node(self._controller_id, request)
future = self._client.send(self._controller_id, request)

self._wait_for_futures([future])

if future.exception is not None:
log.error(
"Sending request to controller_id %s failed with %s",
self._controller_id,
future.exception,
)
is_outdated_controler = (
self._client.cluster.broker_metadata(self._controller_id) is None
)
if is_outdated_controler:
self._refresh_controller_id()
continue

response = future.value
# In Java, the error field name is inconsistent:
# - CreateTopicsResponse / CreatePartitionsResponse uses topic_errors
Expand Down Expand Up @@ -507,10 +547,7 @@ def _get_cluster_metadata(self, topics=None, auto_topic_creation=False):
allow_auto_topic_creation=auto_topic_creation
)

future = self._send_request_to_node(
self._client.least_loaded_node(),
request
)
future = self._send_request_to_least_loaded_node(request)
self._wait_for_futures([future])
return future.value

Expand Down Expand Up @@ -602,7 +639,7 @@ def describe_acls(self, acl_filter):
.format(version)
)

future = self._send_request_to_node(self._client.least_loaded_node(), request)
future = self._send_request_to_least_loaded_node(request)
self._wait_for_futures([future])
response = future.value

Expand Down Expand Up @@ -693,7 +730,7 @@ def create_acls(self, acls):
.format(version)
)

future = self._send_request_to_node(self._client.least_loaded_node(), request)
future = self._send_request_to_least_loaded_node(request)
self._wait_for_futures([future])
response = future.value

Expand Down Expand Up @@ -787,7 +824,7 @@ def delete_acls(self, acl_filters):
.format(version)
)

future = self._send_request_to_node(self._client.least_loaded_node(), request)
future = self._send_request_to_least_loaded_node(request)
self._wait_for_futures([future])
response = future.value

Expand Down Expand Up @@ -847,8 +884,7 @@ def describe_configs(self, config_resources, include_synonyms=False):
))

if len(topic_resources) > 0:
futures.append(self._send_request_to_node(
self._client.least_loaded_node(),
futures.append(self._send_request_to_least_loaded_node(
DescribeConfigsRequest[version](resources=topic_resources)
))

Expand All @@ -868,8 +904,7 @@ def describe_configs(self, config_resources, include_synonyms=False):
))

if len(topic_resources) > 0:
futures.append(self._send_request_to_node(
self._client.least_loaded_node(),
futures.append(self._send_request_to_least_loaded_node(
DescribeConfigsRequest[version](resources=topic_resources, include_synonyms=include_synonyms)
))
else:
Expand Down Expand Up @@ -916,7 +951,7 @@ def alter_configs(self, config_resources):
# // a single request that may be sent to any broker.
#
# So this is currently broken as it always sends to the least_loaded_node()
future = self._send_request_to_node(self._client.least_loaded_node(), request)
future = self._send_request_to_least_loaded_node(request)

self._wait_for_futures([future])
response = future.value
Expand Down