Skip to content

Commit d84d0fd

Browse files
authored
remove expensive debug logs from controller (#57813)
Debug logs in the controller can be expensive for large clusters, and the warning logs in the autoscaling state are just noisy. --------- Signed-off-by: abrar <abrar@anyscale.com>
1 parent d1ddcc8 commit d84d0fd

File tree

2 files changed

+0
-36
lines changed

2 files changed

+0
-36
lines changed

python/ray/serve/_private/autoscaling_state.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -932,21 +932,11 @@ def update_running_replica_ids(
932932
app_state = self._app_autoscaling_states.get(deployment_id.app_name)
933933
if app_state:
934934
app_state.update_running_replica_ids(deployment_id, running_replicas)
935-
else:
936-
logger.warning(
937-
f"Cannot update running replica ids for deployment "
938-
f"{deployment_id} because the application {deployment_id.app_name} is not registered"
939-
)
940935

941936
def on_replica_stopped(self, replica_id: ReplicaID):
942937
app_state = self._app_autoscaling_states.get(replica_id.deployment_id.app_name)
943938
if app_state:
944939
app_state.on_replica_stopped(replica_id)
945-
else:
946-
logger.warning(
947-
f"Cannot invoke callback on replica stopped for replica "
948-
f"{replica_id} because the application {replica_id.deployment_id.app_name} is not registered"
949-
)
950940

951941
def get_metrics_for_deployment(
952942
self, deployment_id: DeploymentID
@@ -956,10 +946,6 @@ def get_metrics_for_deployment(
956946
deployment_id.app_name
957947
].get_replica_metrics_by_deployment_id(deployment_id)
958948
else:
959-
logger.warning(
960-
f"Cannot get metrics for deployment "
961-
f"{deployment_id} because the application {deployment_id.app_name} is not registered"
962-
)
963949
return {}
964950

965951
def get_total_num_requests_for_deployment(
@@ -970,10 +956,6 @@ def get_total_num_requests_for_deployment(
970956
deployment_id.app_name
971957
].get_total_num_requests_for_deployment(deployment_id)
972958
else:
973-
logger.warning(
974-
f"Cannot get total number of requests for deployment "
975-
f"{deployment_id} because the application {deployment_id.app_name} is not registered"
976-
)
977959
return 0
978960

979961
def is_within_bounds(
@@ -992,11 +974,6 @@ def record_request_metrics_for_replica(
992974
)
993975
if app_state:
994976
app_state.record_request_metrics_for_replica(replica_metric_report)
995-
else:
996-
logger.warning(
997-
f"Cannot record request metrics for replica "
998-
f"{replica_metric_report.replica_id} because the application {replica_metric_report.replica_id.deployment_id.app_name} is not registered"
999-
)
1000977

1001978
def record_request_metrics_for_handle(
1002979
self,
@@ -1008,11 +985,6 @@ def record_request_metrics_for_handle(
1008985
)
1009986
if app_state:
1010987
app_state.record_request_metrics_for_handle(handle_metric_report)
1011-
else:
1012-
logger.warning(
1013-
f"Cannot record request metrics for handle "
1014-
f"{handle_metric_report.handle_id} because the application {handle_metric_report.deployment_id.app_name} is not registered"
1015-
)
1016988

1017989
def drop_stale_handle_metrics(self, alive_serve_actor_ids: Set[str]) -> None:
1018990
for app_state in self._app_autoscaling_states.values():

python/ray/serve/_private/controller.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from ray.serve._private.application_state import ApplicationStateManager, StatusOverview
2222
from ray.serve._private.autoscaling_state import AutoscalingStateManager
2323
from ray.serve._private.common import (
24-
RUNNING_REQUESTS_KEY,
2524
DeploymentID,
2625
HandleMetricReport,
2726
NodeId,
@@ -275,9 +274,6 @@ def get_pid(self) -> int:
275274
def record_autoscaling_metrics_from_replica(
276275
self, replica_metric_report: ReplicaMetricReport
277276
):
278-
logger.debug(
279-
f"Received metrics from replica {replica_metric_report.replica_id}: {replica_metric_report.aggregated_metrics.get(RUNNING_REQUESTS_KEY)} running requests"
280-
)
281277
latency = time.time() - replica_metric_report.timestamp
282278
latency_ms = latency * 1000
283279
if latency_ms > RAY_SERVE_RPC_LATENCY_WARNING_THRESHOLD_MS:
@@ -294,10 +290,6 @@ def record_autoscaling_metrics_from_replica(
294290
def record_autoscaling_metrics_from_handle(
295291
self, handle_metric_report: HandleMetricReport
296292
):
297-
logger.debug(
298-
f"Received metrics from handle {handle_metric_report.handle_id} for deployment {handle_metric_report.deployment_id}: "
299-
f"{handle_metric_report.queued_requests} queued requests and {handle_metric_report.aggregated_metrics[RUNNING_REQUESTS_KEY]} running requests"
300-
)
301293
latency = time.time() - handle_metric_report.timestamp
302294
latency_ms = latency * 1000
303295
if latency_ms > RAY_SERVE_RPC_LATENCY_WARNING_THRESHOLD_MS:

0 commit comments

Comments
 (0)