Skip to content

Commit 9f5bbd3

Browse files
abrarsheikh authored and aslonnie committed
[3/n] [Serve] expose deployment topology in serve instance details (ray-project#58355)
### Summary

This PR exposes deployment topology information in Ray Serve instance details, allowing users to visualize and understand the dependency graph of deployments within their applications.

### What's Changed

#### New Data Structures

Added two new schema classes to represent deployment topology:

- **`DeploymentNode`** - Represents a node in the deployment DAG
- **`DeploymentTopology`** - Represents the full dependency graph

#### Implementation

**Controller Integration**

- Updated `ServeController` to include `deployment_topology` in `ApplicationDetails` when serving instance details
- Topology is now accessible via the `get_serve_details()` API

---

**Example Output:**

```python
{
    "app_name": "my_app",
    "ingress_deployment": "Ingress",
    "nodes": {
        "Ingress": {
            "name": "Ingress",
            "is_ingress": True,
            "outbound_deployments": [
                {"name": "ServiceA", "app_name": "my_app"}
            ]
        },
        "ServiceA": {
            "name": "ServiceA",
            "is_ingress": False,
            "outbound_deployments": [
                {"name": "Database", "app_name": "my_app"}
            ]
        },
        "Database": {
            "name": "Database",
            "is_ingress": False,
            "outbound_deployments": []
        }
    }
}
```

---------

Signed-off-by: abrar <abrar@anyscale.com>
Co-authored-by: Lonnie Liu <95255098+aslonnie@users.noreply.github.com>
1 parent 79f7572 commit 9f5bbd3

File tree

9 files changed: +1405 -1 lines changed

doc/source/serve/api/index.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,9 @@ Content-Type: application/json
416416
schema.ProxyStatus
417417
schema.TargetGroup
418418
schema.Target
419+
schema.DeploymentNode
420+
schema.DeploymentTopology
421+
419422
```
420423

421424
## Observability

python/ray/serve/_private/application_state.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@
6161
APIType,
6262
ApplicationStatus,
6363
DeploymentDetails,
64+
DeploymentNode,
65+
DeploymentTopology,
6466
LoggingConfig,
6567
ServeApplicationSchema,
6668
)
@@ -935,6 +937,46 @@ def _reconcile_target_deployments(self) -> None:
935937

936938
return target_state_changed
937939

940+
def get_deployment_topology(self) -> Optional[DeploymentTopology]:
941+
"""Get the deployment topology for this application.
942+
943+
Returns:
944+
The deployment topology, or None if not yet built.
945+
"""
946+
if not self.target_deployments:
947+
return None
948+
949+
nodes = {}
950+
951+
# Using target deployments because we wish to build best effort topology based on current state.
952+
for deployment_name in self.target_deployments:
953+
deployment_id = DeploymentID(name=deployment_name, app_name=self._name)
954+
955+
# Get outbound deployment names from deployment state
956+
outbound_deployment = (
957+
self._deployment_state_manager.get_deployment_outbound_deployments(
958+
deployment_id
959+
)
960+
) or []
961+
962+
# Create node for this deployment
963+
node = DeploymentNode(
964+
name=deployment_name,
965+
app_name=self._name,
966+
outbound_deployments=[
967+
{"name": dep.name, "app_name": dep.app_name}
968+
for dep in outbound_deployment
969+
],
970+
is_ingress=(deployment_name == self._ingress_deployment_name),
971+
)
972+
nodes[deployment_name] = node
973+
974+
return DeploymentTopology(
975+
app_name=self._name,
976+
ingress_deployment=self._ingress_deployment_name,
977+
nodes=nodes,
978+
)
979+
938980
def update(self) -> Tuple[bool, bool]:
939981
"""Attempts to reconcile this application to match its target state.
940982
@@ -1280,6 +1322,20 @@ def list_deployment_details(self, name: str) -> Dict[str, DeploymentDetails]:
12801322
return {}
12811323
return self._application_states[name].list_deployment_details()
12821324

1325+
def get_deployment_topology(self, app_name: str) -> Optional[DeploymentTopology]:
1326+
"""Get the deployment topology for an application.
1327+
1328+
Args:
1329+
app_name: Name of the application.
1330+
1331+
Returns:
1332+
The deployment topology for the application, or None if the application
1333+
doesn't exist or the topology hasn't been built yet.
1334+
"""
1335+
if app_name not in self._application_states:
1336+
return None
1337+
return self._application_states[app_name].get_deployment_topology()
1338+
12831339
def update(self):
12841340
"""Update each application state."""
12851341
apps_to_be_deleted = []

python/ray/serve/_private/controller.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,9 @@ def get_serve_instance_details(self, source: Optional[APIType] = None) -> Dict:
988988
deployments=self.application_state_manager.list_deployment_details(
989989
app_name
990990
),
991+
deployment_topology=self.application_state_manager.get_deployment_topology(
992+
app_name
993+
),
991994
)
992995

993996
# NOTE(zcin): We use exclude_unset here because we explicitly and intentionally

python/ray/serve/schema.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,6 +1109,46 @@ def get_valid_user_values(cls):
11091109
return [cls.IMPERATIVE.value, cls.DECLARATIVE.value]
11101110

11111111

1112+
@PublicAPI(stability="alpha")
1113+
class DeploymentNode(BaseModel):
1114+
"""Represents a node in the deployment topology.
1115+
1116+
Each node represents a deployment and tracks which other deployments it calls.
1117+
"""
1118+
1119+
name: str = Field(description="The name of the deployment.")
1120+
app_name: str = Field(
1121+
description="The name of the application this deployment belongs to."
1122+
)
1123+
# using name and app_name instead of just deployment name because outbound dependencies can be in different apps
1124+
outbound_deployments: List[dict] = Field(
1125+
default_factory=list,
1126+
description="The deployment IDs that this deployment calls (outbound dependencies).",
1127+
)
1128+
is_ingress: bool = Field(
1129+
default=False, description="Whether this is the ingress deployment."
1130+
)
1131+
1132+
1133+
@PublicAPI(stability="alpha")
1134+
class DeploymentTopology(BaseModel):
1135+
"""Represents the dependency graph of deployments in an application.
1136+
1137+
The topology shows which deployments call which other deployments,
1138+
with the ingress deployment as the entry point.
1139+
"""
1140+
1141+
app_name: str = Field(
1142+
description="The name of the application this topology belongs to."
1143+
)
1144+
nodes: Dict[str, DeploymentNode] = Field(
1145+
description="The adjacency list of deployment nodes."
1146+
)
1147+
ingress_deployment: Optional[str] = Field(
1148+
default=None, description="The name of the ingress deployment (entry point)."
1149+
)
1150+
1151+
11121152
@PublicAPI(stability="stable")
11131153
class ApplicationDetails(BaseModel, extra=Extra.forbid, frozen=True):
11141154
"""Detailed info about a Serve application."""
@@ -1172,6 +1212,11 @@ class ApplicationDetails(BaseModel, extra=Extra.forbid, frozen=True):
11721212
"route_prefix", allow_reuse=True
11731213
)(_route_prefix_format)
11741214

1215+
deployment_topology: Optional[DeploymentTopology] = Field(
1216+
default=None,
1217+
description="The deployment topology showing how deployments in this application call each other.",
1218+
)
1219+
11751220

11761221
@PublicAPI(stability="stable")
11771222
class ProxyDetails(ServeActorDetails, frozen=True):

python/ray/serve/tests/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ py_test_module_list(
110110
"test_controller_recovery.py",
111111
"test_deploy_2.py",
112112
"test_deployment_scheduler.py",
113+
"test_deployment_topology.py",
113114
"test_failure.py",
114115
"test_handle_1.py",
115116
"test_handle_2.py",

python/ray/serve/tests/test_controller.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,18 @@ def autoscaling_app():
216216
],
217217
}
218218
},
219+
"deployment_topology": {
220+
"app_name": "default",
221+
"nodes": {
222+
"autoscaling_app": {
223+
"name": "autoscaling_app",
224+
"app_name": "default",
225+
"outbound_deployments": [],
226+
"is_ingress": True,
227+
}
228+
},
229+
"ingress_deployment": "autoscaling_app",
230+
},
219231
}
220232
},
221233
"target_capacity": None,

0 commit comments

Comments
 (0)