Skip to content

Commit fee1e2d

Browse files
KennyStryker and copybara-github
authored and committed
Copybara import of the project:
-- 7135692 by Kenny Stryker <[email protected]>: feat(matching-engine): add sync argument to deploy_index -- e0eb35d by Kenny Stryker <[email protected]>: feat: Updated explicit sync to existing decorator optional_sync -- af64657 by Kenny Stryker <[email protected]>: fix: Fixed return type for deploy_index and added test for sync values -- 4a1edb1 by Kenny Stryker <[email protected]>: test: Added test cases for sync values in automatic config deploy index -- b0d4e47 by Kenny Stryker <[email protected]>: chore: Updated docstring for helper method _deploy_index COPYBARA_INTEGRATE_REVIEW=#5305 from KennyStryker:main cc7ebe1 PiperOrigin-RevId: 776289436
1 parent 9fd40ae commit fee1e2d

File tree

2 files changed

+171
-3
lines changed

2 files changed

+171
-3
lines changed

google/cloud/aiplatform/matching_engine/matching_engine_index_endpoint.py

Lines changed: 159 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1112,6 +1112,7 @@ def deploy_index(
11121112
auth_config_audiences: Optional[Sequence[str]] = None,
11131113
auth_config_allowed_issuers: Optional[Sequence[str]] = None,
11141114
request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
1115+
sync: bool = True,
11151116
deploy_request_timeout: Optional[float] = None,
11161117
psc_automation_configs: Optional[Sequence[Tuple[str, str]]] = None,
11171118
) -> "MatchingEngineIndexEndpoint":
@@ -1207,10 +1208,167 @@ def deploy_index(
12071208
auth_config_audiences and auth_config_allowed_issuers must be passed together.
12081209
request_metadata (Sequence[Tuple[str, str]]):
12091210
Optional. Strings which should be sent along with the request as metadata.
1210-
1211+
sync (bool):
1212+
Whether to execute this method synchronously. If False, this method
1213+
will be executed in a concurrent Future and any downstream object will
1214+
be immediately returned and synced when the Future has completed.
12111215
deploy_request_timeout (float):
12121216
Optional. The timeout for the request in seconds.
1217+
psc_automation_configs (Sequence[Tuple[str, str]]):
1218+
Optional. A list of (project_id, network) pairs for Private
1219+
Service Connection endpoints to be setup for the deployed index.
1220+
The project_id is the project number of the project that the
1221+
network is in, and network is the name of the network.
1222+
Network is the full name of the Google Compute Engine
1223+
`network <https://cloud.google.com/compute/docs/networks-and-firewalls#networks>`__
1224+
to which the index should be deployed to.
1225+
1226+
Format: projects/{project}/global/networks/{network}. Where
1227+
{project} is a project number, as in '12345', and {network}
1228+
is network name.
1229+
1230+
For example:
1231+
[(project_id_1, network_1), (project_id_1, network_2)] will enable
1232+
PSC automation for the index to be deployed to project_id_1's network_1
1233+
and network_2 and can be queried within these networks.
1234+
Returns:
1235+
MatchingEngineIndexEndpoint - IndexEndpoint resource object
1236+
"""
1237+
return self._deploy_index(
1238+
index=index,
1239+
deployed_index_id=deployed_index_id,
1240+
display_name=display_name,
1241+
machine_type=machine_type,
1242+
min_replica_count=min_replica_count,
1243+
max_replica_count=max_replica_count,
1244+
enable_access_logging=enable_access_logging,
1245+
reserved_ip_ranges=reserved_ip_ranges,
1246+
deployment_group=deployment_group,
1247+
auth_config_audiences=auth_config_audiences,
1248+
auth_config_allowed_issuers=auth_config_allowed_issuers,
1249+
request_metadata=request_metadata,
1250+
sync=sync,
1251+
deploy_request_timeout=deploy_request_timeout,
1252+
psc_automation_configs=psc_automation_configs,
1253+
)
1254+
1255+
@base.optional_sync(return_input_arg="self")
1256+
def _deploy_index(
1257+
self,
1258+
index: matching_engine.MatchingEngineIndex,
1259+
deployed_index_id: str,
1260+
display_name: Optional[str] = None,
1261+
machine_type: Optional[str] = None,
1262+
min_replica_count: Optional[int] = None,
1263+
max_replica_count: Optional[int] = None,
1264+
enable_access_logging: Optional[bool] = None,
1265+
reserved_ip_ranges: Optional[Sequence[str]] = None,
1266+
deployment_group: Optional[str] = None,
1267+
auth_config_audiences: Optional[Sequence[str]] = None,
1268+
auth_config_allowed_issuers: Optional[Sequence[str]] = None,
1269+
request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
1270+
sync: bool = True,
1271+
deploy_request_timeout: Optional[float] = None,
1272+
psc_automation_configs: Optional[Sequence[Tuple[str, str]]] = None,
1273+
) -> "MatchingEngineIndexEndpoint":
1274+
"""Helper method to deploy an existing index resource to this endpoint resource.
1275+
1276+
Args:
1277+
index (MatchingEngineIndex):
1278+
Required. The Index this is the
1279+
deployment of. We may refer to this Index as the
1280+
DeployedIndex's "original" Index.
1281+
deployed_index_id (str):
1282+
Required. The user specified ID of the
1283+
DeployedIndex. The ID can be up to 128
1284+
characters long and must start with a letter and
1285+
only contain letters, numbers, and underscores.
1286+
The ID must be unique within the project it is
1287+
created in.
1288+
display_name (str):
1289+
The display name of the DeployedIndex. If not provided upon
1290+
creation, the Index's display_name is used.
1291+
machine_type (str):
1292+
Optional. The type of machine. Not specifying machine type will
1293+
result in the model being deployed with automatic resources.
1294+
min_replica_count (int):
1295+
Optional. The minimum number of machine replicas this deployed
1296+
model will be always deployed on. If traffic against it increases,
1297+
it may dynamically be deployed onto more replicas, and as traffic
1298+
decreases, some of these extra replicas may be freed.
1299+
1300+
If this value is not provided, the value of 2 will be used.
1301+
max_replica_count (int):
1302+
Optional. The maximum number of replicas this deployed model may
1303+
be deployed on when the traffic against it increases. If requested
1304+
value is too large, the deployment will error, but if deployment
1305+
succeeds then the ability to scale the model to that many replicas
1306+
is guaranteed (barring service outages). If traffic against the
1307+
deployed model increases beyond what its replicas at maximum may
1308+
handle, a portion of the traffic will be dropped. If this value
1309+
is not provided, the larger value of min_replica_count or 2 will
1310+
be used. If value provided is smaller than min_replica_count, it
1311+
will automatically be increased to be min_replica_count.
1312+
enable_access_logging (bool):
1313+
Optional. If true, private endpoint's access
1314+
logs are sent to StackDriver Logging.
1315+
These logs are like standard server access logs,
1316+
containing information like timestamp and
1317+
latency for each MatchRequest.
1318+
Note that Stackdriver logs may incur a cost,
1319+
especially if the deployed index receives a high
1320+
queries per second rate (QPS). Estimate your
1321+
costs before enabling this option.
1322+
reserved_ip_ranges (Sequence[str]):
1323+
Optional. A list of reserved ip ranges under
1324+
the VPC network that can be used for this
1325+
DeployedIndex.
1326+
If set, we will deploy the index within the
1327+
provided ip ranges. Otherwise, the index might
1328+
be deployed to any ip ranges under the provided
1329+
VPC network.
1330+
1331+
The value should be the name of the address
1332+
(https://cloud.google.com/compute/docs/reference/rest/v1/addresses)
1333+
Example: 'vertex-ai-ip-range'.
1334+
deployment_group (str):
1335+
Optional. The deployment group can be no longer than 64
1336+
characters (eg: 'test', 'prod'). If not set, we will use the
1337+
'default' deployment group.
1338+
1339+
Creating ``deployment_groups`` with ``reserved_ip_ranges``
1340+
is a recommended practice when the peered network has
1341+
multiple peering ranges. This creates your deployments from
1342+
predictable IP spaces for easier traffic administration.
1343+
Also, one deployment_group (except 'default') can only be
1344+
used with the same reserved_ip_ranges which means if the
1345+
deployment_group has been used with reserved_ip_ranges: [a,
1346+
b, c], using it with [a, b] or [d, e] is disallowed.
1347+
1348+
Note: we only support up to 5 deployment groups (not
1349+
including 'default').
1350+
auth_config_audiences (Sequence[str]):
1351+
The list of JWT
1352+
`audiences <https://tools.ietf.org/html/draft-ietf-oauth-json-web-token-32#section-4.1.3>`__
1353+
that are allowed to access. A JWT containing any of these
1354+
audiences will be accepted.
1355+
1356+
auth_config_audiences and auth_config_allowed_issuers must be passed together.
1357+
auth_config_allowed_issuers (Sequence[str]):
1358+
A list of allowed JWT issuers. Each entry must be a valid
1359+
Google service account, in the following format:
12131360
1361+
``service-account-name@project-id.iam.gserviceaccount.com``
1362+
1363+
auth_config_audiences and auth_config_allowed_issuers must be passed together.
1364+
request_metadata (Sequence[Tuple[str, str]]):
1365+
Optional. Strings which should be sent along with the request as metadata.
1366+
sync (bool):
1367+
Whether to execute this method synchronously. If False, this method
1368+
will be executed in a concurrent Future and any downstream object will
1369+
be immediately returned and synced when the Future has completed.
1370+
deploy_request_timeout (float):
1371+
Optional. The timeout for the request in seconds.
12141372
psc_automation_configs (Sequence[Tuple[str, str]]):
12151373
Optional. A list of (project_id, network) pairs for Private
12161374
Service Connection endpoints to be setup for the deployed index.

tests/unit/aiplatform/test_matching_engine_index_endpoint.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,7 +1196,8 @@ def test_create_index_endpoint_set_both_psc_and_public_throw_error(
11961196
assert str(exception.value) == expected_message
11971197

11981198
@pytest.mark.usefixtures("get_index_endpoint_mock", "get_index_mock")
1199-
def test_deploy_index(self, deploy_index_mock, undeploy_index_mock):
1199+
@pytest.mark.parametrize("sync", [True, False])
1200+
def test_deploy_index(self, deploy_index_mock, undeploy_index_mock, sync):
12001201
aiplatform.init(project=_TEST_PROJECT)
12011202

12021203
my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
@@ -1218,9 +1219,13 @@ def test_deploy_index(self, deploy_index_mock, undeploy_index_mock):
12181219
auth_config_audiences=_TEST_AUTH_CONFIG_AUDIENCES,
12191220
auth_config_allowed_issuers=_TEST_AUTH_CONFIG_ALLOWED_ISSUERS,
12201221
request_metadata=_TEST_REQUEST_METADATA,
1222+
sync=sync,
12211223
deploy_request_timeout=_TEST_TIMEOUT,
12221224
)
12231225

1226+
if not sync:
1227+
my_index_endpoint.wait()
1228+
12241229
deploy_index_mock.assert_called_once_with(
12251230
index_endpoint=my_index_endpoint.resource_name,
12261231
deployed_index=gca_index_endpoint.DeployedIndex(
@@ -1257,7 +1262,8 @@ def test_deploy_index(self, deploy_index_mock, undeploy_index_mock):
12571262
)
12581263

12591264
@pytest.mark.usefixtures("get_psc_automated_index_endpoint_mock", "get_index_mock")
1260-
def test_deploy_index_psc_automation_configs(self, deploy_index_mock):
1265+
@pytest.mark.parametrize("sync", [True, False])
1266+
def test_deploy_index_psc_automation_configs(self, deploy_index_mock, sync):
12611267
aiplatform.init(project=_TEST_PROJECT)
12621268

12631269
my_index_endpoint = aiplatform.MatchingEngineIndexEndpoint(
@@ -1280,9 +1286,13 @@ def test_deploy_index_psc_automation_configs(self, deploy_index_mock):
12801286
auth_config_allowed_issuers=_TEST_AUTH_CONFIG_ALLOWED_ISSUERS,
12811287
psc_automation_configs=_TEST_PSC_AUTOMATION_CONFIGS,
12821288
request_metadata=_TEST_REQUEST_METADATA,
1289+
sync=sync,
12831290
deploy_request_timeout=_TEST_TIMEOUT,
12841291
)
12851292

1293+
if not sync:
1294+
my_index_endpoint.wait()
1295+
12861296
deploy_index_mock.assert_called_once_with(
12871297
index_endpoint=my_index_endpoint.resource_name,
12881298
deployed_index=gca_index_endpoint.DeployedIndex(

0 commit comments

Comments
 (0)