@@ -1112,6 +1112,7 @@ def deploy_index(
1112
1112
auth_config_audiences : Optional [Sequence [str ]] = None ,
1113
1113
auth_config_allowed_issuers : Optional [Sequence [str ]] = None ,
1114
1114
request_metadata : Optional [Sequence [Tuple [str , str ]]] = (),
1115
+ sync : bool = True ,
1115
1116
deploy_request_timeout : Optional [float ] = None ,
1116
1117
psc_automation_configs : Optional [Sequence [Tuple [str , str ]]] = None ,
1117
1118
) -> "MatchingEngineIndexEndpoint" :
@@ -1207,10 +1208,167 @@ def deploy_index(
1207
1208
auth_config_audiences and auth_config_allowed_issuers must be passed together.
1208
1209
request_metadata (Sequence[Tuple[str, str]]):
1209
1210
Optional. Strings which should be sent along with the request as metadata.
1210
-
1211
+ sync (bool):
1212
+ Whether to execute this method synchronously. If False, this method
1213
+ will be executed in a concurrent Future and any downstream object will
1214
+ be immediately returned and synced when the Future has completed.
1211
1215
deploy_request_timeout (float):
1212
1216
Optional. The timeout for the request in seconds.
1217
+ psc_automation_configs (Sequence[Tuple[str, str]]):
1218
+ Optional. A list of (project_id, network) pairs for Private
1219
+ Service Connection endpoints to be setup for the deployed index.
1220
+ The project_id is the project number of the project that the
1221
+ network is in, and network is the name of the network.
1222
+ Network is the full name of the Google Compute Engine
1223
+ `network <https://cloud.google.com/compute/docs/networks-and-firewalls#networks>`__
1224
+ to which the index should be deployed.
1225
+
1226
+ Format: projects/{project}/global/networks/{network}. Where
1227
+ {project} is a project number, as in '12345', and {network}
1228
+ is network name.
1229
+
1230
+ For example:
1231
+ [(project_id_1, network_1), (project_id_1, network_2)] will enable
1232
+ PSC automation for the index to be deployed to project_id_1's network_1
1233
+ and network_2 and can be queried within these networks.
1234
+ Returns:
1235
+ MatchingEngineIndexEndpoint - IndexEndpoint resource object
1236
+ """
1237
+ return self ._deploy_index (
1238
+ index = index ,
1239
+ deployed_index_id = deployed_index_id ,
1240
+ display_name = display_name ,
1241
+ machine_type = machine_type ,
1242
+ min_replica_count = min_replica_count ,
1243
+ max_replica_count = max_replica_count ,
1244
+ enable_access_logging = enable_access_logging ,
1245
+ reserved_ip_ranges = reserved_ip_ranges ,
1246
+ deployment_group = deployment_group ,
1247
+ auth_config_audiences = auth_config_audiences ,
1248
+ auth_config_allowed_issuers = auth_config_allowed_issuers ,
1249
+ request_metadata = request_metadata ,
1250
+ sync = sync ,
1251
+ deploy_request_timeout = deploy_request_timeout ,
1252
+ psc_automation_configs = psc_automation_configs ,
1253
+ )
1254
+
1255
+ @base .optional_sync (return_input_arg = "self" )
1256
+ def _deploy_index (
1257
+ self ,
1258
+ index : matching_engine .MatchingEngineIndex ,
1259
+ deployed_index_id : str ,
1260
+ display_name : Optional [str ] = None ,
1261
+ machine_type : Optional [str ] = None ,
1262
+ min_replica_count : Optional [int ] = None ,
1263
+ max_replica_count : Optional [int ] = None ,
1264
+ enable_access_logging : Optional [bool ] = None ,
1265
+ reserved_ip_ranges : Optional [Sequence [str ]] = None ,
1266
+ deployment_group : Optional [str ] = None ,
1267
+ auth_config_audiences : Optional [Sequence [str ]] = None ,
1268
+ auth_config_allowed_issuers : Optional [Sequence [str ]] = None ,
1269
+ request_metadata : Optional [Sequence [Tuple [str , str ]]] = (),
1270
+ sync : bool = True ,
1271
+ deploy_request_timeout : Optional [float ] = None ,
1272
+ psc_automation_configs : Optional [Sequence [Tuple [str , str ]]] = None ,
1273
+ ) -> "MatchingEngineIndexEndpoint" :
1274
+ """Helper method to deploy an existing index resource to this endpoint resource.
1275
+
1276
+ Args:
1277
+ index (MatchingEngineIndex):
1278
+ Required. The Index this is the
1279
+ deployment of. We may refer to this Index as the
1280
+ DeployedIndex's "original" Index.
1281
+ deployed_index_id (str):
1282
+ Required. The user specified ID of the
1283
+ DeployedIndex. The ID can be up to 128
1284
+ characters long and must start with a letter and
1285
+ only contain letters, numbers, and underscores.
1286
+ The ID must be unique within the project it is
1287
+ created in.
1288
+ display_name (str):
1289
+ The display name of the DeployedIndex. If not provided upon
1290
+ creation, the Index's display_name is used.
1291
+ machine_type (str):
1292
+ Optional. The type of machine. Not specifying machine type will
1293
+ result in model to be deployed with automatic resources.
1294
+ min_replica_count (int):
1295
+ Optional. The minimum number of machine replicas this deployed
1296
+ model will be always deployed on. If traffic against it increases,
1297
+ it may dynamically be deployed onto more replicas, and as traffic
1298
+ decreases, some of these extra replicas may be freed.
1299
+
1300
+ If this value is not provided, the value of 2 will be used.
1301
+ max_replica_count (int):
1302
+ Optional. The maximum number of replicas this deployed model may
1303
+ be deployed on when the traffic against it increases. If requested
1304
+ value is too large, the deployment will error, but if deployment
1305
+ succeeds then the ability to scale the model to that many replicas
1306
+ is guaranteed (barring service outages). If traffic against the
1307
+ deployed model increases beyond what its replicas at maximum may
1308
+ handle, a portion of the traffic will be dropped. If this value
1309
+ is not provided, the larger value of min_replica_count or 2 will
1310
+ be used. If value provided is smaller than min_replica_count, it
1311
+ will automatically be increased to be min_replica_count.
1312
+ enable_access_logging (bool):
1313
+ Optional. If true, private endpoint's access
1314
+ logs are sent to StackDriver Logging.
1315
+ These logs are like standard server access logs,
1316
+ containing information like timestamp and
1317
+ latency for each MatchRequest.
1318
+ Note that Stackdriver logs may incur a cost,
1319
+ especially if the deployed index receives a high
1320
+ queries per second rate (QPS). Estimate your
1321
+ costs before enabling this option.
1322
+ reserved_ip_ranges (Sequence[str]):
1323
+ Optional. A list of reserved ip ranges under
1324
+ the VPC network that can be used for this
1325
+ DeployedIndex.
1326
+ If set, we will deploy the index within the
1327
+ provided ip ranges. Otherwise, the index might
1328
+ be deployed to any ip ranges under the provided
1329
+ VPC network.
1330
+
1331
+ The value should be the name of the address
1332
+ (https://cloud.google.com/compute/docs/reference/rest/v1/addresses)
1333
+ Example: 'vertex-ai-ip-range'.
1334
+ deployment_group (str):
1335
+ Optional. The deployment group can be no longer than 64
1336
+ characters (eg: 'test', 'prod'). If not set, we will use the
1337
+ 'default' deployment group.
1338
+
1339
+ Creating ``deployment_groups`` with ``reserved_ip_ranges``
1340
+ is a recommended practice when the peered network has
1341
+ multiple peering ranges. This creates your deployments from
1342
+ predictable IP spaces for easier traffic administration.
1343
+ Also, one deployment_group (except 'default') can only be
1344
+ used with the same reserved_ip_ranges which means if the
1345
+ deployment_group has been used with reserved_ip_ranges: [a,
1346
+ b, c], using it with [a, b] or [d, e] is disallowed.
1347
+
1348
+ Note: we only support up to 5 deployment groups (not
1349
+ including 'default').
1350
+ auth_config_audiences (Sequence[str]):
1351
+ The list of JWT
1352
+ `audiences <https://tools.ietf.org/html/draft-ietf-oauth-json-web-token-32#section-4.1.3>`__
1353
+ that are allowed to access. A JWT containing any of these
1354
+ audiences will be accepted.
1355
+
1356
+ auth_config_audiences and auth_config_allowed_issuers must be passed together.
1357
+ auth_config_allowed_issuers (Sequence[str]):
1358
+ A list of allowed JWT issuers. Each entry must be a valid
1359
+ Google service account, in the following format:
1213
1360
1361
+
1362
+
1363
+ auth_config_audiences and auth_config_allowed_issuers must be passed together.
1364
+ request_metadata (Sequence[Tuple[str, str]]):
1365
+ Optional. Strings which should be sent along with the request as metadata.
1366
+ sync (bool):
1367
+ Whether to execute this method synchronously. If False, this method
1368
+ will be executed in a concurrent Future and any downstream object will
1369
+ be immediately returned and synced when the Future has completed.
1370
+ deploy_request_timeout (float):
1371
+ Optional. The timeout for the request in seconds.
1214
1372
psc_automation_configs (Sequence[Tuple[str, str]]):
1215
1373
Optional. A list of (project_id, network) pairs for Private
1216
1374
Service Connection endpoints to be setup for the deployed index.
0 commit comments