diff --git a/docs/book/stacks-and-components/component-guide/model-deployers/huggingface.md b/docs/book/stacks-and-components/component-guide/model-deployers/huggingface.md
new file mode 100644
index 0000000000..695404956d
--- /dev/null
+++ b/docs/book/stacks-and-components/component-guide/model-deployers/huggingface.md
@@ -0,0 +1,154 @@
+---
+description: Deploying models to Hugging Face Inference Endpoints with Hugging Face :hugging_face:.
+---
+
+# Hugging Face :hugging_face:
+
+Hugging Face Inference Endpoints provides a secure production solution to easily deploy any `transformers`, `sentence-transformers`, and `diffusers` models on dedicated and autoscaling infrastructure managed by Hugging Face. An Inference Endpoint is built from a model on the [Hub](https://huggingface.co/models).
+
+The service allows you to deploy models without having to deal with containers and GPUs yourself.
+
+## When to use it?
+
+You should use the Hugging Face Model Deployer:
+
+* if you want to deploy [Transformers, Sentence-Transformers, or Diffusion models](https://huggingface.co/docs/inference-endpoints/supported_tasks) on dedicated and secure infrastructure.
+* if you prefer a fully-managed production solution for inference without the need to handle containers and GPUs.
+* if your goal is to turn your models into production-ready APIs with minimal infrastructure or MLOps involvement.
+* if cost-effectiveness is crucial, and you want to pay only for the raw compute resources you use.
+* if enterprise security is a priority, and you need to deploy models into secure offline endpoints accessible only via a direct connection to your Virtual Private Cloud (VPC).
+
+If you are looking for an easier way to deploy your models locally, you can use the [MLflow Model Deployer](mlflow.md) flavor instead.
+
+## How to deploy it?
+
+The Hugging Face Model Deployer flavor is provided by the Hugging Face ZenML integration, so you need to install it on your local machine to be able to deploy your models. You can do this by running the following command:
+
+```bash
+zenml integration install huggingface -y
+```
+
+To register the Hugging Face model deployer with ZenML you need to run the following command:
+
+```bash
+zenml model-deployer register <MODEL_DEPLOYER_NAME> --flavor=huggingface --token=<YOUR_HF_TOKEN> --namespace=<YOUR_HF_NAMESPACE>
+```
+
+Here,
+
+* the `token` parameter is the Hugging Face authentication token. It can be managed through [Hugging Face settings](https://huggingface.co/settings/tokens).
+* the `namespace` parameter is used for listing and creating the inference endpoints. It can be your username, an organization name, or `*`, depending on where the inference endpoint should be created.
+
+We can now use the model deployer in our stack.
+
+```bash
+zenml stack update --model-deployer=<MODEL_DEPLOYER_NAME>
+```
+
+See the [huggingface_model_deployer_step](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-huggingface/#zenml.integrations.huggingface.steps.huggingface_deployer.huggingface_model_deployer_step) for an example of using the Hugging Face Model Deployer to deploy a model inside a ZenML pipeline step.
+
+## Configuration
+
+Within the `HuggingFaceServiceConfig` you can configure:
+
+* `model_name`: the name of the model in ZenML.
+* `endpoint_name`: the name of the inference endpoint. We add the prefix `zenml-` and append the first 8 characters of the service UUID as a suffix to the endpoint name.
+* `repository`: The repository name in the user’s namespace (`{username}/{model_id}`) or in the organization namespace (`{organization}/{model_id}`) from the Hugging Face Hub.
+* `framework`: The machine learning framework used for the model (e.g. `"custom"`, `"pytorch"`).
+* `accelerator`: The hardware accelerator to be used for inference (e.g. `"cpu"`, `"gpu"`).
+* `instance_size`: The size of the instance to be used for hosting the model (e.g. `"large"`, `"xxlarge"`).
+* `instance_type`: Inference Endpoints offers a selection of curated CPU and GPU instances (e.g. `"c6i"`, `"g5.12xlarge"`).
+* `region`: The cloud region in which the Inference Endpoint will be created (e.g. `"us-east-1"` or `"eu-west-1"` for `vendor = aws`, and `"eastus"` for the Microsoft Azure vendor).
+* `vendor`: The cloud provider or vendor where the Inference Endpoint will be hosted (e.g. `"aws"`).
+* `token`: The Hugging Face authentication token. It can be managed through [Hugging Face settings](https://huggingface.co/settings/tokens). The same token can also be used when registering the Hugging Face model deployer.
+* `account_id`: (Optional) The account ID used to link a VPC to a private Inference Endpoint (if applicable).
+* `min_replica`: (Optional) The minimum number of replicas (instances) to keep running for the Inference Endpoint. Defaults to `0`.
+* `max_replica`: (Optional) The maximum number of replicas (instances) to scale to for the Inference Endpoint. Defaults to `1`.
+* `revision`: (Optional) The specific revision of the Hugging Face repository to deploy on the Inference Endpoint.
+* `task`: A supported [Machine Learning Task](https://huggingface.co/docs/inference-endpoints/supported_tasks) (e.g. `"text-classification"`, `"text-generation"`).
+* `custom_image`: (Optional) A custom Docker image to use for the Inference Endpoint.
+* `namespace`: The namespace where the Inference Endpoint will be created. The same namespace can also be used when registering the Hugging Face model deployer.
+* `endpoint_type`: (Optional) The type of the Inference Endpoint, which can be `"protected"`, `"public"` (default) or `"private"`.
+
+For more information and a full list of configurable attributes of the Hugging Face Model Deployer, check out
+the [SDK Docs](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-huggingface/#zenml.integrations.huggingface.model_deployers) and the Hugging Face endpoint [code](https://github.com/huggingface/huggingface_hub/blob/5e3b603ccc7cd6523d998e75f82848215abf9415/src/huggingface_hub/hf_api.py#L6957).
+
+### Run inference on a provisioned inference endpoint
+
+The following code example shows how to run inference against a provisioned inference endpoint:
+
+```python
+from typing import Annotated
+from zenml import step, pipeline
+from zenml.integrations.huggingface.model_deployers import HuggingFaceModelDeployer
+from zenml.integrations.huggingface.services import HuggingFaceDeploymentService
+
+
+# Load a prediction service deployed in another pipeline
+@step(enable_cache=False)
+def prediction_service_loader(
+    pipeline_name: str,
+    pipeline_step_name: str,
+    running: bool = True,
+    model_name: str = "default",
+) -> HuggingFaceDeploymentService:
+    """Get the prediction service started by the deployment pipeline.
+
+    Args:
+        pipeline_name: name of the pipeline that deployed the Hugging Face
+            inference endpoint
+        pipeline_step_name: the name of the step that deployed the Hugging Face
+            inference endpoint
+        running: when this flag is set, the step only returns a running service
+        model_name: the name of the model that is deployed
+    """
+    # get the Hugging Face model deployer stack component
+    model_deployer = HuggingFaceModelDeployer.get_active_model_deployer()
+
+    # fetch existing services with same pipeline name, step name and model name
+    existing_services = model_deployer.find_model_server(
+        pipeline_name=pipeline_name,
+        pipeline_step_name=pipeline_step_name,
+        model_name=model_name,
+        running=running,
+    )
+
+    if not existing_services:
+        raise RuntimeError(
+            f"No Hugging Face inference endpoint deployed by step "
+            f"'{pipeline_step_name}' in pipeline '{pipeline_name}' with name "
+            f"'{model_name}' is currently running."
+        )
+
+    return existing_services[0]
+
+
+# Use the service for inference
+@step
+def predictor(
+    service: HuggingFaceDeploymentService,
+    data: str,
+) -> Annotated[str, "predictions"]:
+    """Run an inference request against a prediction service."""
+    # `max_new_tokens` is a required argument of the service's `predict()`
+    # method for text-generation endpoints.
+    prediction = service.predict(data, max_new_tokens=100)
+    return prediction
+
+
+@pipeline
+def huggingface_deployment_inference_pipeline(
+    pipeline_name: str,
+    pipeline_step_name: str = "huggingface_model_deployer_step",
+):
+    inference_data = ...
+    model_deployment_service = prediction_service_loader(
+        pipeline_name=pipeline_name,
+        pipeline_step_name=pipeline_step_name,
+    )
+    predictions = predictor(model_deployment_service, inference_data)
+```
+
+For more information and a full list of configurable attributes of the Hugging Face Model Deployer, check out
+the [SDK Docs](https://sdkdocs.zenml.io/latest/integration_code_docs/integrations-huggingface/#zenml.integrations.huggingface.model_deployers).
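+
+### Deploy a model in a pipeline step
+
+The sketch below shows one way to provision an Inference Endpoint from within a pipeline using
+the `huggingface_model_deployer_step` and a `HuggingFaceServiceConfig` built from the attributes
+described above. It is a minimal, illustrative sketch: the repository, namespace, task and
+hardware values are placeholders and must be replaced with values that match your own Hugging
+Face account and model.
+
+```python
+from zenml import pipeline
+from zenml.integrations.huggingface.services import HuggingFaceServiceConfig
+from zenml.integrations.huggingface.steps import huggingface_model_deployer_step
+
+
+@pipeline(enable_cache=False)
+def huggingface_deployment_pipeline(model_name: str = "hf", timeout: int = 1200):
+    # All values below are illustrative placeholders; adjust them to your own
+    # Hugging Face namespace, model repository and hardware requirements.
+    service_config = HuggingFaceServiceConfig(
+        model_name=model_name,
+        endpoint_name="my-endpoint",        # "zenml-" prefix and a UUID suffix are added automatically
+        repository="my-username/my-model",  # a model repository on the Hugging Face Hub
+        framework="pytorch",
+        task="text-generation",
+        accelerator="cpu",
+        instance_size="large",
+        instance_type="c6i",
+        region="us-east-1",
+        vendor="aws",
+        namespace="my-username",
+        token="<YOUR_HF_TOKEN>",
+        endpoint_type="public",
+    )
+
+    # Provision (or update) the Inference Endpoint from within the pipeline
+    huggingface_model_deployer_step(
+        service_config=service_config,
+        timeout=timeout,
+    )
+```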
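+
+Once the endpoint is running, you can also look up the deployment service outside of a pipeline,
+for example from a script or notebook, and send requests to it directly. The pipeline, step and
+model names below are assumptions matching the deployment sketch above; substitute your own.
+
+```python
+from zenml.integrations.huggingface.model_deployers import HuggingFaceModelDeployer
+
+# get the Hugging Face model deployer from the active stack
+model_deployer = HuggingFaceModelDeployer.get_active_model_deployer()
+
+# look up the service that was started by the deployment pipeline
+services = model_deployer.find_model_server(
+    pipeline_name="huggingface_deployment_pipeline",
+    pipeline_step_name="huggingface_model_deployer_step",
+    model_name="hf",
+    running=True,
+)
+
+if services:
+    # `max_new_tokens` is required by the service's `predict()` method
+    prediction = services[0].predict("A sample input text", max_new_tokens=50)
+    print(prediction)
+```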
diff --git a/docs/book/stacks-and-components/component-guide/model-deployers/model-deployers.md b/docs/book/stacks-and-components/component-guide/model-deployers/model-deployers.md index 11c1e0f4c8..428cfb6a13 100644 --- a/docs/book/stacks-and-components/component-guide/model-deployers/model-deployers.md +++ b/docs/book/stacks-and-components/component-guide/model-deployers/model-deployers.md @@ -44,6 +44,7 @@ integrations: | [MLflow](mlflow.md) | `mlflow` | `mlflow` | Deploys ML Model locally | | [BentoML](bentoml.md) | `bentoml` | `bentoml` | Build and Deploy ML models locally or for production grade (Cloud, K8s) | | [Seldon Core](seldon.md) | `seldon` | `seldon Core` | Built on top of Kubernetes to deploy models for production grade environment | +| [Hugging Face](huggingface.md) | `huggingface` | `huggingface` | Deploys ML model on Hugging Face Inference Endpoints | | [Custom Implementation](custom.md) | _custom_ | | Extend the Artifact Store abstraction and provide your own implementation | {% hint style="info" %} @@ -85,6 +86,7 @@ zenml model-deployer register seldon --flavor=seldon \ ... zenml stack register seldon_stack -m default -a aws -o default -d seldon ``` + 2. Implements the continuous deployment logic necessary to deploy models in a way that updates an existing model server that is already serving a previous version of the same model instead of creating a new model server for every new model version. Every model server that the Model Deployer provisions externally to deploy a model is represented diff --git a/docs/book/toc.md b/docs/book/toc.md index b560ade1ef..136e440e1b 100644 --- a/docs/book/toc.md +++ b/docs/book/toc.md @@ -125,6 +125,7 @@ * [MLflow](stacks-and-components/component-guide/model-deployers/mlflow.md) * [Seldon](stacks-and-components/component-guide/model-deployers/seldon.md) * [BentoML](stacks-and-components/component-guide/model-deployers/bentoml.md) + * [Hugging Face](stacks-and-components/component-guide/model-deployers/huggingface.md) * [Develop a Custom Model Deployer](stacks-and-components/component-guide/model-deployers/custom.md) * [Step Operators](stacks-and-components/component-guide/step-operators/step-operators.md) * [Amazon SageMaker](stacks-and-components/component-guide/step-operators/sagemaker.md) diff --git a/docs/mocked_libs.json b/docs/mocked_libs.json index 7316925c61..f7f6f283f1 100644 --- a/docs/mocked_libs.json +++ b/docs/mocked_libs.json @@ -106,6 +106,8 @@ "great_expectations.types", "hvac", "hvac.exceptions", + "huggingface_hub", + "huggingface_hub.utils", "kfp", "kfp.compiler", "kfp.v2", diff --git a/pyproject.toml b/pyproject.toml index 2b07073a5d..f86386c949 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -448,5 +448,6 @@ module = [ "mlstacks.*", "matplotlib.*", "IPython.*", + "huggingface_hub.*" ] ignore_missing_imports = true diff --git a/src/zenml/integrations/huggingface/__init__.py b/src/zenml/integrations/huggingface/__init__.py index 19e864f362..c1a92f48e4 100644 --- a/src/zenml/integrations/huggingface/__init__.py +++ b/src/zenml/integrations/huggingface/__init__.py @@ -12,9 +12,14 @@ # or implied. See the License for the specific language governing # permissions and limitations under the License. 
"""Initialization of the Huggingface integration.""" +from typing import List, Type from zenml.integrations.constants import HUGGINGFACE from zenml.integrations.integration import Integration +from zenml.stack import Flavor + +HUGGINGFACE_MODEL_DEPLOYER_FLAVOR = "huggingface" +HUGGINGFACE_SERVICE_ARTIFACT = "hf_deployment_service" class HuggingfaceIntegration(Integration): @@ -31,6 +36,20 @@ class HuggingfaceIntegration(Integration): def activate(cls) -> None: """Activates the integration.""" from zenml.integrations.huggingface import materializers # noqa + from zenml.integrations.huggingface import services + + @classmethod + def flavors(cls) -> List[Type[Flavor]]: + """Declare the stack component flavors for the Huggingface integration. + + Returns: + List of stack component flavors for this integration. + """ + from zenml.integrations.huggingface.flavors import ( + HuggingFaceModelDeployerFlavor, + ) + + return [HuggingFaceModelDeployerFlavor] HuggingfaceIntegration.check_installation() diff --git a/src/zenml/integrations/huggingface/flavors/__init__.py b/src/zenml/integrations/huggingface/flavors/__init__.py new file mode 100644 index 0000000000..e963d20217 --- /dev/null +++ b/src/zenml/integrations/huggingface/flavors/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Hugging Face integration flavors.""" + +from zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor import ( # noqa + HuggingFaceModelDeployerConfig, + HuggingFaceModelDeployerFlavor, + HuggingFaceBaseConfig, +) + +__all__ = [ + "HuggingFaceModelDeployerConfig", + "HuggingFaceModelDeployerFlavor", + "HuggingFaceBaseConfig", +] diff --git a/src/zenml/integrations/huggingface/flavors/huggingface_model_deployer_flavor.py b/src/zenml/integrations/huggingface/flavors/huggingface_model_deployer_flavor.py new file mode 100644 index 0000000000..d9150fe998 --- /dev/null +++ b/src/zenml/integrations/huggingface/flavors/huggingface_model_deployer_flavor.py @@ -0,0 +1,131 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+"""Hugging Face model deployer flavor.""" + +from typing import TYPE_CHECKING, Any, Dict, Optional, Type + +from pydantic import BaseModel + +from zenml.integrations.huggingface import HUGGINGFACE_MODEL_DEPLOYER_FLAVOR +from zenml.model_deployers.base_model_deployer import ( + BaseModelDeployerConfig, + BaseModelDeployerFlavor, +) +from zenml.utils.secret_utils import SecretField + +if TYPE_CHECKING: + from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import ( + HuggingFaceModelDeployer, + ) + + +class HuggingFaceBaseConfig(BaseModel): + """Hugging Face Inference Endpoint configuration.""" + + endpoint_name: str = "zenml-" + repository: Optional[str] = None + framework: Optional[str] = None + accelerator: Optional[str] = None + instance_size: Optional[str] = None + instance_type: Optional[str] = None + region: Optional[str] = None + vendor: Optional[str] = None + token: Optional[str] = None + account_id: Optional[str] = None + min_replica: int = 0 + max_replica: int = 1 + revision: Optional[str] = None + task: Optional[str] = None + custom_image: Optional[Dict[str, Any]] = None + namespace: Optional[str] = None + endpoint_type: str = "public" + + +class HuggingFaceModelDeployerConfig( + BaseModelDeployerConfig, HuggingFaceBaseConfig +): + """Configuration for the Hugging Face model deployer. + + Attributes: + token: Hugging Face token used for authentication + namespace: Hugging Face namespace used to list endpoints + """ + + token: str = SecretField() + + # The namespace to list endpoints for. Set to `"*"` to list all endpoints + # from all namespaces (i.e. personal namespace and all orgs the user belongs to). + namespace: str + + +class HuggingFaceModelDeployerFlavor(BaseModelDeployerFlavor): + """Hugging Face Endpoint model deployer flavor.""" + + @property + def name(self) -> str: + """Name of the flavor. + + Returns: + The name of the flavor. + """ + return HUGGINGFACE_MODEL_DEPLOYER_FLAVOR + + @property + def docs_url(self) -> Optional[str]: + """A url to point at docs explaining this flavor. + + Returns: + A flavor docs url. + """ + return self.generate_default_docs_url() + + @property + def sdk_docs_url(self) -> Optional[str]: + """A url to point at SDK docs explaining this flavor. + + Returns: + A flavor SDK docs url. + """ + return self.generate_default_sdk_docs_url() + + @property + def logo_url(self) -> str: + """A url to represent the flavor in the dashboard. + + Returns: + The flavor logo. + """ + return "https://public-flavor-logos.s3.eu-central-1.amazonaws.com/model_registry/huggingface.png" + + @property + def config_class(self) -> Type[HuggingFaceModelDeployerConfig]: + """Returns `HuggingFaceModelDeployerConfig` config class. + + Returns: + The config class. + """ + return HuggingFaceModelDeployerConfig + + @property + def implementation_class(self) -> Type["HuggingFaceModelDeployer"]: + """Implementation class for this flavor. + + Returns: + The implementation class. + """ + from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import ( + HuggingFaceModelDeployer, + ) + + return HuggingFaceModelDeployer diff --git a/src/zenml/integrations/huggingface/model_deployers/__init__.py b/src/zenml/integrations/huggingface/model_deployers/__init__.py new file mode 100644 index 0000000000..0e02162ea5 --- /dev/null +++ b/src/zenml/integrations/huggingface/model_deployers/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Initialization of the Hugging Face model deployers.""" + +from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import ( # noqa + HuggingFaceModelDeployer, +) + +__all__ = ["HuggingFaceModelDeployer"] diff --git a/src/zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py b/src/zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py new file mode 100644 index 0000000000..2ab9340586 --- /dev/null +++ b/src/zenml/integrations/huggingface/model_deployers/huggingface_model_deployer.py @@ -0,0 +1,459 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Implementation of the Hugging Face Model Deployer.""" + +from typing import Any, ClassVar, Dict, List, Optional, Type, cast +from uuid import UUID + +from huggingface_hub import list_inference_endpoints + +from zenml.artifacts.utils import log_artifact_metadata, save_artifact +from zenml.client import Client +from zenml.integrations.huggingface import HUGGINGFACE_SERVICE_ARTIFACT +from zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor import ( + HuggingFaceModelDeployerConfig, + HuggingFaceModelDeployerFlavor, +) +from zenml.integrations.huggingface.services.huggingface_deployment import ( + HuggingFaceDeploymentService, + HuggingFaceServiceConfig, +) +from zenml.logger import get_logger +from zenml.model_deployers import BaseModelDeployer +from zenml.model_deployers.base_model_deployer import ( + DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + BaseModelDeployerFlavor, +) +from zenml.services import BaseService, ServiceConfig, ServiceRegistry + +logger = get_logger(__name__) + +ZENM_ENDPOINT_PREFIX: str = "zenml-" +UUID_SLICE_LENGTH: int = 8 + + +class HuggingFaceModelDeployer(BaseModelDeployer): + """Hugging Face endpoint model deployer.""" + + NAME: ClassVar[str] = "HuggingFace" + FLAVOR: ClassVar[Type[BaseModelDeployerFlavor]] = ( + HuggingFaceModelDeployerFlavor + ) + + @property + def config(self) -> HuggingFaceModelDeployerConfig: + """Config class for the Hugging Face Model deployer settings class. + + Returns: + The configuration. + """ + return cast(HuggingFaceModelDeployerConfig, self._config) + + @property + def deployed_endpoints(self) -> Any: + """Get list of deployed endpoint from Hugging Face. + + Returns: + List of deployed endpoints. 
+ """ + return list_inference_endpoints( + token=self.config.token, + namespace=self.config.namespace, + ) + + def modify_endpoint_name( + self, endpoint_name: str, artifact_version: str + ) -> str: + """Modify endpoint name by adding suffix and prefix. + + It adds a prefix "zenml-" if not present and a suffix + of first 8 characters of uuid. + + Args: + endpoint_name : Name of the endpoint + artifact_version: Name of the artifact version + + Returns: + Modified endpoint name with added prefix and suffix + """ + # Add prefix if it does not start with ZENM_ENDPOINT_PREFIX + if not endpoint_name.startswith(ZENM_ENDPOINT_PREFIX): + endpoint_name = ZENM_ENDPOINT_PREFIX + endpoint_name + + endpoint_name += artifact_version + return endpoint_name + + def _create_new_service( + self, timeout: int, config: HuggingFaceServiceConfig + ) -> HuggingFaceDeploymentService: + """Creates a new Hugging FaceDeploymentService. + + Args: + timeout: the timeout in seconds to wait for the Hugging Face inference endpoint + to be provisioned and successfully started or updated. + config: the configuration of the model to be deployed with Hugging Face model deployer. + + Returns: + The HuggingFaceServiceConfig object that can be used to interact + with the Hugging Face inference endpoint. + """ + # create a new service for the new model + service = HuggingFaceDeploymentService(config) + + # Use first 8 characters of UUID as artifact version + artifact_version = str(service.dict()["uuid"])[:UUID_SLICE_LENGTH] + # Add same 8 characters as suffix to endpoint name + service.config.endpoint_name = self.modify_endpoint_name( + service.config.endpoint_name, artifact_version + ) + + logger.info( + f"Creating an artifact {HUGGINGFACE_SERVICE_ARTIFACT} with service instance attached as metadata." + " If there's an active pipeline and/or model this artifact will be associated with it." + ) + + save_artifact( + service, + HUGGINGFACE_SERVICE_ARTIFACT, + version=artifact_version, + is_deployment_artifact=True, + ) + + # Convert UUID object to be json serializable + service_metadata = service.dict() + service_metadata["uuid"] = str(service_metadata["uuid"]) + log_artifact_metadata( + artifact_name=HUGGINGFACE_SERVICE_ARTIFACT, + artifact_version=artifact_version, + metadata={HUGGINGFACE_SERVICE_ARTIFACT: service_metadata}, + ) + + service.start(timeout=timeout) + return service + + def _clean_up_existing_service( + self, + timeout: int, + force: bool, + existing_service: HuggingFaceDeploymentService, + ) -> None: + """Stop existing services. + + Args: + timeout: the timeout in seconds to wait for the Hugging Face + deployment to be stopped. + force: if True, force the service to stop + existing_service: Existing Hugging Face deployment service + """ + # stop the older service + existing_service.stop(timeout=timeout, force=force) + + def deploy_model( + self, + config: ServiceConfig, + replace: bool = True, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + ) -> BaseService: + """Create a new Hugging Face deployment service or update an existing one. + + This should serve the supplied model and deployment configuration. + + Args: + config: the configuration of the model to be deployed with Hugging Face. + Core + replace: set this flag to True to find and update an equivalent + Hugging Face deployment server with the new model instead of + starting a new deployment server. + timeout: the timeout in seconds to wait for the Hugging Face endpoint + to be provisioned and successfully started or updated. 
If set + to 0, the method will return immediately after the Hugging Face + server is provisioned, without waiting for it to fully start. + + Returns: + The ZenML Hugging Face deployment service object that can be used to + interact with the remote Hugging Face inference endpoint server. + """ + config = cast(HuggingFaceServiceConfig, config) + service = None + + # if replace is True, remove all existing services + if replace: + existing_services = self.find_model_server( + pipeline_name=config.pipeline_name, + pipeline_step_name=config.pipeline_step_name, + ) + + for existing_service in existing_services: + if service is None: + # keep the most recently created service + service = cast( + HuggingFaceDeploymentService, existing_service + ) + try: + # delete the older services and don't wait for them to + # be deprovisioned + self._clean_up_existing_service( + existing_service=cast( + HuggingFaceDeploymentService, existing_service + ), + timeout=timeout, + force=True, + ) + except RuntimeError: + # ignore errors encountered while stopping old services + pass + + if service: + # update an equivalent service in place + logger.info( + f"Updating an existing Hugging Face deployment service: {service}" + ) + + service_metadata = service.dict() + artifact_version = str(service_metadata["uuid"])[ + :UUID_SLICE_LENGTH + ] + config.endpoint_name = self.modify_endpoint_name( + config.endpoint_name, artifact_version + ) + + service.stop(timeout=timeout, force=True) + service.update(config) + service.start(timeout=timeout) + else: + # create a new HuggingFaceDeploymentService instance + service = self._create_new_service(timeout, config) + logger.info( + f"Creating a new Hugging Face inference endpoint service: {service}" + ) + + return cast(BaseService, service) + + def find_model_server( + self, + running: bool = False, + service_uuid: Optional[UUID] = None, + pipeline_name: Optional[str] = None, + run_name: Optional[str] = None, + pipeline_step_name: Optional[str] = None, + model_name: Optional[str] = None, + model_uri: Optional[str] = None, + model_type: Optional[str] = None, + ) -> List[BaseService]: + """Find one or more Hugging Face model services that match the given criteria. + + Args: + running: if true, only running services will be returned. + service_uuid: the UUID of the Hugging Face service that was + originally used to create the Hugging Face deployment resource. + pipeline_name: name of the pipeline that the deployed model was part + of. + run_name: Name of the pipeline run which the deployed model was + part of. + pipeline_step_name: the name of the pipeline model deployment step + that deployed the model. + model_name: the name of the deployed model. + model_uri: URI of the deployed model. + model_type: the Hugging Face server implementation used to serve + the model + + Raises: + TypeError: If service type does not match HuggingFaceDeploymentService + + Returns: + One or more Hugging Face service objects representing Hugging Face + model servers that match the input search criteria. 
+ """ + # Use a Hugging Face deployment service configuration to compute the labels + config = HuggingFaceServiceConfig( + pipeline_name=pipeline_name or "", + run_name=run_name or "", + pipeline_run_id=run_name or "", + pipeline_step_name=pipeline_step_name or "", + model_name=model_name or "", + model_uri=model_uri or "", + implementation=model_type or "", + ) + + services: List[BaseService] = [] + + # Find all services that match input criteria + for endpoint in self.deployed_endpoints: + if endpoint.name.startswith("zenml-"): + artifact_version = endpoint.name[-8:] + # If service_uuid is supplied, fetch service for that uuid + if ( + service_uuid is not None + and str(service_uuid)[:8] != artifact_version + ): + continue + + # Fetch the saved metadata artifact from zenml server to recreate service + client = Client() + try: + service_artifact = client.get_artifact_version( + HUGGINGFACE_SERVICE_ARTIFACT, artifact_version + ) + hf_deployment_service_dict = service_artifact.run_metadata[ + HUGGINGFACE_SERVICE_ARTIFACT + ].value + + existing_service = ( + ServiceRegistry().load_service_from_dict( + hf_deployment_service_dict # type: ignore + ) + ) + + if not isinstance( + existing_service, HuggingFaceDeploymentService + ): + raise TypeError( + f"Expected service type HuggingFaceDeploymentService but got " + f"{type(existing_service)} instead" + ) + + existing_service.update_status() + if self._matches_search_criteria(existing_service, config): + if not running or existing_service.is_running: + services.append( + cast(BaseService, existing_service) + ) + + # if endpoint is provisioned externally + # we do not have saved artifact for it. + except KeyError: + logger.error( + f"No key found for endpoint {endpoint.name} provisioned externally" + ) + + return services + + def _matches_search_criteria( + self, + existing_service: HuggingFaceDeploymentService, + config: HuggingFaceServiceConfig, + ) -> bool: + """Returns true if a service matches the input criteria. + + If any of the values in the input criteria are None, they are ignored. + This allows listing services just by common pipeline names or step + names, etc. + + Args: + existing_service: The materialized Service instance derived from + the config of the older (existing) service + config: The HuggingFaceServiceConfig object passed to the + deploy_model function holding parameters of the new service + to be created. + + Returns: + True if the service matches the input criteria. + """ + existing_service_config = existing_service.config + + # check if the existing service matches the input criteria + if ( + ( + not config.pipeline_name + or existing_service_config.pipeline_name + == config.pipeline_name + ) + and ( + not config.pipeline_step_name + or existing_service_config.pipeline_step_name + == config.pipeline_step_name + ) + and ( + not config.run_name + or existing_service_config.run_name == config.run_name + ) + ): + return True + + return False + + def stop_model_server( + self, + uuid: UUID, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + force: bool = False, + ) -> None: + """Method to stop a model server. + + Args: + uuid: UUID of the model server to stop. + timeout: Timeout in seconds to wait for the service to stop. + force: If True, force the service to stop. 
+ """ + # get list of all services + existing_services = self.find_model_server(service_uuid=uuid) + + # if the service exists, stop it + if existing_services: + existing_services[0].stop(timeout=timeout, force=force) + + def start_model_server( + self, uuid: UUID, timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT + ) -> None: + """Method to start a model server. + + Args: + uuid: UUID of the model server to start. + timeout: Timeout in seconds to wait for the service to start. + """ + # get list of all services + existing_services = self.find_model_server(service_uuid=uuid) + + # if the service exists, start it + if existing_services: + existing_services[0].start(timeout=timeout) + + def delete_model_server( + self, + uuid: UUID, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, + force: bool = False, + ) -> None: + """Method to delete all configuration of a model server. + + Args: + uuid: UUID of the model server to delete. + timeout: Timeout in seconds to wait for the service to stop. + force: If True, force the service to stop. + """ + # get list of all services + existing_services = self.find_model_server(service_uuid=uuid) + + # if the service exists, clean it up + if existing_services: + service = cast(HuggingFaceDeploymentService, existing_services[0]) + self._clean_up_existing_service( + existing_service=service, timeout=timeout, force=force + ) + + @staticmethod + def get_model_server_info( # type: ignore[override] + service_instance: "HuggingFaceDeploymentService", + ) -> Dict[str, Optional[str]]: + """Return implementation specific information that might be relevant to the user. + + Args: + service_instance: Instance of a HuggingFaceDeploymentService + + Returns: + Model server information. + """ + return { + "PREDICTION_URL": service_instance.prediction_url, + } diff --git a/src/zenml/integrations/huggingface/services/__init__.py b/src/zenml/integrations/huggingface/services/__init__.py new file mode 100644 index 0000000000..7de849c16a --- /dev/null +++ b/src/zenml/integrations/huggingface/services/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Initialization of the Hugging Face Service.""" + +from zenml.integrations.huggingface.services.huggingface_deployment import ( # noqa + HuggingFaceDeploymentService, + HuggingFaceServiceConfig, +) diff --git a/src/zenml/integrations/huggingface/services/huggingface_deployment.py b/src/zenml/integrations/huggingface/services/huggingface_deployment.py new file mode 100644 index 0000000000..26af08f754 --- /dev/null +++ b/src/zenml/integrations/huggingface/services/huggingface_deployment.py @@ -0,0 +1,269 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Implementation of the Hugging Face Deployment service.""" + +from typing import Any, Generator, Optional, Tuple + +from huggingface_hub import ( + InferenceClient, + InferenceEndpoint, + InferenceEndpointError, + InferenceEndpointStatus, + create_inference_endpoint, + get_inference_endpoint, +) +from huggingface_hub.utils import HfHubHTTPError +from pydantic import Field + +from zenml.integrations.huggingface.flavors.huggingface_model_deployer_flavor import ( + HuggingFaceBaseConfig, +) +from zenml.logger import get_logger +from zenml.services import ServiceState, ServiceStatus, ServiceType +from zenml.services.service import BaseDeploymentService, ServiceConfig + +logger = get_logger(__name__) + +POLLING_TIMEOUT = 1200 + + +class HuggingFaceServiceConfig(HuggingFaceBaseConfig, ServiceConfig): + """Hugging Face service configurations. + + Attributes: + model_name: the name of the model. + """ + + model_name: str = "default" + + +class HuggingFaceServiceStatus(ServiceStatus): + """Hugging Face service status.""" + + +class HuggingFaceDeploymentService(BaseDeploymentService): + """Hugging Face model deployment service. + + Attributes: + SERVICE_TYPE: a service type descriptor with information describing + the Hugging Face deployment service class + config: service configuration + """ + + SERVICE_TYPE = ServiceType( + name="huggingface-deployment", + type="model-serving", + flavor="huggingface", + description="Hugging Face inference endpoint prediction service", + ) + config: HuggingFaceServiceConfig + status: HuggingFaceServiceStatus = Field( + default_factory=lambda: HuggingFaceServiceStatus() + ) + + def __init__(self, config: HuggingFaceServiceConfig, **attrs: Any): + """Initialize the Hugging Face deployment service. + + Args: + config: service configuration + attrs: additional attributes to set on the service + """ + super().__init__(config=config, **attrs) + + @property + def hf_endpoint(self) -> InferenceEndpoint: + """Get the deployed Hugging Face inference endpoint. + + Returns: + Huggingface inference endpoint. + """ + return get_inference_endpoint( + name=self.config.endpoint_name, + token=self.config.token, + namespace=self.config.namespace, + ) + + @property + def prediction_url(self) -> Any: + """The prediction URI exposed by the prediction service. + + Returns: + The prediction URI exposed by the prediction service, or None if + the service is not yet ready. + """ + if not self.is_running: + return None + return self.hf_endpoint.url + + @property + def inference_client(self) -> InferenceClient: + """Get the Hugging Face InferenceClient from Inference Endpoint. + + Returns: + Hugging Face inference client. + """ + return self.hf_endpoint.client + + def provision(self) -> None: + """Provision or update remote Hugging Face deployment instance. + + Raises: + Exception: If any unexpected error while creating inference endpoint. 
+ """ + try: + # Attempt to create and wait for the inference endpoint + _ = create_inference_endpoint( + name=self.config.endpoint_name, + repository=self.config.repository, + framework=self.config.framework, + accelerator=self.config.accelerator, + instance_size=self.config.instance_size, + instance_type=self.config.instance_type, + region=self.config.region, + vendor=self.config.vendor, + account_id=self.config.account_id, + min_replica=self.config.min_replica, + max_replica=self.config.max_replica, + revision=self.config.revision, + task=self.config.task, + custom_image=self.config.custom_image, + type=self.config.endpoint_type, + namespace=self.config.namespace, + token=self.config.token, + ).wait(timeout=POLLING_TIMEOUT) + + # Check if the endpoint URL is available after provisioning + if self.hf_endpoint.url is not None: + logger.info( + "Hugging Face inference endpoint successfully deployed." + ) + else: + logger.error( + "Failed to start Hugging Face inference endpoint service: No URL available." + ) + + except Exception as e: + self.status.update_state( + new_state=ServiceState.ERROR, error=str(e) + ) + # Catch-all for any other unexpected errors + raise Exception( + f"An unexpected error occurred while provisioning the Hugging Face inference endpoint: {e}" + ) + + def check_status(self) -> Tuple[ServiceState, str]: + """Check the the current operational state of the Hugging Face deployment. + + Returns: + The operational state of the Hugging Face deployment and a message + providing additional information about that state (e.g. a + description of the error, if one is encountered). + """ + # TODO: Support all different InferenceEndpointStatus + try: + _ = self.hf_endpoint.status + except (InferenceEndpointError, HfHubHTTPError): + return (ServiceState.INACTIVE, "") + + if self.hf_endpoint.status == InferenceEndpointStatus.RUNNING: + return ( + ServiceState.ACTIVE, + "Hugging Face Inference Endpoint deployment is available", + ) + + elif self.hf_endpoint.status == InferenceEndpointStatus.SCALED_TO_ZERO: + return ( + ServiceState.ACTIVE, + "Hugging Face Inference Endpoint deployment is scaled to zero", + ) + + elif self.hf_endpoint.status == InferenceEndpointStatus.FAILED: + return ( + ServiceState.ERROR, + "Hugging Face Inference Endpoint deployment failed: ", + ) + + elif self.hf_endpoint.status == InferenceEndpointStatus.PENDING: + return ( + ServiceState.PENDING_STARTUP, + "Hugging Face Inference Endpoint deployment is being created: ", + ) + return ( + ServiceState.PENDING_STARTUP, + "Hugging Face Inference Endpoint deployment is being created: ", + ) + + def deprovision(self, force: bool = False) -> None: + """Deprovision the remote Hugging Face deployment instance. + + Args: + force: if True, the remote deployment instance will be + forcefully deprovisioned. + """ + try: + self.hf_endpoint.delete() + except HfHubHTTPError: + logger.error( + "Hugging Face Inference Endpoint is deleted or cannot be found." + ) + pass + + def predict(self, data: "Any", max_new_tokens: int) -> "Any": + """Make a prediction using the service. + + Args: + data: input data + max_new_tokens: Number of new tokens to generate + + Returns: + The prediction result. + + Raises: + Exception: if the service is not running + NotImplementedError: if task is not supported. + """ + if not self.is_running: + raise Exception( + "Hugging Face endpoint inference service is not running. " + "Please start the service before making predictions." 
+ ) + if self.hf_endpoint.prediction_url is not None: + if self.hf_endpoint.task == "text-generation": + result = self.inference_client.task_generation( + data, max_new_tokens=max_new_tokens + ) + else: + # TODO: Add support for all different supported tasks + raise NotImplementedError( + "Tasks other than text-generation is not implemented." + ) + return result + + def get_logs( + self, follow: bool = False, tail: Optional[int] = None + ) -> Generator[str, bool, None]: + """Retrieve the service logs. + + Args: + follow: if True, the logs will be streamed as they are written + tail: only retrieve the last NUM lines of log output. + + Returns: + A generator that can be accessed to get the service logs. + """ + logger.info( + "Hugging Face Endpoints provides access to the logs of " + "your Endpoints through the UI in the “Logs” tab of your Endpoint" + ) + return # type: ignore diff --git a/src/zenml/integrations/huggingface/steps/__init__.py b/src/zenml/integrations/huggingface/steps/__init__.py new file mode 100644 index 0000000000..d6f0379fc1 --- /dev/null +++ b/src/zenml/integrations/huggingface/steps/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. +"""Initialization for Hugging Face model deployer step.""" + +from zenml.integrations.huggingface.steps.huggingface_deployer import ( + huggingface_model_deployer_step, +) diff --git a/src/zenml/integrations/huggingface/steps/huggingface_deployer.py b/src/zenml/integrations/huggingface/steps/huggingface_deployer.py new file mode 100644 index 0000000000..fd123e8834 --- /dev/null +++ b/src/zenml/integrations/huggingface/steps/huggingface_deployer.py @@ -0,0 +1,111 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+"""Implementation of the Hugging Face Deployer step.""" + +from typing import cast + +from zenml import get_step_context, step +from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import ( + HuggingFaceModelDeployer, +) +from zenml.integrations.huggingface.services.huggingface_deployment import ( + HuggingFaceDeploymentService, + HuggingFaceServiceConfig, +) +from zenml.logger import get_logger +from zenml.model_deployers.base_model_deployer import ( + DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, +) + +logger = get_logger(__name__) + + +@step(enable_cache=False) +def huggingface_model_deployer_step( + service_config: HuggingFaceServiceConfig, + deploy_decision: bool = True, + timeout: int = DEFAULT_DEPLOYMENT_START_STOP_TIMEOUT, +) -> HuggingFaceDeploymentService: + """Hugging Face model deployer pipeline step. + + This step can be used in a pipeline to implement continuous + deployment with Hugging Face Inference Endpoint. + + Args: + service_config: Hugging Face deployment service configuration. + deploy_decision: whether to deploy the model or not + timeout: the timeout in seconds to wait for the deployment to start + + Returns: + Huggingface deployment service + """ + model_deployer = cast( + HuggingFaceModelDeployer, + HuggingFaceModelDeployer.get_active_model_deployer(), + ) + + # get pipeline name, step name and run id + context = get_step_context() + pipeline_name = context.pipeline.name + run_name = context.pipeline_run.name + step_name = context.step_run.name + + # update the step configuration with the real pipeline runtime information + service_config = service_config.copy() + service_config.pipeline_name = pipeline_name + service_config.run_name = run_name + service_config.pipeline_step_name = step_name + + # fetch existing services with same pipeline name, step name and + # model name + existing_services = model_deployer.find_model_server( + pipeline_name=pipeline_name, + pipeline_step_name=step_name, + model_name=service_config.model_name, + ) + + # even when the deploy decision is negative, if an existing model server + # is not running for this pipeline/step, we still have to serve the + # current model, to ensure that a model server is available at all times + if not deploy_decision and existing_services: + logger.info( + f"Skipping model deployment because the model quality does not " + f"meet the criteria. Reusing last model server deployed by step " + f"'{step_name}' and pipeline '{pipeline_name}' for model " + f"'{service_config.model_name}'..." 
+ ) + service = cast(HuggingFaceDeploymentService, existing_services[0]) + # even when the deploy decision is negative, we still need to start + # the previous model server if it is no longer running, to ensure that + # a model server is available at all times + if not service.is_running: + service.start(timeout=timeout) + return service + + # invoke the Hugging Face model deployer to create a new service + # or update an existing one that was previously deployed for the same + # model + service = cast( + HuggingFaceDeploymentService, + model_deployer.deploy_model( + service_config, replace=True, timeout=timeout + ), + ) + + logger.info( + f"Hugging Face deployment service started and reachable at:\n" + f" {service.prediction_url}\n" + ) + + return service diff --git a/tests/harness/cfg/tests.yaml b/tests/harness/cfg/tests.yaml index 378d4db1c8..5f28eb8274 100644 --- a/tests/harness/cfg/tests.yaml +++ b/tests/harness/cfg/tests.yaml @@ -130,6 +130,14 @@ tests: - integrations: [huggingface, tensorflow] - capabilities: synchronized: true + - module: tests.integration.examples.test_huggingface_deployment + requirements: + - integrations: [huggingface] + stacks: + - type: model_deployer + flavor: huggingface + - capabilities: + synchronized: true - module: tests.integration.examples.test_lightgbm requirements: - integrations: [lightgbm] diff --git a/tests/integration/examples/huggingface/pipelines/__init__.py b/tests/integration/examples/huggingface/pipelines/__init__.py index ff2a03b96e..55a9876482 100644 --- a/tests/integration/examples/huggingface/pipelines/__init__.py +++ b/tests/integration/examples/huggingface/pipelines/__init__.py @@ -17,8 +17,12 @@ from .token_classifier_pipeline.token_classifier_pipeline import ( token_classifier_train_eval_pipeline, ) +from .deployment_pipelines.deployment_pipeline import huggingface_deployment_pipeline +from .deployment_pipelines.inference_pipeline import inference_pipeline __all__ = [ "seq_classifier_train_eval_pipeline", "token_classifier_train_eval_pipeline", + "huggingface_deployment_pipeline", + "inference_pipeline", ] diff --git a/tests/integration/examples/huggingface/pipelines/deployment_pipelines/__init__.py b/tests/integration/examples/huggingface/pipelines/deployment_pipelines/__init__.py new file mode 100644 index 0000000000..cd90a82cfc --- /dev/null +++ b/tests/integration/examples/huggingface/pipelines/deployment_pipelines/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. diff --git a/tests/integration/examples/huggingface/pipelines/deployment_pipelines/deployment_pipeline.py b/tests/integration/examples/huggingface/pipelines/deployment_pipelines/deployment_pipeline.py new file mode 100644 index 0000000000..b8eb8467f5 --- /dev/null +++ b/tests/integration/examples/huggingface/pipelines/deployment_pipelines/deployment_pipeline.py @@ -0,0 +1,39 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. + +from zenml import pipeline +from zenml.config import DockerSettings +from zenml.integrations.constants import HUGGINGFACE +from zenml.integrations.huggingface.services import HuggingFaceServiceConfig +from zenml.integrations.huggingface.steps import ( + huggingface_model_deployer_step, +) + +docker_settings = DockerSettings( + required_integrations=[HUGGINGFACE], +) + + +@pipeline(enable_cache=True, settings={"docker": docker_settings}) +def huggingface_deployment_pipeline( + model_name: str = "hf", + timeout: int = 1200, +): + service_config = HuggingFaceServiceConfig(model_name=model_name) + + # Deployment step + huggingface_model_deployer_step( + service_config=service_config, + timeout=timeout, + ) diff --git a/tests/integration/examples/huggingface/pipelines/deployment_pipelines/inference_pipeline.py b/tests/integration/examples/huggingface/pipelines/deployment_pipelines/inference_pipeline.py new file mode 100644 index 0000000000..f6d7b87366 --- /dev/null +++ b/tests/integration/examples/huggingface/pipelines/deployment_pipelines/inference_pipeline.py @@ -0,0 +1,44 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. + + +from steps.prediction_service_loader.prediction_service_loader import ( + prediction_service_loader, +) +from steps.predictor.predictor import predictor + +from zenml import pipeline +from zenml.config import DockerSettings +from zenml.integrations.constants import HUGGINGFACE + +docker_settings = DockerSettings( + required_integrations=[HUGGINGFACE], +) + + +@pipeline(enable_cache=True, settings={"docker": docker_settings}) +def inference_pipeline( + deployment_pipeline_name: str = "huggingface_deployment_pipeline", + model_name: str = "hf", + deployer_step_name: str = "huggingface_model_deployer_step", +): + inference_data = "Test text" + model_deployment_service = prediction_service_loader( + pipeline_name=deployment_pipeline_name, + step_name=deployer_step_name, + model_name=model_name, + ) + + # Run the predictor + predictor(model_deployment_service, inference_data) diff --git a/tests/integration/examples/huggingface/run.py b/tests/integration/examples/huggingface/run.py index ad1bff33cc..986d4a97c9 100644 --- a/tests/integration/examples/huggingface/run.py +++ b/tests/integration/examples/huggingface/run.py @@ -13,6 +13,8 @@ # permissions and limitations under the License. 
import click from pipelines import ( + huggingface_deployment_pipeline, + inference_pipeline, seq_classifier_train_eval_pipeline, token_classifier_train_eval_pipeline, ) @@ -20,18 +22,24 @@ @click.command() @click.option( - "--nlp_task", - type=click.Choice(["token-classification", "sequence-classification"]), + "--task", + type=click.Choice( + ["token-classification", "sequence-classification", "deployment"] + ), default="sequence-classification", - help="Name NLP task i.e. token-classification, sequence-classification", + help="Name of the task i.e. token-classification, sequence-classification", ) -def main(nlp_task: str): - if nlp_task == "token-classification": +def main(task: str): + if task == "token-classification": token_classifier_train_eval_pipeline() - elif nlp_task == "sequence-classification": + elif task == "sequence-classification": seq_classifier_train_eval_pipeline() + elif task == "deployment": + huggingface_deployment_pipeline() + inference_pipeline() + if __name__ == "__main__": main() diff --git a/tests/integration/examples/huggingface/steps/prediction_service_loader/prediction_service_loader.py b/tests/integration/examples/huggingface/steps/prediction_service_loader/prediction_service_loader.py new file mode 100644 index 0000000000..49a763bdca --- /dev/null +++ b/tests/integration/examples/huggingface/steps/prediction_service_loader/prediction_service_loader.py @@ -0,0 +1,61 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. + +from typing import cast + +from zenml import step +from zenml.integrations.huggingface.model_deployers import ( + HuggingFaceModelDeployer, +) +from zenml.integrations.huggingface.services import ( + HuggingFaceDeploymentService, +) + + +@step(enable_cache=False) +def prediction_service_loader( + pipeline_name: str, + model_name: str, + pipeline_step_name: str, + running: bool = True, +) -> HuggingFaceDeploymentService: + """Get the prediction service started by the deployment pipeline. + + Args: + pipeline_name: name of the pipeline that deployed the MLflow prediction + server + step_name: the name of the step that deployed the MLflow prediction + server + model_name: the name of the model that is deployed + running: when this flag is set, the step only returns a running service + """ + # get the Huggingface model deployer stack component + model_deployer = HuggingFaceModelDeployer.get_active_model_deployer() + + # fetch existing services with same pipeline name, step name and model name + services = model_deployer.find_model_server( + pipeline_name=pipeline_name, + pipeline_step_name=pipeline_step_name, + model_name=model_name, + running=running, + ) + + if not services: + raise RuntimeError( + f"No Huggingface inference endpoint deployed by step " + f"'{pipeline_step_name}' in pipeline '{pipeline_name}' with name " + f"'{model_name}' is currently running." 
+ ) + + return cast(HuggingFaceDeploymentService, services[0]) diff --git a/tests/integration/examples/huggingface/steps/predictor/predictor.py b/tests/integration/examples/huggingface/steps/predictor/predictor.py new file mode 100644 index 0000000000..5f67429bbf --- /dev/null +++ b/tests/integration/examples/huggingface/steps/predictor/predictor.py @@ -0,0 +1,30 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. + + +from typing_extensions import Annotated + +from zenml import step +from zenml.integrations.huggingface.services import ( + HuggingFaceDeploymentService, +) + + +@step +def predictor( + service: HuggingFaceDeploymentService, data: str +) -> Annotated[str, "predictions"]: + """Run a inference request against a prediction service""" + prediction = service.predict(data) + return prediction diff --git a/tests/integration/examples/test_huggingface.py b/tests/integration/examples/test_huggingface.py index 2a74baa852..10e2ab7808 100644 --- a/tests/integration/examples/test_huggingface.py +++ b/tests/integration/examples/test_huggingface.py @@ -35,7 +35,7 @@ def test_sequence_classification(request: pytest.FixtureRequest) -> None: with run_example( request=request, name="huggingface", - example_args=["--nlp_task", "sequence-classification"], + example_args=["--task", "sequence-classification"], pipelines={"seq_classifier_train_eval_pipeline": (1, 5)}, ): pass diff --git a/tests/integration/examples/test_huggingface_deployment.py b/tests/integration/examples/test_huggingface_deployment.py new file mode 100644 index 0000000000..855cdba7c7 --- /dev/null +++ b/tests/integration/examples/test_huggingface_deployment.py @@ -0,0 +1,40 @@ +# Copyright (c) ZenML GmbH 2024. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. 
+ +import pytest + +from tests.integration.examples.utils import run_example +from zenml.client import Client + + +def test_huggingface_deployment(request: pytest.FixtureRequest) -> None: + """Runs the huggingface deployment and inference example.""" + + with run_example( + request=request, + name="huggingface", + example_args=["--task", "deployment"], + pipelines={"huggingface_deployment_pipeline": (1, 2)}, + ): + from zenml.integrations.huggingface.model_deployers.huggingface_model_deployer import ( + HuggingFaceModelDeployer, + ) + + client = Client() + pipeline = client.get_pipeline("huggingface_deployment_pipeline") + assert pipeline + + # get the active model deployer used by the example + model_deployer = client.active_stack.model_deployer + assert isinstance(model_deployer, HuggingFaceModelDeployer)