Commit f7a3e91

Split run submission and monitoring (#3723)
* POC
* Improve error handling and add warning
* Linting
* Catch more errors
* More linting
* Docs
* Update more orchestrators
* More orchestrators
* Remaining orchestrators
* Don't fail when metadata publishing failed
* More linting
* More linting
* Docstrings
* Lightning orchestrator cleanup
* Hyperai orchestrator cleanup
* More cleanup
* Tests
* Remove more unused code
1 parent d6a7eff commit f7a3e91

File tree

24 files changed: +574 −534 lines

docs/book/component-guide/orchestrators/custom.md

Lines changed: 21 additions & 14 deletions
@@ -28,17 +28,14 @@ class BaseOrchestratorConfig(StackComponentConfig):
 class BaseOrchestrator(StackComponent, ABC):
     """Base class for all ZenML orchestrators"""
 
-    @abstractmethod
-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
-        deployment: PipelineDeploymentResponseModel,
-        stack: Stack,
+        deployment: "PipelineDeploymentResponse",
+        stack: "Stack",
         environment: Dict[str, str],
-        placeholder_run: Optional[PipelineRunResponse] = None,
-    ) -> Any:
-        """Prepares and runs the pipeline outright or returns an intermediate
-        pipeline representation that gets deployed.
-        """
+        placeholder_run: Optional["PipelineRunResponse"] = None,
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator."""
 
     @abstractmethod
     def get_orchestrator_run_id(self) -> str:
@@ -84,7 +81,7 @@ This is a slimmed-down version of the base implementation which aims to highligh
 
 If you want to create your own custom flavor for an orchestrator, you can follow the following steps:
 
-1. Create a class that inherits from the `BaseOrchestrator` class and implement the abstract `prepare_or_run_pipeline(...)` and `get_orchestrator_run_id()` methods.
+1. Create a class that inherits from the `BaseOrchestrator` class and implement the abstract `submit_pipeline(...)` and `get_orchestrator_run_id()` methods.
 2. If you need to provide any configuration, create a class that inherits from the `BaseOrchestratorConfig` class and add your configuration parameters.
 3. Bring both the implementation and the configuration together by inheriting from the `BaseOrchestratorFlavor` class. Make sure that you give a `name` to the flavor through its abstract property.

@@ -125,12 +122,15 @@ The design behind this interaction lets us separate the configuration of the fla
 ## Implementation guide
 
 1. **Create your orchestrator class:** This class should either inherit from `BaseOrchestrator`, or more commonly from `ContainerizedOrchestrator`. If your orchestrator uses container images to run code, you should inherit from `ContainerizedOrchestrator` which handles building all Docker images for the pipeline to be executed. If your orchestator does not use container images, you'll be responsible that the execution environment contains all the necessary requirements and code files to run the pipeline.
-2. **Implement the `prepare_or_run_pipeline(...)` method:** This method is responsible for running or scheduling the pipeline. In most cases, this means converting the pipeline into a format that your orchestration tool understands and running it. To do so, you should:
+2. **Implement the `submit_pipeline(...)` method:** This method is responsible for submitting the pipeline run or schedule. In most cases, this means converting the pipeline into a format that your orchestration backend understands and submitting it. To do so, you should:
 
    * Loop over all steps of the pipeline and configure your orchestration tool to run the correct command and arguments in the correct Docker image
    * Make sure the passed environment variables are set when the container is run
    * Make sure the containers are running in the correct order
 
+   * If you want to store any metadata for the run or schedule, return it as part of the `SubmissionResult`.
+   * If your orchestrator is configured to run synchronous, make sure to return a `wait_for_completion` closure in the `SubmissionResult`.
+
    Check out the [code sample](custom.md#code-sample) below for more details on how to fetch the Docker image, command, arguments and step order.
 3. **Implement the `get_orchestrator_run_id()` method:** This must return a ID that is different for each pipeline run, but identical if called from within Docker containers running different steps of the same pipeline run. If your orchestrator is based on an external tool like Kubeflow or Airflow, it is usually best to use an unique ID provided by this tool.

@@ -152,7 +152,7 @@ from typing import Dict
 
 from zenml.entrypoints import StepEntrypointConfiguration
 from zenml.models import PipelineDeploymentResponseModel, PipelineRunResponse
-from zenml.orchestrators import ContainerizedOrchestrator
+from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
 from zenml.stack import Stack
 
@@ -165,13 +165,13 @@ class MyOrchestrator(ContainerizedOrchestrator):
         # can usually use the run ID of that tool here.
         ...
 
-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponseModel",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> None:
+    ) -> Optional[SubmissionResult]:
         # If your orchestrator supports scheduling, you should handle the schedule
         # configured by the user. Otherwise you might raise an exception or log a warning
         # that the orchestrator doesn't support scheduling
@@ -209,6 +209,13 @@ class MyOrchestrator(ContainerizedOrchestrator):
             # specific resources were specified for this step:
             if self.requires_resources_in_orchestration_environment(step):
                 resources = step.config.resource_settings
+
+        if self.config.synchronous:
+            def _wait_for_completion() -> None:
+                # Query your orchestrator backend to wait until the run has finished.
+                # If possible, you can also stream the logs of the pipeline run here.
+
+            return SubmissionResult(wait_for_completion=_wait_for_completion)
 ```
 
 {% hint style="info" %}
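The docs diff above changes the orchestrator contract from a single do-everything `prepare_or_run_pipeline(...)` to a `submit_pipeline(...)` that returns an optional `SubmissionResult`. The following is a minimal sketch of how that split plays out at the call site; the `SubmissionResult` dataclass, `DummyOrchestrator`, and `run()` below are simplified stand-ins for illustration, not ZenML's actual classes:

```python
from dataclasses import dataclass, field
from typing import Callable, Dict, List, Optional


# Simplified stand-in (assumption) for zenml.orchestrators.SubmissionResult:
# a submission may carry run metadata and an optional closure that blocks
# until the run finishes.
@dataclass
class SubmissionResult:
    metadata: Dict[str, str] = field(default_factory=dict)
    wait_for_completion: Optional[Callable[[], None]] = None


class DummyOrchestrator:
    """Toy orchestrator illustrating the submit/monitor split."""

    def __init__(self, synchronous: bool) -> None:
        self.synchronous = synchronous
        self.log: List[str] = []

    def submit_pipeline(self) -> Optional[SubmissionResult]:
        # Submit only -- never block here. Monitoring happens through the
        # returned closure, so submission errors and monitoring errors can
        # be handled separately by the caller.
        self.log.append("submitted")

        wait = None
        if self.synchronous:
            def wait() -> None:
                self.log.append("waited")

        return SubmissionResult(
            metadata={"run_id": "run-123"},  # hypothetical metadata
            wait_for_completion=wait,
        )


def run(orchestrator: DummyOrchestrator) -> Dict[str, str]:
    # Caller-side logic: store metadata if any, then monitor only when the
    # orchestrator handed back a wait closure.
    result = orchestrator.submit_pipeline()
    metadata = result.metadata if result else {}
    if result and result.wait_for_completion:
        result.wait_for_completion()
    return metadata


sync = DummyOrchestrator(synchronous=True)
print(run(sync), sync.log)      # {'run_id': 'run-123'} ['submitted', 'waited']
async_ = DummyOrchestrator(synchronous=False)
print(run(async_), async_.log)  # {'run_id': 'run-123'} ['submitted']
```

An asynchronous orchestrator simply leaves `wait_for_completion` as `None`, and the caller skips the monitoring phase entirely.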

src/zenml/constants.py

Lines changed: 0 additions & 1 deletion
@@ -173,7 +173,6 @@ def handle_int_env_var(var: str, default: int = 0) -> int:
 ENV_ZENML_DISABLE_STEP_NAMES_IN_LOGS = "ZENML_DISABLE_STEP_NAMES_IN_LOGS"
 ENV_ZENML_IGNORE_FAILURE_HOOK = "ZENML_IGNORE_FAILURE_HOOK"
 ENV_ZENML_CUSTOM_SOURCE_ROOT = "ZENML_CUSTOM_SOURCE_ROOT"
-ENV_ZENML_WHEEL_PACKAGE_NAME = "ZENML_WHEEL_PACKAGE_NAME"
 ENV_ZENML_PIPELINE_RUN_API_TOKEN_EXPIRATION = (
     "ZENML_PIPELINE_API_TOKEN_EXPIRATION"
 )

src/zenml/exceptions.py

Lines changed: 16 additions & 0 deletions
@@ -220,3 +220,19 @@ class CustomFlavorImportError(ImportError):
 
 class MaxConcurrentTasksError(ZenMLBaseException):
     """Raised when the maximum number of concurrent tasks is reached."""
+
+
+class RunMonitoringError(ZenMLBaseException):
+    """Raised when an error occurs while monitoring a pipeline run."""
+
+    def __init__(
+        self,
+        original_exception: BaseException,
+    ) -> None:
+        """Initializes the error.
+
+        Args:
+            original_exception: The original exception that occurred while
+                monitoring the pipeline run.
+        """
+        self.original_exception = original_exception
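The new `RunMonitoringError` exists so that a failure while *watching* a run is distinguishable from a failure while *submitting* it. A sketch of the intended pattern follows; the local `ZenMLBaseException` stand-in and the `submit_and_monitor`/`flaky_wait` helpers are assumptions for illustration, not ZenML's actual call sites:

```python
from typing import Callable


class ZenMLBaseException(Exception):
    """Stand-in for zenml.exceptions.ZenMLBaseException (assumption)."""


class RunMonitoringError(ZenMLBaseException):
    """Raised when an error occurs while monitoring a pipeline run."""

    def __init__(self, original_exception: BaseException) -> None:
        self.original_exception = original_exception


def submit_and_monitor(wait_for_completion: Callable[[], None]) -> str:
    # Submission already succeeded by the time we get here; a crash while
    # waiting should not look like a failed submission, so the original
    # error is wrapped and kept inspectable for the caller.
    try:
        wait_for_completion()
    except BaseException as e:
        raise RunMonitoringError(e) from e
    return "finished"


def flaky_wait() -> None:
    raise TimeoutError("backend poll timed out")  # hypothetical failure


try:
    submit_and_monitor(flaky_wait)
except RunMonitoringError as e:
    print(type(e.original_exception).__name__)  # TimeoutError
```

The caller can then report the run as submitted-but-unmonitored instead of failed, which matches the commit message's "Don't fail when metadata publishing failed" intent.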

src/zenml/integrations/airflow/orchestrators/airflow_orchestrator.py

Lines changed: 15 additions & 6 deletions
@@ -38,7 +38,7 @@
 )
 from zenml.io import fileio
 from zenml.logger import get_logger
-from zenml.orchestrators import ContainerizedOrchestrator
+from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
 from zenml.orchestrators.utils import get_orchestrator_run_name
 from zenml.stack import StackValidator
 from zenml.utils import io_utils
@@ -191,21 +191,29 @@ def prepare_pipeline_deployment(
         if self.config.local:
             stack.check_local_paths()
 
-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Any:
-        """Creates and writes an Airflow DAG zip file.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.
+
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.
 
         Args:
-            deployment: The pipeline deployment to prepare or run.
+            deployment: The pipeline deployment to submit.
             stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.
+
+        Returns:
+            Optional submission result.
         """
         pipeline_settings = cast(
             AirflowOrchestratorSettings, self.get_settings(deployment)
@@ -277,6 +285,7 @@ def prepare_or_run_pipeline(
             dag_generator_values=dag_generator_values,
             output_dir=pipeline_settings.dag_output_dir or self.dags_directory,
         )
+        return None
 
     def _apply_resource_settings(
         self,
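The Airflow orchestrator only writes a DAG archive for Airflow's scheduler to pick up later; it cannot monitor the resulting run, so its `submit_pipeline` returns `None`. A minimal fire-and-forget sketch of that shape, with assumed names (`DagFileOrchestrator`, the archive filename, and the placeholder payload are all illustrative):

```python
import os
import tempfile
from typing import Optional


class DagFileOrchestrator:
    """Fire-and-forget sketch: writes a DAG file, cannot monitor the run."""

    def __init__(self, output_dir: str) -> None:
        self.output_dir = output_dir

    def submit_pipeline(self) -> Optional[object]:
        # "Submission" here just means materializing the DAG archive; there
        # is nothing to wait for and no backend run to query, so the method
        # returns None instead of a SubmissionResult.
        path = os.path.join(self.output_dir, "pipeline_dag.zip")
        with open(path, "wb") as f:
            f.write(b"dag-archive")  # placeholder payload
        return None


with tempfile.TemporaryDirectory() as tmp:
    orchestrator = DagFileOrchestrator(tmp)
    result = orchestrator.submit_pipeline()
    # Caller: no SubmissionResult means no metadata to store and no waiting.
    assert result is None
    assert os.path.exists(os.path.join(tmp, "pipeline_dag.zip"))
```

Returning `None` is the contract's way of saying "submitted, but there is nothing further for ZenML to observe".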

src/zenml/integrations/aws/orchestrators/sagemaker_orchestrator.py

Lines changed: 54 additions & 58 deletions
@@ -19,7 +19,6 @@
     TYPE_CHECKING,
     Any,
     Dict,
-    Iterator,
     List,
     Optional,
     Tuple,
@@ -60,7 +59,6 @@
 )
 from zenml.enums import (
     ExecutionStatus,
-    MetadataResourceTypes,
     StackComponentType,
 )
 from zenml.integrations.aws.flavors.sagemaker_orchestrator_flavor import (
@@ -73,7 +71,7 @@
 )
 from zenml.logger import get_logger
 from zenml.metadata.metadata_types import MetadataType, Uri
-from zenml.orchestrators import ContainerizedOrchestrator
+from zenml.orchestrators import ContainerizedOrchestrator, SubmissionResult
 from zenml.orchestrators.utils import get_orchestrator_run_name
 from zenml.stack import StackValidator
 from zenml.utils.env_utils import split_environment_variables
@@ -273,20 +271,25 @@ def _get_sagemaker_session(self) -> Session:
             boto_session=boto_session, default_bucket=self.config.bucket
         )
 
-    def prepare_or_run_pipeline(
+    def submit_pipeline(
         self,
         deployment: "PipelineDeploymentResponse",
         stack: "Stack",
         environment: Dict[str, str],
         placeholder_run: Optional["PipelineRunResponse"] = None,
-    ) -> Iterator[Dict[str, MetadataType]]:
-        """Prepares or runs a pipeline on Sagemaker.
+    ) -> Optional[SubmissionResult]:
+        """Submits a pipeline to the orchestrator.
+
+        This method should only submit the pipeline and not wait for it to
+        complete. If the orchestrator is configured to wait for the pipeline run
+        to complete, a function that waits for the pipeline run to complete can
+        be passed as part of the submission result.
 
         Args:
-            deployment: The deployment to prepare or run.
-            stack: The stack to run on.
+            deployment: The pipeline deployment to submit.
+            stack: The stack the pipeline will run on.
             environment: Environment variables to set in the orchestration
-                environment.
+                environment. These don't need to be set if running locally.
             placeholder_run: An optional placeholder run for the deployment.
 
         Raises:
@@ -296,8 +299,8 @@ def prepare_or_run_pipeline(
                 AWS SageMaker NetworkConfig class.
             ValueError: If the schedule is not valid.
 
-        Yields:
-            A dictionary of metadata related to the pipeline run.
+        Returns:
+            Optional submission result.
         """
         # sagemaker requires pipelineName to use alphanum and hyphens only
         unsanitized_orchestrator_run_name = get_orchestrator_run_name(
@@ -705,26 +708,14 @@ def prepare_or_run_pipeline(
             )
             logger.info(f"The schedule ARN is: {triggers[0]}")
 
+            schedule_metadata = {}
             try:
-                from zenml.models import RunMetadataResource
-
                 schedule_metadata = self.generate_schedule_metadata(
                     schedule_arn=triggers[0]
                 )
-
-                Client().create_run_metadata(
-                    metadata=schedule_metadata,  # type: ignore[arg-type]
-                    resources=[
-                        RunMetadataResource(
-                            id=deployment.schedule.id,
-                            type=MetadataResourceTypes.SCHEDULE,
-                        )
-                    ],
-                )
             except Exception as e:
                 logger.debug(
-                    "There was an error attaching metadata to the "
-                    f"schedule: {e}"
+                    "There was an error generating schedule metadata: %s", e
                 )
 
             logger.info(
@@ -749,6 +740,7 @@ def prepare_or_run_pipeline(
             logger.info(
                 f"`aws scheduler delete-schedule --name {schedule_name}`"
             )
+            return SubmissionResult(metadata=schedule_metadata)
         else:
             # Execute the pipeline immediately if no schedule is specified
             execution = pipeline.start()
@@ -757,33 +749,40 @@ def prepare_or_run_pipeline(
                 "when using the Sagemaker Orchestrator."
             )
 
-            # Yield metadata based on the generated execution object
-            yield from self.compute_metadata(
+            run_metadata = self.compute_metadata(
                 execution_arn=execution.arn, settings=settings
             )
 
-            # mainly for testing purposes, we wait for the pipeline to finish
+            _wait_for_completion = None
             if settings.synchronous:
-                logger.info(
-                    "Executing synchronously. Waiting for pipeline to "
-                    "finish... \n"
-                    "At this point you can `Ctrl-C` out without cancelling the "
-                    "execution."
-                )
-                try:
-                    execution.wait(
-                        delay=POLLING_DELAY, max_attempts=MAX_POLLING_ATTEMPTS
-                    )
-                    logger.info("Pipeline completed successfully.")
-                except WaiterError:
-                    raise RuntimeError(
-                        "Timed out while waiting for pipeline execution to "
-                        "finish. For long-running pipelines we recommend "
-                        "configuring your orchestrator for asynchronous "
-                        "execution. The following command does this for you: \n"
-                        f"`zenml orchestrator update {self.name} "
-                        f"--synchronous=False`"
-                    )
+
+                def _wait_for_completion() -> None:
+                    logger.info(
+                        "Executing synchronously. Waiting for pipeline to "
+                        "finish... \n"
+                        "At this point you can `Ctrl-C` out without cancelling the "
+                        "execution."
+                    )
+                    try:
+                        execution.wait(
+                            delay=POLLING_DELAY,
+                            max_attempts=MAX_POLLING_ATTEMPTS,
+                        )
+                        logger.info("Pipeline completed successfully.")
+                    except WaiterError:
+                        raise RuntimeError(
+                            "Timed out while waiting for pipeline execution to "
+                            "finish. For long-running pipelines we recommend "
+                            "configuring your orchestrator for asynchronous "
+                            "execution. The following command does this for you: \n"
+                            f"`zenml orchestrator update {self.name} "
+                            f"--synchronous=False`"
+                        )
+
+            return SubmissionResult(
+                wait_for_completion=_wait_for_completion,
+                metadata=run_metadata,
+            )
 
     def get_pipeline_run_metadata(
         self, run_id: UUID
@@ -798,20 +797,15 @@ def get_pipeline_run_metadata(
         """
         execution_arn = os.environ[ENV_ZENML_SAGEMAKER_RUN_ID]
 
-        run_metadata: Dict[str, "MetadataType"] = {}
-
         settings = cast(
             SagemakerOrchestratorSettings,
             self.get_settings(Client().get_pipeline_run(run_id)),
         )
 
-        for metadata in self.compute_metadata(
+        return self.compute_metadata(
             execution_arn=execution_arn,
             settings=settings,
-        ):
-            run_metadata.update(metadata)
-
-        return run_metadata
+        )
 
     def fetch_status(self, run: "PipelineRunResponse") -> ExecutionStatus:
         """Refreshes the status of a specific pipeline run.
@@ -873,14 +867,14 @@ def compute_metadata(
         self,
         execution_arn: str,
         settings: SagemakerOrchestratorSettings,
-    ) -> Iterator[Dict[str, MetadataType]]:
+    ) -> Dict[str, MetadataType]:
         """Generate run metadata based on the generated Sagemaker Execution.
 
         Args:
             execution_arn: The ARN of the pipeline execution.
             settings: The Sagemaker orchestrator settings.
 
-        Yields:
+        Returns:
             A dictionary of metadata related to the pipeline run.
         """
         # Orchestrator Run ID
@@ -901,7 +895,7 @@ def compute_metadata(
         ):
             metadata[METADATA_ORCHESTRATOR_LOGS_URL] = Uri(logs_url)
 
-        yield metadata
+        return metadata
 
     def _compute_orchestrator_url(
         self,
@@ -979,7 +973,9 @@ def _compute_orchestrator_logs_url(
         return None
 
     @staticmethod
-    def generate_schedule_metadata(schedule_arn: str) -> Dict[str, str]:
+    def generate_schedule_metadata(
+        schedule_arn: str,
+    ) -> Dict[str, MetadataType]:
         """Attaches metadata to the ZenML Schedules.
 
         Args:
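A recurring change in the SageMaker file is turning generator-style metadata functions (documented with `Yields:` but only ever yielding one dict) into plain functions that return the dict, which removes the loop-and-merge boilerplate at every call site. A minimal before/after sketch; the function and key names below are illustrative, not the actual SageMaker orchestrator code:

```python
from typing import Dict, Iterator


# Before: a generator that yields a single metadata dict, forcing callers
# to iterate and merge even though there is only ever one item.
def compute_metadata_old(run_id: str) -> Iterator[Dict[str, str]]:
    metadata = {"orchestrator_run_id": run_id}
    yield metadata


# After: just return the dict directly.
def compute_metadata_new(run_id: str) -> Dict[str, str]:
    return {"orchestrator_run_id": run_id}


# Old call site: loop + update boilerplate.
merged: Dict[str, str] = {}
for chunk in compute_metadata_old("run-42"):
    merged.update(chunk)

# New call site: a single expression.
direct = compute_metadata_new("run-42")

print(merged == direct)  # True
```

Since the metadata now travels inside the returned `SubmissionResult` rather than being yielded back to the caller, the generator indirection no longer served a purpose.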
