3.3.0 #271


Merged
merged 52 commits on Sep 7, 2021

Changes from 23 commits
Commits
52 commits
f9c9425
Deletion fns
tomislav-peharda Aug 26, 2021
f30d572
wip
Aug 31, 2021
d5a8379
add custom scalar metric
Aug 31, 2021
39b4615
fix tests
tomislav-peharda Aug 31, 2021
49b03c1
Fix gql query
tomislav-peharda Aug 31, 2021
14d3971
Move fixtures & test cases
tomislav-peharda Aug 31, 2021
3e9bc58
Remove return
tomislav-peharda Aug 31, 2021
a4af25f
revert fixture
tomislav-peharda Aug 31, 2021
e75e5d8
wip
Aug 31, 2021
5d57910
tests passing
Sep 1, 2021
268ee7b
Add test
tomislav-peharda Sep 1, 2021
8113586
Delete annotation groups
tomislav-peharda Sep 1, 2021
141a050
wip
Sep 1, 2021
9c1e4e5
Formatting
tomislav-peharda Sep 2, 2021
9bf2fbf
Further formatting
tomislav-peharda Sep 2, 2021
8a0b1ae
Further formatting
tomislav-peharda Sep 2, 2021
c8948ec
Skip failing test
tomislav-peharda Sep 2, 2021
55c5258
Rename things
tomislav-peharda Sep 2, 2021
b20f5bf
update tests, prep for release
Sep 2, 2021
3a699da
Another test to skip
tomislav-peharda Sep 2, 2021
7c32542
Merge pull request #266 from Labelbox/fs/mea-enable-model-model-runs-…
msokoloff1 Sep 2, 2021
e86770d
wip
Sep 2, 2021
0d3ff53
Merge branch 'develop' of https://github.com/Labelbox/labelbox-python…
Sep 2, 2021
2981272
fix tests
Sep 2, 2021
fbc9caa
fix test
Sep 2, 2021
aa1d379
remove unused import
msokoloff1 Sep 2, 2021
4bd28aa
Update test_data_row_metadata.py
msokoloff1 Sep 2, 2021
201b55b
wip
Sep 2, 2021
96066b6
format
Sep 2, 2021
1038a42
Merge pull request #270 from Labelbox/ms/create_data_rows_sync
msokoloff1 Sep 2, 2021
1254354
Merge pull request #265 from Labelbox/ms/custom-metric-models
msokoloff1 Sep 2, 2021
e27918b
debug
Sep 2, 2021
ee83127
bug fix
Sep 2, 2021
086051d
format
Sep 2, 2021
98069c5
fix exports
Sep 3, 2021
4cb1780
resolve converter issues
Sep 3, 2021
905f718
Merge branch 'develop' of https://github.com/Labelbox/labelbox-python…
Sep 3, 2021
d68828e
merge
Sep 3, 2021
e771541
fix error
Sep 3, 2021
456f2fe
format
Sep 3, 2021
ca5b52e
remove unused imports
msokoloff1 Sep 3, 2021
e7d8c67
format
msokoloff1 Sep 3, 2021
b1261c2
wip
Sep 3, 2021
ee2599e
tests working
Sep 3, 2021
e24fb86
tested and works
Sep 3, 2021
46663f5
add untracked files
Sep 3, 2021
6970d60
Merge pull request #272 from Labelbox/ms/fix-classification-bug
msokoloff1 Sep 6, 2021
5f46786
Merge branch 'develop' of https://github.com/Labelbox/labelbox-python…
Sep 6, 2021
81d0316
remove comments
Sep 6, 2021
75c7cb7
format
Sep 6, 2021
38fa46a
add missing import
Sep 7, 2021
e89b9aa
Merge pull request #273 from Labelbox/ms/custom-scalar-metrics
msokoloff1 Sep 7, 2021
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,14 @@
# Changelog

# Version 3.3.0 (2021-09-02)
## Added
* `Dataset.create_data_rows_sync()` for synchronous bulk uploads of data rows
* `Model.delete()`, `ModelRun.delete()`, and `ModelRun.delete_annotation_groups()` to
  clean up models, model runs, and annotation groups.

## Fix
* Increased timeout for label exports since projects with many segmentation masks weren't finishing quickly enough.

# Version 3.2.1 (2021-08-31)
## Fix
* Resolved issue where `create_data_rows()` was not working on Amazon Linux
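A minimal usage sketch of the headline addition above; the client setup, dataset UID, and row payloads are illustrative placeholders, not values taken from this PR:

```python
from labelbox import Client

# Hypothetical connection details: substitute a real API key and dataset UID.
client = Client(api_key="<LABELBOX_API_KEY>")
dataset = client.get_dataset("<DATASET_UID>")

# New in 3.3.0: synchronous bulk upload, capped at 1000 data rows and
# 5 attachments per row. Returns None on success and raises on failure.
dataset.create_data_rows_sync([
    {"row_data": "https://example.com/image-1.jpg", "external_id": "image-1"},
    {"row_data": "https://example.com/image-2.jpg", "external_id": "image-2"},
])
```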
2 changes: 1 addition & 1 deletion labelbox/__init__.py
@@ -1,5 +1,5 @@
name = "labelbox"
__version__ = "3.2.1"
__version__ = "3.3.0"

from labelbox.schema.project import Project
from labelbox.client import Client
151 changes: 112 additions & 39 deletions labelbox/schema/dataset.py
@@ -69,13 +69,111 @@ def create_data_row(self, **kwargs):
row_data = kwargs[DataRow.row_data.name]
if os.path.exists(row_data):
kwargs[DataRow.row_data.name] = self.client.upload_file(row_data)

kwargs[DataRow.dataset.name] = self

return self.client._create(DataRow, kwargs)

def create_data_rows_sync(self, items):
""" Synchronously bulk upload data rows.

Use this instead of `Dataset.create_data_rows` for smaller batches of data rows that need to be uploaded quickly.
Cannot use this for uploads containing more than 1000 data rows.
Each data row is also limited to 5 attachments.

Args:
items (iterable of (dict or str)):
See the docstring for `Dataset._create_descriptor_file` for more information.
Returns:
None. If the function doesn't raise an exception then the import was successful.

Raises:
InvalidQueryError: If the `items` parameter does not conform to
the specification in Dataset._create_descriptor_file or if the server did not accept the
DataRow creation request (unknown reason).
InvalidAttributeError: If there are fields in `items` not valid for
a DataRow.
ValueError: When the upload parameters are invalid
"""
max_data_rows_supported = 1000
max_attachments_per_data_row = 5
if len(items) > max_data_rows_supported:
raise ValueError(
f"Dataset.create_data_rows_sync() supports a max of {max_data_rows_supported} data rows."
" For larger imports use the async function Dataset.create_data_rows()"
)
descriptor_url = self._create_descriptor_file(
items, max_attachments_per_data_row=max_attachments_per_data_row)
dataset_param = "datasetId"
url_param = "jsonUrl"
query_str = """mutation AppendRowsToDatasetSyncPyApi($%s: ID!, $%s: String!){
appendRowsToDatasetSync(data:{datasetId: $%s, jsonFileUrl: $%s}
){dataset{id}}} """ % (dataset_param, url_param, dataset_param,
url_param)
self.client.execute(query_str, {
dataset_param: self.uid,
url_param: descriptor_url
})

def create_data_rows(self, items):
""" Creates multiple DataRow objects based on the given `items`.
""" Asynchronously bulk upload data rows

Use this instead of `Dataset.create_data_rows_sync` for batches that contain more than 100 data rows.

Args:
items (iterable of (dict or str)): See the docstring for `Dataset._create_descriptor_file` for more information

Returns:
Task representing the data import on the server side. The Task
can be used for inspecting task progress and waiting until it's done.

Raises:
InvalidQueryError: If the `items` parameter does not conform to
the specification above or if the server did not accept the
DataRow creation request (unknown reason).
ResourceNotFoundError: If unable to retrieve the Task for the
import process. This could imply that the import failed.
InvalidAttributeError: If there are fields in `items` not valid for
a DataRow.
ValueError: When the upload parameters are invalid
"""
descriptor_url = self._create_descriptor_file(items)
# Create data source
dataset_param = "datasetId"
url_param = "jsonUrl"
query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
){ taskId accepted errorMessage } } """ % (dataset_param, url_param,
dataset_param, url_param)

res = self.client.execute(query_str, {
dataset_param: self.uid,
url_param: descriptor_url
})
res = res["appendRowsToDataset"]
if not res["accepted"]:
msg = res['errorMessage']
raise InvalidQueryError(
f"Server did not accept DataRow creation request. {msg}")

# Fetch and return the task.
task_id = res["taskId"]
user = self.client.get_user()
task = list(user.created_tasks(where=Entity.Task.uid == task_id))
# Cache user in a private variable as the relationship can't be
# resolved due to server-side limitations (see Task.created_by)
# for more info.
if len(task) != 1:
raise ResourceNotFoundError(Entity.Task, task_id)
task = task[0]
task._user = user
return task

def _create_descriptor_file(self, items, max_attachments_per_data_row=None):
"""
This function is shared by both `Dataset.create_data_rows` and `Dataset.create_data_rows_sync`
to prepare the input file. The user-defined input is validated, processed, and json stringified.
Finally the json data is uploaded to gcs and a uri is returned. This uri can be passed to
either of those two methods, which hand it to the server as the `jsonFileUrl` of the
append-rows mutation.

Each element in `items` can be either a `str` or a `dict`. If
it is a `str`, then it is interpreted as a local file path. The file
@@ -102,19 +200,19 @@ def create_data_rows(self, items):

Args:
items (iterable of (dict or str)): See above for details.
max_attachments_per_data_row (Optional[int]): Param used during attachment validation to determine
if the user has provided too many attachments.

Returns:
Task representing the data import on the server side. The Task
can be used for inspecting task progress and waiting until it's done.
uri (string): A reference to the uploaded json data.

Raises:
InvalidQueryError: If the `items` parameter does not conform to
the specification above or if the server did not accept the
DataRow creation request (unknown reason).
ResourceNotFoundError: If unable to retrieve the Task for the
import process. This could imply that the import failed.
InvalidAttributeError: If there are fields in `items` not valid for
a DataRow.
ValueError: When the upload parameters are invalid
"""
file_upload_thread_count = 20
DataRow = Entity.DataRow
@@ -135,6 +233,12 @@ def validate_attachments(item):
attachments = item.get('attachments')
if attachments:
if isinstance(attachments, list):
if max_attachments_per_data_row and len(
attachments) > max_attachments_per_data_row:
raise ValueError(
f"Max attachments number of supported attachments per data row is {max_attachments_per_data_row}."
f" Found {len(attachments)}. Condense multiple attachments into one with the HTML attachment type if necessary."
)
for attachment in attachments:
AssetAttachment.validate_attachment_json(attachment)
else:
@@ -198,40 +302,9 @@ def convert_item(item):
with ThreadPoolExecutor(file_upload_thread_count) as executor:
futures = [executor.submit(convert_item, item) for item in items]
items = [future.result() for future in as_completed(futures)]

# Prepare and upload the descriptor file
data = json.dumps(items)
descriptor_url = self.client.upload_data(data)
# Create data source
dataset_param = "datasetId"
url_param = "jsonUrl"
query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
){ taskId accepted errorMessage } } """ % (dataset_param, url_param,
dataset_param, url_param)

res = self.client.execute(query_str, {
dataset_param: self.uid,
url_param: descriptor_url
})
res = res["appendRowsToDataset"]
if not res["accepted"]:
msg = res['errorMessage']
raise InvalidQueryError(
f"Server did not accept DataRow creation request. {msg}")

# Fetch and return the task.
task_id = res["taskId"]
user = self.client.get_user()
task = list(user.created_tasks(where=Entity.Task.uid == task_id))
# Cache user in a private variable as the relationship can't be
# resolved due to server-side limitations (see Task.created_by)
# for more info.
if len(task) != 1:
raise ResourceNotFoundError(Entity.Task, task_id)
task = task[0]
task._user = user
return task
return self.client.upload_data(data)

def data_rows_for_external_id(self, external_id, limit=10):
""" Convenience method for getting a single `DataRow` belonging to this
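A hedged sketch of how the two upload paths in the diff above differ, continuing the placeholder `dataset` handle from the earlier sketch; the attachment payload shape is an assumption for illustration:

```python
from labelbox.exceptions import InvalidQueryError

rows = [
    {
        "row_data": "https://example.com/frame-001.png",
        "external_id": "frame-001",
        # validate_attachments() rejects more than max_attachments_per_data_row
        # (5) entries, but only on the synchronous path.
        "attachments": [{"type": "TEXT", "value": "uploaded by the 3.3.0 sketch"}],
    },
]

try:
    # Sync path: no Task is created; success returns None, failure raises.
    dataset.create_data_rows_sync(rows)
except (ValueError, InvalidQueryError) as err:
    print(f"synchronous upload rejected: {err}")

# Async path: a Task handle is returned for tracking the server-side import
# (see the Task.wait_till_done() change further down).
task = dataset.create_data_rows(rows)
```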
11 changes: 11 additions & 0 deletions labelbox/schema/model.py
@@ -34,3 +34,14 @@ def create_model_run(self, name):
model_id_param: self.uid
})
return ModelRun(self.client, res["createModelRun"])

def delete(self):
""" Deletes specified model.

Returns:
Query execution success.
"""
ids_param = "ids"
query_str = """mutation DeleteModelPyApi($%s: ID!) {
deleteModels(where: {ids: [$%s]})}""" % (ids_param, ids_param)
self.client.execute(query_str, {ids_param: str(self.uid)})
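A hedged sketch of the lifecycle that the new `Model.delete()` enables; the `project` handle and names are illustrative, and `client.create_model()` is called the same way as in the test fixtures further down:

```python
# Create a model against an existing project's ontology, then clean it up.
ontology = project.ontology()
model = client.create_model(name="demo-model", ontology_id=ontology.uid)
model_run = model.create_model_run(name="demo-run")
# ... upsert labels / predictions into model_run here ...
model.delete()  # new in 3.3.0: issues the deleteModels mutation shown above
```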
30 changes: 30 additions & 0 deletions labelbox/schema/model_run.py
@@ -74,6 +74,36 @@ def annotation_groups(self):
lambda client, res: AnnotationGroup(client, self.model_id, res),
['annotationGroups', 'pageInfo', 'endCursor'])

def delete(self):
""" Deletes specified model run.

Returns:
Query execution success.
"""
ids_param = "ids"
query_str = """mutation DeleteModelRunPyApi($%s: ID!) {
deleteModelRuns(where: {ids: [$%s]})}""" % (ids_param, ids_param)
self.client.execute(query_str, {ids_param: str(self.uid)})

def delete_annotation_groups(self, data_row_ids):
""" Deletes annotation groups by data row ids for a model run.

Args:
data_row_ids (list): List of data row ids for which to delete annotation groups.
Returns:
Query execution success.
"""
model_run_id_param = "modelRunId"
data_row_ids_param = "dataRowIds"
query_str = """mutation DeleteModelRunDataRowsPyApi($%s: ID!, $%s: [ID!]!) {
deleteModelRunDataRows(where: {modelRunId: $%s, dataRowIds: $%s})}""" % (
model_run_id_param, data_row_ids_param, model_run_id_param,
data_row_ids_param)
self.client.execute(query_str, {
model_run_id_param: self.uid,
data_row_ids_param: data_row_ids
})


class AnnotationGroup(DbObject):
label_id = Field.String("label_id")
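And a companion sketch for the two new `ModelRun` deletion helpers; the data row ids are placeholders:

```python
# Remove annotation groups for specific data rows, then the run itself.
model_run.delete_annotation_groups(
    data_row_ids=["<DATA_ROW_ID_1>", "<DATA_ROW_ID_2>"])
model_run.delete()
```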
6 changes: 3 additions & 3 deletions labelbox/schema/project.py
@@ -166,7 +166,7 @@ def export_queued_data_rows(self, timeout_seconds=120):
self.uid)
time.sleep(sleep_time)

def video_label_generator(self, timeout_seconds=120):
def video_label_generator(self, timeout_seconds=600):
"""
Download video annotations

@@ -190,7 +190,7 @@ def video_label_generator(self, timeout_seconds=120):
"Or use project.label_generator() for text and imagery data.")
return LBV1Converter.deserialize_video(json_data, self.client)

def label_generator(self, timeout_seconds=60):
def label_generator(self, timeout_seconds=600):
"""
Download text and image annotations

@@ -214,7 +214,7 @@ def label_generator(self, timeout_seconds=60):
"Or use project.video_label_generator() for video data.")
return LBV1Converter.deserialize(json_data)

def export_labels(self, download=False, timeout_seconds=60):
def export_labels(self, download=False, timeout_seconds=600):
""" Calls the server-side Label exporting that generates a JSON
payload, and returns the URL to that payload.

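The default export timeouts above move from 60-120 s to 600 s to accommodate projects with many segmentation masks; they remain overridable per call. A brief sketch, assuming an existing `project` handle:

```python
# Rely on the new 600 s default, or pass an explicit ceiling for very large exports.
labels_url = project.export_labels(download=False)
labels = project.export_labels(download=True, timeout_seconds=1200)
```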
2 changes: 1 addition & 1 deletion labelbox/schema/task.py
@@ -40,7 +40,7 @@ def refresh(self):
for field in self.fields():
setattr(self, field.name, getattr(tasks[0], field.name))

def wait_till_done(self, timeout_seconds=60):
def wait_till_done(self, timeout_seconds=300):
""" Waits until the task is completed. Periodically queries the server
to update the task attributes.

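`Task.wait_till_done()` gets a matching bump to a 300 s default. A short polling sketch for the async upload path, reusing the placeholder `rows` from earlier:

```python
task = dataset.create_data_rows(rows)
# Polls the server until the import finishes or the timeout elapses;
# 300 s is the new default, larger values suit big imports.
task.wait_till_done(timeout_seconds=600)
print(task.status)
```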
@@ -297,11 +297,25 @@ def predictions(object_predictions, classification_predictions):


@pytest.fixture
def model_run(client, rand_gen, configured_project, annotation_submit_fn,
model_run_predictions):
configured_project.enable_model_assisted_labeling()
def model(client, rand_gen, configured_project):
ontology = configured_project.ontology()

data = {"name": rand_gen(str), "ontology_id": ontology.uid}
return client.create_model(data["name"], data["ontology_id"])


@pytest.fixture
def model_run(rand_gen, model):
name = rand_gen(str)
return model.create_model_run(name)


@pytest.fixture
def model_run_annotation_groups(client, configured_project,
annotation_submit_fn, model_run_predictions,
model_run):
configured_project.enable_model_assisted_labeling()

upload_task = MALPredictionImport.create_from_objects(
client, configured_project.uid, f'mal-import-{uuid.uuid4()}',
model_run_predictions)
@@ -310,15 +324,10 @@ def model_run(client, rand_gen, configured_project, annotation_submit_fn,
for data_row_id in {x['dataRow']['id'] for x in model_run_predictions}:
annotation_submit_fn(configured_project.uid, data_row_id)

data = {"name": rand_gen(str), "ontology_id": ontology.uid}
model = client.create_model(data["name"], data["ontology_id"])
name = rand_gen(str)
model_run_s = model.create_model_run(name)

time.sleep(3)
labels = configured_project.export_labels(download=True)
model_run_s.upsert_labels([label['ID'] for label in labels])
model_run.upsert_labels([label['ID'] for label in labels])
time.sleep(3)

yield model_run_s
yield model_run
# TODO: Delete resources when that is possible ..
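A hedged sketch of how a test could consume the refactored fixtures above; the test body and assertion are illustrative and not part of this PR:

```python
def test_delete_annotation_groups(model_run_annotation_groups,
                                  model_run_predictions):
    # model_run_annotation_groups yields a ModelRun whose labels have already
    # been upserted for the predicted data rows.
    model_run = model_run_annotation_groups
    data_row_ids = list({p['dataRow']['id'] for p in model_run_predictions})
    model_run.delete_annotation_groups(data_row_ids=data_row_ids)
    assert len(list(model_run.annotation_groups())) == 0
```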