Skip to content

Commit 31b79da

Browse files
authored
Release 3.55.0 (#1276)
2 parents 9a8b9f7 + 748b562 commit 31b79da

File tree

10 files changed

+66
-12
lines changed

10 files changed

+66
-12
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
# Changelog
2+
3+
# Version 3.55.0 (2023-11-06)
4+
## Fixed
5+
* Fix the instantiation of `failed_data_row_ids` in Batch. This fix will address the issue with the `create_batch` method for more than 1,000 data rows.
6+
* Improve Python type hints for the `data_rows()` method in the Dataset.
7+
* Fix the `DataRowMetadataOntology` method `bulk_export()` to properly export global key(s).
8+
* In the `DataRowMetadataOntology` method `update_enum_option`, provide a more descriptive error message when the enum option is not valid.
9+
210
# Version 3.54.1 (2023-10-17)
311
## Notebooks
412
* Revised the notebooks to update outdated examples when using `client.create_project()` to create a project

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
copyright = '2021, Labelbox'
2222
author = 'Labelbox'
2323

24-
release = '3.54.1'
24+
release = '3.55.0'
2525

2626
# -- General configuration ---------------------------------------------------
2727

labelbox/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
name = "labelbox"
2-
__version__ = "3.54.1"
2+
3+
__version__ = "3.55.0"
34

45
from labelbox.client import Client
56
from labelbox.schema.project import Project

labelbox/schema/batch.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def __init__(self,
4343
client,
4444
project_id,
4545
*args,
46-
failed_data_row_ids=None,
46+
failed_data_row_ids=[],
4747
**kwargs):
4848
super().__init__(client, *args, **kwargs)
4949
self.project_id = project_id
@@ -187,6 +187,11 @@ def delete_labels(self, set_labels_as_template=False) -> None:
187187
experimental=True)
188188
return res
189189

190+
# modify this function to return an empty list if there are no failed data rows
191+
190192
@property
191193
def failed_data_row_ids(self):
194+
if self._failed_data_row_ids is None:
195+
self._failed_data_row_ids = []
196+
192197
return (x for x in self._failed_data_row_ids)

labelbox/schema/data_row_metadata.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ def _parse_ontology(raw_ontology) -> List[DataRowMetadataSchema]:
255255
options = []
256256
for option in schema["options"]:
257257
option["uid"] = option["id"]
258-
259258
options.append(
260259
DataRowMetadataSchema(**{
261260
**option,
@@ -366,7 +365,12 @@ def update_enum_option(self, name: str, option: str,
366365
raise ValueError(
367366
f"Updating Enum option is only supported for Enum metadata schema"
368367
)
368+
valid_options: List[str] = [o.name for o in schema.options]
369369

370+
if option not in valid_options:
371+
raise ValueError(
372+
f"Enum option '{option}' is not a valid option for Enum '{name}', valid options are: {valid_options}"
373+
)
370374
upsert_schema = _UpsertCustomMetadataSchemaInput(id=schema.uid,
371375
name=schema.name,
372376
kind=schema.kind.value)
@@ -431,7 +435,9 @@ def parse_metadata(
431435
if "fields" in dr:
432436
fields = self.parse_metadata_fields(dr["fields"])
433437
parsed.append(
434-
DataRowMetadata(data_row_id=dr["dataRowId"], fields=fields))
438+
DataRowMetadata(data_row_id=dr["dataRowId"],
439+
global_key=dr["globalKey"],
440+
fields=fields))
435441
return parsed
436442

437443
def parse_metadata_fields(
@@ -617,6 +623,7 @@ def _bulk_export(_data_row_ids: List[str]) -> List[DataRowMetadata]:
617623
query = """query dataRowCustomMetadataPyApi($dataRowIds: [ID!]!) {
618624
dataRowCustomMetadata(where: {dataRowIds : $dataRowIds}) {
619625
dataRowId
626+
globalKey
620627
fields {
621628
value
622629
schemaId

labelbox/schema/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ class Dataset(DbObject, Updateable, Deletable):
6868

6969
def data_rows(
7070
self,
71-
from_cursor: str = None,
72-
where: Comparison = None,
71+
from_cursor: Optional[str] = None,
72+
where: Optional[Comparison] = None,
7373
) -> PaginatedCollection:
7474
"""
7575
Custom method to paginate data_rows via cursor.

tests/data/annotation_types/data/test_text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def test_text():
2222

2323

2424
def test_url():
25-
url = "https://filesamples.com/samples/document/txt/sample3.txt"
25+
url = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/sample3.txt"
2626
text_data = TextData(url=url)
2727
text = text_data.value
2828
assert len(text) == 3541

tests/integration/test_batch.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,17 @@ def get_data_row_ids(ds: Dataset):
1414

1515

1616
def test_create_batch(project: Project, big_dataset_data_row_ids: List[str]):
17-
batch = project.create_batch("test-batch", big_dataset_data_row_ids, 3)
17+
batch = project.create_batch("test-batch",
18+
big_dataset_data_row_ids,
19+
3,
20+
consensus_settings={
21+
'number_of_labels': 3,
22+
'coverage_percentage': 0.1
23+
})
24+
1825
assert batch.name == "test-batch"
1926
assert batch.size == len(big_dataset_data_row_ids)
27+
assert len([dr for dr in batch.failed_data_row_ids]) == 0
2028

2129

2230
def test_create_batch_with_invalid_data_rows_ids(project: Project):
@@ -101,6 +109,7 @@ def test_create_batch_async(project: Project,
101109
priority=3)
102110
assert batch.name == "big-batch"
103111
assert batch.size == len(big_dataset_data_row_ids)
112+
assert len([dr for dr in batch.failed_data_row_ids]) == 0
104113

105114

106115
def test_create_batch_with_consensus_settings(project: Project,

tests/integration/test_data_row_metadata.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,15 @@ def test_export_empty_metadata(client, configured_project_with_label,
9494
assert label.data.metadata == []
9595

9696

97+
def test_bulk_export_datarow_metadata(data_row, mdo: DataRowMetadataOntology):
98+
metadata = make_metadata(data_row.uid)
99+
mdo.bulk_upsert([metadata])
100+
exported = mdo.bulk_export([data_row.uid])
101+
assert exported[0].global_key == data_row.global_key
102+
assert exported[0].data_row_id == data_row.uid
103+
assert len([field for field in exported[0].fields]) == 3
104+
105+
97106
def test_get_datarow_metadata_ontology(mdo):
98107
assert len(mdo.fields)
99108
assert len(mdo.reserved_fields)
@@ -316,6 +325,8 @@ def test_parse_raw_metadata(mdo):
316325
example = {
317326
'dataRowId':
318327
'ckr6kkfx801ui0yrtg9fje8xh',
328+
'globalKey':
329+
'global-key-1',
319330
'fields': [
320331
{
321332
'schemaId': 'cko8s9r5v0001h2dk9elqdidh',
@@ -344,6 +355,7 @@ def test_parse_raw_metadata(mdo):
344355
assert len(parsed) == 1
345356
for row in parsed:
346357
assert row.data_row_id == example["dataRowId"]
358+
assert row.global_key == example["globalKey"]
347359
assert len(row.fields) == 4
348360

349361
for row in parsed:

tests/integration/test_data_rows.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -964,7 +964,8 @@ def test_data_row_bulk_creation_sync_with_same_global_keys(
964964
assert list(dataset.data_rows())[0].global_key == global_key_1
965965

966966

967-
def test_create_conversational_text(dataset, conversational_content):
967+
@pytest.fixture
968+
def converstational_data_rows(dataset, conversational_content):
968969
examples = [
969970
{
970971
**conversational_content, 'media_type':
@@ -975,9 +976,20 @@ def test_create_conversational_text(dataset, conversational_content):
975976
"conversationalData": conversational_content['row_data']['messages']
976977
} # Old way to check for backwards compatibility
977978
]
978-
dataset.create_data_rows_sync(examples)
979+
task = dataset.create_data_rows(examples)
980+
task.wait_till_done()
981+
assert task.status == "COMPLETE"
982+
979983
data_rows = list(dataset.data_rows())
980-
assert len(data_rows) == len(examples)
984+
985+
yield data_rows
986+
for dr in data_rows:
987+
dr.delete()
988+
989+
990+
def test_create_conversational_text(converstational_data_rows,
991+
conversational_content):
992+
data_rows = converstational_data_rows
981993
for data_row in data_rows:
982994
assert requests.get(
983995
data_row.row_data).json() == conversational_content['row_data']

Comments (0)