
Commit ef3b26c

Merge pull request #799 from Labelbox/develop
3.33.0
2 parents a40a23a + 0ccd47b

File tree

10 files changed: +188 −33 lines

CHANGELOG.md
Lines changed: 8 additions & 0 deletions

@@ -1,5 +1,13 @@
 # Changelog
 
+# Version 3.33.0 (2022-12-13)
+### Added
+* Added SDK support for creating batches with up to 100k data rows
+* Added optional media_type to `client.create_ontology_from_feature_schemas()` and `client.create_ontology()`
+
+### Changed
+* String representation of `DbObject` subclasses are now formatted
+
 # Version 3.32.0 (2022-12-02)
 ### Added
 * Added `HTML` Enum to `MediaType`. `HTML` is introduced as a new asset type in Labelbox.

docs/source/conf.py
Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
 copyright = '2021, Labelbox'
 author = 'Labelbox'
 
-release = '3.32.0'
+release = '3.33.0'
 
 # -- General configuration ---------------------------------------------------

labelbox/__init__.py
Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 name = "labelbox"
-__version__ = "3.32.0"
+__version__ = "3.33.0"
 
 from labelbox.client import Client
 from labelbox.schema.project import Project

labelbox/client.py
Lines changed: 30 additions & 13 deletions

@@ -36,7 +36,7 @@
 from labelbox.schema.slice import CatalogSlice
 from labelbox.schema.queue_mode import QueueMode
 
-from labelbox.schema.media_type import MediaType
+from labelbox.schema.media_type import MediaType, get_media_type_validation_error
 
 logger = logging.getLogger(__name__)
 
@@ -853,15 +853,18 @@ def rootSchemaPayloadToFeatureSchema(client, payload):
             rootSchemaPayloadToFeatureSchema,
             ['rootSchemaNodes', 'nextCursor'])
 
-    def create_ontology_from_feature_schemas(self, name,
-                                             feature_schema_ids) -> Ontology:
+    def create_ontology_from_feature_schemas(self,
+                                             name,
+                                             feature_schema_ids,
+                                             media_type=None) -> Ontology:
         """
         Creates an ontology from a list of feature schema ids
 
         Args:
             name (str): Name of the ontology
             feature_schema_ids (List[str]): List of feature schema ids corresponding to
                 top level tools and classifications to include in the ontology
+            media_type (MediaType or None): Media type of a new ontology
         Returns:
             The created Ontology
         """
@@ -891,9 +894,9 @@ def create_ontology_from_feature_schemas(self, name,
                     "Neither `tool` or `classification` found in the normalized feature schema"
                 )
         normalized = {'tools': tools, 'classifications': classifications}
-        return self.create_ontology(name, normalized)
+        return self.create_ontology(name, normalized, media_type)
 
-    def create_ontology(self, name, normalized) -> Ontology:
+    def create_ontology(self, name, normalized, media_type=None) -> Ontology:
         """
         Creates an ontology from normalized data
             >>> normalized = {"tools" : [{'tool': 'polygon', 'name': 'cat', 'color': 'black'}], "classifications" : []}
@@ -910,13 +913,27 @@ def create_ontology(self, name, normalized) -> Ontology:
         Args:
             name (str): Name of the ontology
             normalized (dict): A normalized ontology payload. See above for details.
+            media_type (MediaType or None): Media type of a new ontology
         Returns:
             The created Ontology
         """
+
+        if media_type:
+            if MediaType.is_supported(media_type):
+                media_type = media_type.value
+            else:
+                raise get_media_type_validation_error(media_type)
+
         query_str = """mutation upsertRootSchemaNodePyApi($data: UpsertOntologyInput!){
             upsertOntology(data: $data){ %s }
         } """ % query.results_query_part(Entity.Ontology)
-        params = {'data': {'name': name, 'normalized': json.dumps(normalized)}}
+        params = {
+            'data': {
+                'name': name,
+                'normalized': json.dumps(normalized),
+                'mediaType': media_type
+            }
+        }
         res = self.execute(query_str, params)
         return Entity.Ontology(self, res['upsertOntology'])
 
@@ -1035,9 +1052,9 @@ def _format_failed_rows(rows: Dict[str, str],
         )
 
         # Start assign global keys to data rows job
-        query_str = """mutation assignGlobalKeysToDataRowsPyApi($globalKeyDataRowLinks: [AssignGlobalKeyToDataRowInput!]!) {
-            assignGlobalKeysToDataRows(data: {assignInputs: $globalKeyDataRowLinks}) {
-                jobId
+        query_str = """mutation assignGlobalKeysToDataRowsPyApi($globalKeyDataRowLinks: [AssignGlobalKeyToDataRowInput!]!) {
+            assignGlobalKeysToDataRows(data: {assignInputs: $globalKeyDataRowLinks}) {
+                jobId
             }
         }
         """
@@ -1172,7 +1189,7 @@ def _format_failed_rows(rows: List[str],
 
         # Query string for retrieving job status and result, if job is done
         result_query_str = """query getDataRowsForGlobalKeysResultPyApi($jobId: ID!) {
-            dataRowsForGlobalKeysResult(jobId: {id: $jobId}) { data {
+            dataRowsForGlobalKeysResult(jobId: {id: $jobId}) { data {
                 fetchedDataRows { id }
                 notFoundGlobalKeys
                 accessDeniedGlobalKeys
@@ -1246,8 +1263,8 @@ def clear_global_keys(
 
             'Results' contains a list global keys that were successfully cleared.
 
-            'Errors' contains a list of global_keys correspond to the data rows that could not be
-            modified, accessed by the user, or not found.
+            'Errors' contains a list of global_keys correspond to the data rows that could not be
+            modified, accessed by the user, or not found.
         Examples:
             >>> job_result = client.get_data_row_ids_for_global_keys(["key1","key2"])
             >>> print(job_result['status'])
@@ -1271,7 +1288,7 @@ def _format_failed_rows(rows: List[str],
 
         # Query string for retrieving job status and result, if job is done
         result_query_str = """query clearGlobalKeysResultPyApi($jobId: ID!) {
-            clearGlobalKeysResult(jobId: {id: $jobId}) { data {
+            clearGlobalKeysResult(jobId: {id: $jobId}) { data {
                 clearedGlobalKeys
                 failedToClearGlobalKeys
                 notFoundGlobalKeys
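
Usage note: a minimal sketch of calling the new optional media_type argument, assuming a configured client; the API key, ontology name, and feature schema id below are placeholders, not values from this commit.

from labelbox import Client
from labelbox.schema.media_type import MediaType

client = Client(api_key="<YOUR_API_KEY>")  # placeholder credential

# media_type is optional; omitting it keeps the previous behavior.
ontology = client.create_ontology_from_feature_schemas(
    name="Image ontology",                       # illustrative name
    feature_schema_ids=["<FEATURE_SCHEMA_ID>"],  # placeholder id
    media_type=MediaType.Image)

# Any value that MediaType.is_supported() rejects raises the TypeError
# built by get_media_type_validation_error().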

labelbox/orm/db_object.py
Lines changed: 3 additions & 1 deletion

@@ -1,6 +1,7 @@
 from datetime import datetime, timezone
 from functools import wraps
 import logging
+import json
 
 from labelbox import utils
 from labelbox.exceptions import InvalidQueryError, InvalidAttributeError
@@ -92,7 +93,8 @@ def __str__(self):
         attribute_values = {
             field.name: getattr(self, field.name) for field in self.fields()
         }
-        return "<%s %s>" % (self.type_name().split(".")[-1], attribute_values)
+        return "<%s %s>" % (self.type_name().split(".")[-1],
+                            json.dumps(attribute_values, indent=4, default=str))
 
     def __eq__(self, other):
         return (isinstance(other, DbObject) and
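
Usage note: a small sketch of what the new formatting does; the attribute dict and entity name here are invented for illustration, not taken from the SDK.

import json
from datetime import datetime

# Hypothetical attribute dict; real keys depend on the DbObject subclass.
attribute_values = {
    "uid": "cl1234",
    "name": "My Project",
    "created_at": datetime(2022, 12, 13),
}

# json.dumps pretty-prints the attributes; default=str stringifies values
# (like datetime) that are not natively JSON-serializable.
print("<%s %s>" % ("Project", json.dumps(attribute_values, indent=4, default=str)))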

labelbox/orm/model.py
Lines changed: 2 additions & 2 deletions

@@ -389,7 +389,7 @@ def _attributes_of_type(cls, attr_type):
 
     @classmethod
     def fields(cls):
-        """ Returns a generateor that yields all the Fields declared in a
+        """ Returns a generator that yields all the Fields declared in a
        concrete subclass.
        """
        for attr in cls._attributes_of_type(Field):
@@ -398,7 +398,7 @@ def fields(cls):
 
     @classmethod
     def relationships(cls):
-        """ Returns a generateor that yields all the Relationships declared in
+        """ Returns a generator that yields all the Relationships declared in
        a concrete subclass.
        """
        return cls._attributes_of_type(Relationship)

labelbox/schema/media_type.py
Lines changed: 6 additions & 0 deletions

@@ -44,3 +44,9 @@ def get_supported_members(cls):
             item for item in cls.__members__
             if item not in ["Unknown", "Unsupported"]
         ]
+
+
+def get_media_type_validation_error(media_type):
+    return TypeError(f"{media_type} is not a valid media type. Use"
+                     f" any of {MediaType.get_supported_members()}"
+                     " from MediaType. Example: MediaType.Image.")

labelbox/schema/project.py
Lines changed: 127 additions & 14 deletions

@@ -18,10 +18,10 @@
 from labelbox.orm.model import Entity, Field, Relationship
 from labelbox.pagination import PaginatedCollection
 from labelbox.schema.consensus_settings import ConsensusSettings
+from labelbox.schema.data_row import DataRow
 from labelbox.schema.media_type import MediaType
 from labelbox.schema.queue_mode import QueueMode
 from labelbox.schema.resource_tag import ResourceTag
-from labelbox.schema.data_row import DataRow
 
 if TYPE_CHECKING:
     from labelbox import BulkImportRequest
@@ -608,22 +608,31 @@ def create_batch(self,
 
         self._wait_until_data_rows_are_processed(
             dr_ids, self._wait_processing_max_seconds)
-        method = 'createBatchV2'
-        query_str = """mutation %sPyApi($projectId: ID!, $batchInput: CreateBatchInput!) {
-            project(where: {id: $projectId}) {
-              %s(input: $batchInput) {
-                batch {
-                  %s
-                }
-                failedDataRowIds
-              }
-            }
-        }
-        """ % (method, method, query.results_query_part(Entity.Batch))
 
         if consensus_settings:
             consensus_settings = ConsensusSettings(**consensus_settings).dict(
                 by_alias=True)
+
+        if len(dr_ids) >= 10_000:
+            return self._create_batch_async(name, dr_ids, priority,
+                                            consensus_settings)
+        else:
+            return self._create_batch_sync(name, dr_ids, priority,
+                                           consensus_settings)
+
+    def _create_batch_sync(self, name, dr_ids, priority, consensus_settings):
+        method = 'createBatchV2'
+        query_str = """mutation %sPyApi($projectId: ID!, $batchInput: CreateBatchInput!) {
+            project(where: {id: $projectId}) {
+              %s(input: $batchInput) {
+                batch {
+                  %s
+                }
+                failedDataRowIds
+              }
+            }
+        }
+        """ % (method, method, query.results_query_part(Entity.Batch))
         params = {
             "projectId": self.uid,
             "batchInput": {
@@ -633,7 +642,6 @@ def create_batch(self,
                 "consensusSettings": consensus_settings
             }
         }
-
         res = self.client.execute(query_str,
                                   params,
                                   timeout=180.0,
@@ -645,6 +653,111 @@ def create_batch(self,
             batch,
             failed_data_row_ids=res['failedDataRowIds'])
 
+    def _create_batch_async(self,
+                            name: str,
+                            dr_ids: List[str],
+                            priority: int = 5,
+                            consensus_settings: Optional[Dict[str,
+                                                              float]] = None):
+        method = 'createEmptyBatch'
+        create_empty_batch_mutation_str = """mutation %sPyApi($projectId: ID!, $input: CreateEmptyBatchInput!) {
+            project(where: {id: $projectId}) {
+              %s(input: $input) {
+                id
+              }
+            }
+        }
+        """ % (method, method)
+
+        params = {
+            "projectId": self.uid,
+            "input": {
+                "name": name,
+                "consensusSettings": consensus_settings
+            }
+        }
+
+        res = self.client.execute(create_empty_batch_mutation_str,
+                                  params,
+                                  timeout=180.0,
+                                  experimental=True)["project"][method]
+        batch_id = res['id']
+
+        method = 'addDataRowsToBatchAsync'
+        add_data_rows_mutation_str = """mutation %sPyApi($projectId: ID!, $input: AddDataRowsToBatchInput!) {
+            project(where: {id: $projectId}) {
+              %s(input: $input) {
+                taskId
+              }
+            }
+        }
+        """ % (method, method)
+
+        params = {
+            "projectId": self.uid,
+            "input": {
+                "batchId": batch_id,
+                "dataRowIds": dr_ids,
+                "priority": priority,
+            }
+        }
+
+        res = self.client.execute(add_data_rows_mutation_str,
+                                  params,
+                                  timeout=180.0,
+                                  experimental=True)["project"][method]
+
+        task_id = res['taskId']
+
+        timeout_seconds = 600
+        sleep_time = 2
+        get_task_query_str = """query %s($taskId: ID!) {
+            task(where: {id: $taskId}) {
+                status
+            }
+        }
+        """ % "getTaskPyApi"
+
+        while True:
+            task_status = self.client.execute(
+                get_task_query_str, {'taskId': task_id},
+                experimental=True)['task']['status']
+
+            if task_status == "COMPLETE":
+                # obtain batch entity to return
+                get_batch_str = """query %s($projectId: ID!, $batchId: ID!) {
+                    project(where: {id: $projectId}) {
+                        batches(where: {id: $batchId}) {
+                            nodes {
+                                %s
+                            }
+                        }
+                    }
+                }
+                """ % ("getProjectBatchPyApi",
+                       query.results_query_part(Entity.Batch))
+
+                batch = self.client.execute(
+                    get_batch_str, {
+                        "projectId": self.uid,
+                        "batchId": batch_id
+                    },
+                    timeout=180.0,
+                    experimental=True)["project"]["batches"]["nodes"][0]
+
+                # TODO async endpoints currently do not provide failed_data_row_ids in response
+                return Entity.Batch(self.client, self.uid, batch)
+            elif task_status == "IN_PROGRESS":
+                timeout_seconds -= sleep_time
+                if timeout_seconds <= 0:
+                    raise LabelboxError(
+                        f"Timed out while waiting for batch to be created.")
+                logger.debug("Creating batch, waiting for server...", self.uid)
+                time.sleep(sleep_time)
+                continue
+            else:
+                raise LabelboxError(f"Batch was not created successfully.")
+
     def _update_queue_mode(self, mode: "QueueMode") -> "QueueMode":
         """
         Updates the queueing mode of this project.
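
Usage note: a minimal sketch of the new routing from the caller's side, assuming an existing project and dataset; the API key and ids are placeholders.

from labelbox import Client

client = Client(api_key="<YOUR_API_KEY>")     # placeholder
project = client.get_project("<PROJECT_ID>")  # placeholder
dataset = client.get_dataset("<DATASET_ID>")  # placeholder

data_row_ids = [dr.uid for dr in dataset.export_data_rows()]

# With 10,000 or more data row ids, create_batch now takes the async path
# (createEmptyBatch + addDataRowsToBatchAsync + task polling); smaller lists
# still use the synchronous createBatchV2 mutation.
batch = project.create_batch("big-batch", data_row_ids, priority=5)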

tests/integration/test_batch.py
Lines changed: 9 additions & 0 deletions

@@ -55,6 +55,15 @@ def test_create_batch(batch_project: Project, big_dataset: Dataset):
     assert batch.size == len(data_rows)
 
 
+def test_create_batch_async(batch_project: Project, big_dataset: Dataset):
+    data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
+    batch_project._wait_until_data_rows_are_processed(
+        data_rows, batch_project._wait_processing_max_seconds)
+    batch = batch_project._create_batch_async("big-batch", data_rows, 3)
+    assert batch.name == "big-batch"
+    assert batch.size == len(data_rows)
+
+
 def test_create_batch_with_consensus_settings(batch_project: Project,
                                               small_dataset: Dataset):
     data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]

tests/integration/test_data_rows.py
Lines changed: 1 addition & 1 deletion

@@ -699,7 +699,7 @@ def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image):
     assert task.status == "FAILED"
     assert len(task.failed_data_rows) > 0
     assert len(list(dataset.data_rows())) == 0
-    assert task.errors == "Import job failed"
+    assert task.errors == "Data rows contain empty string or duplicate global keys, which are not allowed"
 
     task = dataset.create_data_rows([{
         DataRow.row_data: sample_image,
