Skip to content

[PLT-1993] Fixed relationship label bug #1918

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,24 +1,9 @@
import copy
import logging
import uuid
from collections import defaultdict, deque
from typing import Any, Deque, Dict, Generator, List, Set, Union
from typing import Any, Dict, Generator

from labelbox.data.annotation_types.annotation import ObjectAnnotation
from labelbox.data.annotation_types.classification.classification import (
ClassificationAnnotation,
)
from labelbox.data.annotation_types.metrics.confusion_matrix import (
ConfusionMatrixMetric,
)
from labelbox.data.annotation_types.metrics.scalar import ScalarMetric
from labelbox.data.annotation_types.video import VideoMaskAnnotation

from ...annotation_types.collection import LabelCollection
from ...annotation_types.relationship import RelationshipAnnotation
from ...annotation_types.mmc import MessageEvaluationTaskAnnotation
from .label import NDLabel
import copy

logger = logging.getLogger(__name__)

Expand All @@ -42,67 +27,8 @@ def serialize(
Returns:
A generator for accessing the ndjson representation of the data
"""
used_uuids: Set[uuid.UUID] = set()

relationship_uuids: Dict[uuid.UUID, Deque[uuid.UUID]] = defaultdict(
deque
)

# UUIDs are private properties used to enhance UX when defining relationships.
# They are created for all annotations, but only utilized for relationships.
# To avoid overwriting, UUIDs must be unique across labels.
# Non-relationship annotation UUIDs are regenerated when they are reused.
# For relationship annotations, during first pass, we update the UUIDs of the source and target annotations.
# During the second pass, we update the UUIDs of the annotations referenced by the relationship annotations.
for label in labels:
uuid_safe_annotations: List[
Union[
ClassificationAnnotation,
ObjectAnnotation,
VideoMaskAnnotation,
ScalarMetric,
ConfusionMatrixMetric,
RelationshipAnnotation,
MessageEvaluationTaskAnnotation,
]
] = []
# First pass to get all RelationshipAnnotations
# and update the UUIDs of the source and target annotations
for annotation in label.annotations:
if isinstance(annotation, RelationshipAnnotation):
annotation = copy.deepcopy(annotation)
new_source_uuid = uuid.uuid4()
new_target_uuid = uuid.uuid4()
relationship_uuids[annotation.value.source._uuid].append(
new_source_uuid
)
relationship_uuids[annotation.value.target._uuid].append(
new_target_uuid
)
annotation.value.source._uuid = new_source_uuid
annotation.value.target._uuid = new_target_uuid
if annotation._uuid in used_uuids:
annotation._uuid = uuid.uuid4()
used_uuids.add(annotation._uuid)
uuid_safe_annotations.append(annotation)
# Second pass to update UUIDs for annotations referenced by RelationshipAnnotations
for annotation in label.annotations:
if not isinstance(
annotation, RelationshipAnnotation
) and hasattr(annotation, "_uuid"):
annotation = copy.deepcopy(annotation)
next_uuids = relationship_uuids[annotation._uuid]
if len(next_uuids) > 0:
annotation._uuid = next_uuids.popleft()

if annotation._uuid in used_uuids:
annotation._uuid = uuid.uuid4()
used_uuids.add(annotation._uuid)
uuid_safe_annotations.append(annotation)
else:
if not isinstance(annotation, RelationshipAnnotation):
uuid_safe_annotations.append(annotation)
label.annotations = uuid_safe_annotations
for example in NDLabel.from_common([label]):
annotation_uuid = getattr(example, "uuid", None)
res = example.model_dump(
Expand Down
165 changes: 32 additions & 133 deletions libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from collections import defaultdict
import copy
from itertools import groupby
from operator import itemgetter
from typing import Dict, Generator, List, Tuple, Union
from typing import Generator, List, Tuple, Union
from uuid import uuid4

from pydantic import BaseModel

from ...annotation_types.annotation import (
ClassificationAnnotation,
ObjectAnnotation,
)
from ...annotation_types.collection import LabelCollection, LabelGenerator
from ...annotation_types.data.generic_data_row_data import GenericDataRowData
from ...annotation_types.collection import LabelCollection
from ...annotation_types.label import Label
from ...annotation_types.llm_prompt_response.prompt import (
PromptClassificationAnnotation,
Expand All @@ -23,7 +24,6 @@
VideoMaskAnnotation,
VideoObjectAnnotation,
)
from .base import DataRow
from .classification import (
NDChecklistSubclass,
NDClassification,
Expand Down Expand Up @@ -60,143 +60,19 @@
class NDLabel(BaseModel):
annotations: AnnotationType

class _Relationship(BaseModel):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code was only used back when we used to deserialize, so it is no longer needed; it is also very complicated.

"""This object holds information about the relationship"""

ndjson: NDRelationship
source: str
target: str

class _AnnotationGroup(BaseModel):
"""Stores all the annotations and relationships per datarow"""

data_row: DataRow = None
ndjson_annotations: Dict[str, AnnotationType] = {}
relationships: List["NDLabel._Relationship"] = []

def to_common(self) -> LabelGenerator:
annotation_groups = defaultdict(NDLabel._AnnotationGroup)

for ndjson_annotation in self.annotations:
key = (
ndjson_annotation.data_row.id
or ndjson_annotation.data_row.global_key
)
group = annotation_groups[key]

if isinstance(ndjson_annotation, NDRelationship):
group.relationships.append(
NDLabel._Relationship(
ndjson=ndjson_annotation,
source=ndjson_annotation.relationship.source,
target=ndjson_annotation.relationship.target,
)
)
else:
# if this is the first object in this group, we
# take note of the DataRow this group belongs to
# and store it in the _AnnotationGroup
if not group.ndjson_annotations:
group.data_row = ndjson_annotation.data_row

# if this assertion fails and it's a valid case,
# we need to change the value type of
# `_AnnotationGroup.ndjson_annotations` to accept a list of objects
# and adapt the code to support duplicate UUIDs
assert (
ndjson_annotation.uuid not in group.ndjson_annotations
), f"UUID '{ndjson_annotation.uuid}' is not unique"

group.ndjson_annotations[ndjson_annotation.uuid] = (
ndjson_annotation
)

return LabelGenerator(
data=self._generate_annotations(annotation_groups)
)

@classmethod
def from_common(
cls, data: LabelCollection
) -> Generator["NDLabel", None, None]:
for label in data:
if all(
isinstance(model, RelationshipAnnotation)
for model in label.annotations
):
yield from cls._create_relationship_annotations(label)
yield from cls._create_non_video_annotations(label)
yield from cls._create_video_annotations(label)

def _generate_annotations(
self, annotation_groups: Dict[str, _AnnotationGroup]
) -> Generator[Label, None, None]:
for _, group in annotation_groups.items():
relationship_annotations: Dict[str, ObjectAnnotation] = {}
annotations = []
# first, we iterate through all the NDJSON objects and store the
# deserialized objects in the `relationship_annotations` mapping
# object *if* the object can be used in a relationship
for uuid, ndjson_annotation in group.ndjson_annotations.items():
if isinstance(ndjson_annotation, NDSegments):
annotations.extend(
NDSegments.to_common(
ndjson_annotation,
ndjson_annotation.name,
ndjson_annotation.schema_id,
)
)
elif isinstance(ndjson_annotation, NDVideoMasks):
annotations.append(
NDVideoMasks.to_common(ndjson_annotation)
)
elif isinstance(ndjson_annotation, NDObjectType.__args__):
annotation = NDObject.to_common(ndjson_annotation)
annotations.append(annotation)
relationship_annotations[uuid] = annotation
elif isinstance(
ndjson_annotation, NDClassificationType.__args__
):
annotations.extend(
NDClassification.to_common(ndjson_annotation)
)
elif isinstance(
ndjson_annotation, (NDScalarMetric, NDConfusionMatrixMetric)
):
annotations.append(
NDMetricAnnotation.to_common(ndjson_annotation)
)
elif isinstance(ndjson_annotation, NDPromptClassificationType):
annotation = NDPromptClassification.to_common(
ndjson_annotation
)
annotations.append(annotation)
elif isinstance(ndjson_annotation, NDMessageTask):
annotations.append(ndjson_annotation.to_common())
else:
raise TypeError(
f"Unsupported annotation. {type(ndjson_annotation)}"
)

# after all the annotations have been discovered, we can now create
# the relationship objects and use references to the objects
# involved
for relationship in group.relationships:
try:
source, target = (
relationship_annotations[relationship.source],
relationship_annotations[relationship.target],
)
except KeyError:
raise ValueError(
f"Relationship object refers to nonexistent object with UUID '{relationship.source}' and/or '{relationship.target}'"
)
annotations.append(
NDRelationship.to_common(
relationship.ndjson, source, target
)
)

yield Label(
annotations=annotations,
data=GenericDataRowData,
)

@staticmethod
def _get_consecutive_frames(
frames_indices: List[int],
Expand Down Expand Up @@ -317,3 +193,26 @@ def _create_non_video_annotations(cls, label: Label):
raise TypeError(
f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value',annotation))}`"
)

@classmethod
def _create_relationship_annotations(cls, label: Label):
    """Validate and yield the relationship annotations of ``label``.

    This is bound as a classmethod because ``from_common`` invokes it as
    ``cls._create_relationship_annotations(label)``; as a plain function
    that call would bind ``label`` to ``cls`` and fail with a missing
    argument error.

    Args:
        label: Label whose ``annotations`` are scanned for
            ``RelationshipAnnotation`` instances.

    Yields:
        Each ``RelationshipAnnotation`` found in ``label.annotations``,
        in its original order.

    Raises:
        TypeError: If the source or the target of a relationship is not
            an ``ObjectAnnotation``.
    """
    # Materialize the matches first so the scan of ``label.annotations``
    # is finished before anything is yielded to the consumer.
    relationship_annotations = [
        annotation
        for annotation in label.annotations
        if isinstance(annotation, RelationshipAnnotation)
    ]
    for relationship_annotation in relationship_annotations:
        source = copy.copy(relationship_annotation.value.source)
        target = copy.copy(relationship_annotation.value.target)
        if not (
            isinstance(source, ObjectAnnotation)
            and isinstance(target, ObjectAnnotation)
        ):
            raise TypeError(
                f"Unable to create relationship with non ObjectAnnotations. `Source: {type(source)} Target: {type(target)}`"
            )
        # Only generate a fallback UUID when an endpoint has none.
        # NOTE(review): the UUIDs are assigned to local shallow copies,
        # while the original annotation is what gets yielded — confirm
        # whether the copies were meant to be attached to the result.
        if not source._uuid:
            source._uuid = uuid4()
        if not target._uuid:
            target._uuid = uuid4()
        yield relationship_annotation
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,6 @@ def test_create_from_label_objects(
annotations=[
ObjectAnnotation(
name="polygon",
extra={
"uuid": "6d10fa30-3ea0-4e6c-bbb1-63f5c29fe3e4",
},
value=Polygon(
points=[
Point(x=147.692, y=118.154),
Expand All @@ -233,19 +230,13 @@ def test_create_from_label_objects(
),
ObjectAnnotation(
name="bbox",
extra={
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be a breaking change, so I would like a second opinion. The `extra` field breaks the annotation if a customer decides to add it. The docs do not show that they need to do this, and I don't see why they would. The old, complicated logic removed these keys, but to simplify things I think it is best to just not support this type of extra key.

"uuid": "15b7138f-4bbc-42c5-ae79-45d87b0a3b2a",
},
value=Rectangle(
start=Point(x=58.0, y=48.0),
end=Point(x=70.0, y=113.0),
),
),
ObjectAnnotation(
name="polyline",
extra={
"uuid": "cf4c6df9-c39c-4fbc-9541-470f6622978a",
},
value=Line(
points=[
Point(x=147.692, y=118.154),
Expand Down
Loading
Loading