Skip to content

[AL-5662] Add batch export v2 #1116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions labelbox/schema/batch.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Generator, TYPE_CHECKING

from labelbox.orm.db_object import DbObject, experimental
from labelbox.orm import query
from labelbox.orm.model import Entity, Field, Relationship
Expand Down
14 changes: 13 additions & 1 deletion labelbox/schema/export_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ class SharedExportFilters(TypedDict):


class ProjectExportFilters(SharedExportFilters):
pass
batch_id: Optional[str]
""" Batch id to export
Example:
>>> "clgo3lyax0000veeezdbu3ws4"
"""


class DatasetExportFilters(SharedExportFilters):
Expand Down Expand Up @@ -178,4 +182,12 @@ def _get_timezone() -> str:
"operator": "is",
"type": "data_row_id"
})

batch_id = filters.get("batch_id")
if batch_id:
search_query.append({
"ids": [batch_id],
"operator": "is",
"type": "batch"
})
return search_query
1 change: 1 addition & 0 deletions labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ def export_v2(self,
"last_activity_at": None,
"label_created_at": None,
"data_row_ids": None,
"batch_id": None,
})

mutation_name = "exportDataRowsInProject"
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,4 +204,4 @@ def test_delete_labels_with_templates(batch_project: Project,
exported_data_rows = list(batch.export_data_rows())
res = batch.delete_labels(labels_as_template=True)
exported_data_rows = list(batch.export_data_rows())
assert len(exported_data_rows) == 5
assert len(exported_data_rows) == 5
40 changes: 40 additions & 0 deletions tests/integration/test_project.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import time
import os
from typing import Tuple
import uuid

import pytest
import requests

from labelbox import Project, LabelingFrontend, Dataset
from labelbox.exceptions import InvalidQueryError
from labelbox.schema.data_row import DataRow
from labelbox.schema.label import Label
from labelbox.schema.media_type import MediaType
from labelbox.schema.queue_mode import QueueMode

Expand Down Expand Up @@ -42,6 +45,43 @@ def test_project(client, rand_gen):
assert project not in projects


def test_batch_project_export_v2(
configured_batch_project_with_label: Tuple[Project, Dataset, DataRow,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: We don't need to define all the return types since this is a test. It may also be cleaner not to have to import all of the types.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to leave it: if anyone needs to make modifications, they will have full IntelliSense support without looking back at the definitions. That's what I always find hard about our Python services - we're missing type annotations in a lot of places, and the code is hard to reason about without poking around.

Label],
export_v2_test_helpers, dataset: Dataset, image_url: str):
project, dataset, *_ = configured_batch_project_with_label

batch = list(project.batches())[0]
filters = {
"last_activity_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"label_created_at": ["2000-01-01 00:00:00", "2050-01-01 00:00:00"],
"batch_id": batch.uid,
}
params = {
"include_performance_details": True,
"include_labels": True,
"media_type_override": MediaType.Image
}
task_name = "test_batch_export_v2"
task = dataset.create_data_rows([
{
"row_data": image_url,
"external_id": "my-image"
},
] * 2)
task.wait_till_done()
data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
batch_one = f'batch one {uuid.uuid4()}'

# This test creates two batches; only one batch should be exported.
# Creating a second batch that will not be used in the export due to the filter: batch_id
project.create_batch(batch_one, data_rows)

task_results = export_v2_test_helpers.run_project_export_v2_task(
project, task_name=task_name, filters=filters, params=params)
assert (batch.size == len(task_results))


def test_project_export_v2(client, export_v2_test_helpers,
configured_project_with_label,
wait_for_data_row_processing):
Expand Down