From fd6231b93db8136f7da68b5c3aa84c4cfe1f49aa Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 29 May 2023 18:18:02 -0700 Subject: [PATCH 1/8] Use python dateutil package to parse iso strings --- labelbox/schema/data_row_metadata.py | 7 ++++--- labelbox/utils.py | 19 ++++++++++++++++++- tests/integration/test_data_row_metadata.py | 9 ++++----- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/labelbox/schema/data_row_metadata.py b/labelbox/schema/data_row_metadata.py index 3fd33e50a..bb367027b 100644 --- a/labelbox/schema/data_row_metadata.py +++ b/labelbox/schema/data_row_metadata.py @@ -5,10 +5,11 @@ from itertools import chain from typing import List, Optional, Dict, Union, Callable, Type, Any, Generator +from dateutil.parser import isoparse from pydantic import BaseModel, conlist, constr from labelbox.schema.ontology import SchemaId -from labelbox.utils import _CamelCaseMixin, format_iso_datetime +from labelbox.utils import _CamelCaseMixin, format_iso_datetime, format_iso_default_utc class DataRowMetadataKind(Enum): @@ -466,7 +467,7 @@ def parse_metadata_fields( value=schema.uid) elif schema.kind == DataRowMetadataKind.datetime: field = DataRowMetadataField(schema_id=schema.uid, - value=datetime.fromisoformat( + value=format_iso_default_utc( f["value"])) else: field = DataRowMetadataField(schema_id=schema.uid, @@ -838,7 +839,7 @@ def _validate_parse_number( def _validate_parse_datetime( field: DataRowMetadataField) -> List[Dict[str, Union[SchemaId, str]]]: if isinstance(field.value, str): - field.value = datetime.fromisoformat(field.value) + field.value = format_iso_default_utc(field.value) elif not isinstance(field.value, datetime): raise TypeError( f"Value for datetime fields must be either a string or datetime object. Found {type(field.value)}" diff --git a/labelbox/utils.py b/labelbox/utils.py index 5ec90f403..083d6c403 100644 --- a/labelbox/utils.py +++ b/labelbox/utils.py @@ -1,10 +1,18 @@ import datetime import re + +import datetime +from dateutil.tz import tzoffset +from dateutil.parser import isoparse as dateutil_parse +from dateutil.utils import default_tzinfo + from urllib.parse import urlparse from pydantic import BaseModel UPPERCASE_COMPONENTS = ['uri', 'rgb'] ISO_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ' +DFLT_TZ = tzoffset("UTC", 0000) + def _convert(s, sep, title): @@ -80,4 +88,13 @@ def format_iso_datetime(dt: datetime.datetime) -> str: Formats a datetime object into the format: 2011-11-04T00:05:23Z Note that datetime.isoformat() outputs 2011-11-04T00:05:23+00:00 """ - return dt.strftime(ISO_DATETIME_FORMAT) \ No newline at end of file + return dt.strftime(ISO_DATETIME_FORMAT) + + +def format_iso_default_utc(date_string: str) -> datetime.datetime: + """ + Converts a string even if offset is missing: 2011-11-04T00:05:23Z or 2011-11-04T00:05:23+00:00 or 2011-11-04T00:05:23 + to a datetime object. + For missing offsets, the default offset is UTC. + """ + return default_tzinfo(dateutil_parse(date_string), DFLT_TZ) \ No newline at end of file diff --git a/tests/integration/test_data_row_metadata.py b/tests/integration/test_data_row_metadata.py index 3c0d06d33..41f020c51 100644 --- a/tests/integration/test_data_row_metadata.py +++ b/tests/integration/test_data_row_metadata.py @@ -445,7 +445,7 @@ def test_delete_schema(mdo): @pytest.mark.parametrize('datetime_str', - ['2011-11-04T00:05:23Z', '2011-05-07T14:34:14+00:00']) + ['2011-11-04T00:05:23Z', '2011-11-04T00:05:23+00:00']) def test_upsert_datarow_date_metadata(data_row, mdo, datetime_str): metadata = [ DataRowMetadata(data_row_id=data_row.uid, @@ -458,11 +458,11 @@ def test_upsert_datarow_date_metadata(data_row, mdo, datetime_str): assert len(errors) == 0 metadata = mdo.bulk_export([data_row.uid]) - assert metadata[0].fields[0].value == datetime.fromisoformat(datetime_str) + assert f"{metadata[0].fields[0].value}" == "2011-11-04 00:05:23+00:00" @pytest.mark.parametrize('datetime_str', - ['2011-11-04T00:05:23Z', '2011-05-07T14:34:14+00:00']) + ['2011-11-04T00:05:23Z', '2011-11-04T00:05:23+00:00']) def test_create_data_row_with_metadata(dataset, image_url, datetime_str): client = dataset.client assert len(list(dataset.data_rows())) == 0 @@ -475,5 +475,4 @@ def test_create_data_row_with_metadata(dataset, image_url, datetime_str): metadata_fields=metadata_fields) retrieved_data_row = client.get_data_row(data_row.uid) - assert retrieved_data_row.metadata[0].value == datetime.fromisoformat( - datetime_str) + assert f"{retrieved_data_row.metadata[0].value}" == "2011-11-04 00:05:23+00:00" From 973016b2f497e1d65573d685bb39ff5c2c16e4ff Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 29 May 2023 18:18:57 -0700 Subject: [PATCH 2/8] Remove backports --- labelbox/__init__.py | 4 ---- requirements.txt | 24 ++++++++++++------------ setup.py | 3 +-- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/labelbox/__init__.py b/labelbox/__init__.py index d1e18b3ad..98f6a0dae 100644 --- a/labelbox/__init__.py +++ b/labelbox/__init__.py @@ -1,10 +1,6 @@ name = "labelbox" __version__ = "3.47.1" -from backports.datetime_fromisoformat import MonkeyPatch - -MonkeyPatch.patch_fromisoformat() - from labelbox.client import Client from labelbox.schema.project import Project from labelbox.schema.model import Model diff --git a/requirements.txt b/requirements.txt index 61d8b559c..b48d8bd12 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,19 @@ -requests==2.22.0 backoff==1.10.0 -google-api-core>=1.22.1 -pydantic>=1.8,<2.0 -shapely -tqdm geojson +google-api-core>=1.22.1 +imagesize +nbconvert~=7.2.6 +nbformat~=5.7.0 numpy -PILLOW opencv-python -imagesize -pyproj +PILLOW +pydantic>=1.8,<2.0 pygeotile -typing-extensions==4.5.0 +pyproj pytest-xdist -nbformat~=5.7.0 -nbconvert~=7.2.6 +python-dateutil>=2.8.2,<2.9.0 +requests==2.22.0 +shapely +tqdm typeguard==2.13.3 -backports-datetime-fromisoformat~=2.0 \ No newline at end of file +typing-extensions==4.5.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 151a4ba15..e459b0f5e 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ packages=setuptools.find_packages(), install_requires=[ "backoff==1.10.0", "requests>=2.22.0", "google-api-core>=1.22.1", - "pydantic>=1.8,<2.0", "tqdm", "backports-datetime-fromisoformat~=2.0" + "pydantic>=1.8,<2.0", "tqdm", "python-dateutil>=2.8.2,<2.9.0" ], extras_require={ 'data': [ @@ -31,7 +31,6 @@ ], }, classifiers=[ - 'Development Status :: 3 - Alpha', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.7', From 9aaeb72df7ed62b93d02c312b7c3794085ac2777 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 29 May 2023 18:20:06 -0700 Subject: [PATCH 3/8] Fix formatting --- labelbox/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/labelbox/utils.py b/labelbox/utils.py index 083d6c403..76fdafc1d 100644 --- a/labelbox/utils.py +++ b/labelbox/utils.py @@ -14,7 +14,6 @@ DFLT_TZ = tzoffset("UTC", 0000) - def _convert(s, sep, title): components = re.findall(r"[A-Z][a-z0-9]*|[a-z][a-z0-9]*", s) components = list(map(str.lower, filter(None, components))) @@ -97,4 +96,4 @@ def format_iso_default_utc(date_string: str) -> datetime.datetime: to a datetime object. For missing offsets, the default offset is UTC. """ - return default_tzinfo(dateutil_parse(date_string), DFLT_TZ) \ No newline at end of file + return default_tzinfo(dateutil_parse(date_string), DFLT_TZ) From b78fbfb838f5d948892a442f0c09c16889666e02 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 29 May 2023 18:21:39 -0700 Subject: [PATCH 4/8] Make tox.ini uptodate with supported python versions --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index b98fd7fa8..8dd46d939 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ # content of: tox.ini , put in same dir as setup.py [tox] -envlist = py36, py37, py38 +envlist = py37, py38, py39 [testenv] # install pytest in the virtualenv where commands will be executed From 4d25d85802c49fc431bfca6943da36451cd9e241 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 30 May 2023 09:47:00 -0700 Subject: [PATCH 5/8] Rename function --- labelbox/schema/data_row_metadata.py | 6 +++--- labelbox/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/labelbox/schema/data_row_metadata.py b/labelbox/schema/data_row_metadata.py index bb367027b..00c86dc7a 100644 --- a/labelbox/schema/data_row_metadata.py +++ b/labelbox/schema/data_row_metadata.py @@ -9,7 +9,7 @@ from pydantic import BaseModel, conlist, constr from labelbox.schema.ontology import SchemaId -from labelbox.utils import _CamelCaseMixin, format_iso_datetime, format_iso_default_utc +from labelbox.utils import _CamelCaseMixin, format_iso_datetime, format_iso_from_string class DataRowMetadataKind(Enum): @@ -467,7 +467,7 @@ def parse_metadata_fields( value=schema.uid) elif schema.kind == DataRowMetadataKind.datetime: field = DataRowMetadataField(schema_id=schema.uid, - value=format_iso_default_utc( + value=format_iso_from_string( f["value"])) else: field = DataRowMetadataField(schema_id=schema.uid, @@ -839,7 +839,7 @@ def _validate_parse_number( def _validate_parse_datetime( field: DataRowMetadataField) -> List[Dict[str, Union[SchemaId, str]]]: if isinstance(field.value, str): - field.value = format_iso_default_utc(field.value) + field.value = format_iso_from_string(field.value) elif not isinstance(field.value, datetime): raise TypeError( f"Value for datetime fields must be either a string or datetime object. Found {type(field.value)}" diff --git a/labelbox/utils.py b/labelbox/utils.py index 76fdafc1d..1274a7113 100644 --- a/labelbox/utils.py +++ b/labelbox/utils.py @@ -1,7 +1,6 @@ import datetime import re -import datetime from dateutil.tz import tzoffset from dateutil.parser import isoparse as dateutil_parse from dateutil.utils import default_tzinfo @@ -90,10 +89,11 @@ def format_iso_datetime(dt: datetime.datetime) -> str: return dt.strftime(ISO_DATETIME_FORMAT) -def format_iso_default_utc(date_string: str) -> datetime.datetime: +def format_iso_from_string(date_string: str) -> datetime.datetime: """ Converts a string even if offset is missing: 2011-11-04T00:05:23Z or 2011-11-04T00:05:23+00:00 or 2011-11-04T00:05:23 to a datetime object. For missing offsets, the default offset is UTC. """ + # return datetime.datetime.fromisoformat(date_string) return default_tzinfo(dateutil_parse(date_string), DFLT_TZ) From b2f348d1f63fd56a627dce7202956881adf92713 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 30 May 2023 09:56:23 -0700 Subject: [PATCH 6/8] Add test --- labelbox/schema/data_row_metadata.py | 1 - tests/unit/test_utils.py | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_utils.py diff --git a/labelbox/schema/data_row_metadata.py b/labelbox/schema/data_row_metadata.py index 00c86dc7a..61982e69d 100644 --- a/labelbox/schema/data_row_metadata.py +++ b/labelbox/schema/data_row_metadata.py @@ -5,7 +5,6 @@ from itertools import chain from typing import List, Optional, Dict, Union, Callable, Type, Any, Generator -from dateutil.parser import isoparse from pydantic import BaseModel, conlist, constr from labelbox.schema.ontology import SchemaId diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py new file mode 100644 index 000000000..f1a0075b1 --- /dev/null +++ b/tests/unit/test_utils.py @@ -0,0 +1,17 @@ +import pytest +from labelbox.utils import format_iso_datetime, format_iso_from_string + + +@pytest.mark.parametrize( + 'datetime_str, expected_datetime_str', + [ + ('2011-11-04T00:05:23Z', '2011-11-04T00:05:23Z'), + ('2011-11-04T00:05:23+00:00', '2011-11-04T00:05:23Z'), + ('2011-11-04T00:05:23+05:00', '2011-11-04T00:05:23Z' + ), #NOTE not converting with timezone... this is compatible with out current implementation + ('2011-11-04T00:05:23', '2011-11-04T00:05:23Z') + ]) +def test_datetime_parsing(datetime_str, expected_datetime_str): + # NOTE I would normally not tested expected using another function from sdk code, but in this case this is exactly the usage in _validate_parse_datetime + assert format_iso_datetime( + format_iso_from_string(datetime_str)) == expected_datetime_str From 541d7c4d9565a8fb15d83fe840af834daae1a8b7 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 5 Jun 2023 09:45:35 -0700 Subject: [PATCH 7/8] PR changes Make comments read better --- tests/unit/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index f1a0075b1..ef6eded48 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -8,10 +8,10 @@ ('2011-11-04T00:05:23Z', '2011-11-04T00:05:23Z'), ('2011-11-04T00:05:23+00:00', '2011-11-04T00:05:23Z'), ('2011-11-04T00:05:23+05:00', '2011-11-04T00:05:23Z' - ), #NOTE not converting with timezone... this is compatible with out current implementation + ), # NOTE: The current implementation is not converting from timezone other then UTC. This how it has been working prior to my change. ('2011-11-04T00:05:23', '2011-11-04T00:05:23Z') ]) def test_datetime_parsing(datetime_str, expected_datetime_str): - # NOTE I would normally not tested expected using another function from sdk code, but in this case this is exactly the usage in _validate_parse_datetime + # NOTE I would normally not take 'expected' using another function from sdk code, but in this case this is exactly the usage in _validate_parse_datetime assert format_iso_datetime( format_iso_from_string(datetime_str)) == expected_datetime_str From 2924dc94c46ce9fbe5c3ce96f081692aab68ecc9 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 5 Jun 2023 21:02:23 -0700 Subject: [PATCH 8/8] Support for timezone conversion --- labelbox/utils.py | 2 +- tests/unit/test_utils.py | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/labelbox/utils.py b/labelbox/utils.py index 1274a7113..bfe39a3e2 100644 --- a/labelbox/utils.py +++ b/labelbox/utils.py @@ -86,7 +86,7 @@ def format_iso_datetime(dt: datetime.datetime) -> str: Formats a datetime object into the format: 2011-11-04T00:05:23Z Note that datetime.isoformat() outputs 2011-11-04T00:05:23+00:00 """ - return dt.strftime(ISO_DATETIME_FORMAT) + return dt.astimezone(datetime.timezone.utc).strftime(ISO_DATETIME_FORMAT) def format_iso_from_string(date_string: str) -> datetime.datetime: diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index ef6eded48..129edcd72 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -2,15 +2,11 @@ from labelbox.utils import format_iso_datetime, format_iso_from_string -@pytest.mark.parametrize( - 'datetime_str, expected_datetime_str', - [ - ('2011-11-04T00:05:23Z', '2011-11-04T00:05:23Z'), - ('2011-11-04T00:05:23+00:00', '2011-11-04T00:05:23Z'), - ('2011-11-04T00:05:23+05:00', '2011-11-04T00:05:23Z' - ), # NOTE: The current implementation is not converting from timezone other then UTC. This how it has been working prior to my change. - ('2011-11-04T00:05:23', '2011-11-04T00:05:23Z') - ]) +@pytest.mark.parametrize('datetime_str, expected_datetime_str', + [('2011-11-04T00:05:23Z', '2011-11-04T00:05:23Z'), + ('2011-11-04T00:05:23+00:00', '2011-11-04T00:05:23Z'), + ('2011-11-04T00:05:23+05:00', '2011-11-03T19:05:23Z'), + ('2011-11-04T00:05:23', '2011-11-04T00:05:23Z')]) def test_datetime_parsing(datetime_str, expected_datetime_str): # NOTE I would normally not take 'expected' using another function from sdk code, but in this case this is exactly the usage in _validate_parse_datetime assert format_iso_datetime(