
Commit 2eec07b

remove custom datapoints in tz retrieve fn
1 parent 4a3c0d1 commit 2eec07b

File tree

11 files changed: +18 additions, -1516 deletions


cognite/client/_api/datapoint_tasks.py

Lines changed: 1 addition & 1 deletion
@@ -19,6 +19,7 @@
     TypeVar,
     cast,
 )
+from zoneinfo import ZoneInfo

 from google.protobuf.internal.containers import RepeatedCompositeFieldContainer

@@ -41,7 +42,6 @@
 from cognite.client.utils._auxiliary import exactly_one_is_not_none, is_finite, is_unlimited
 from cognite.client.utils._text import convert_all_keys_to_snake_case, to_snake_case
 from cognite.client.utils._time import (
-    ZoneInfo,
     align_start_and_end_for_granularity,
     convert_timezone_to_str,
     granularity_to_ms,
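
The same swap recurs in every file below: ZoneInfo now comes straight from the standard library's zoneinfo module (stdlib since Python 3.9) rather than from the cognite.client.utils._time re-export. A minimal sketch of the new import style, assuming Python 3.9+:

from zoneinfo import ZoneInfo  # stdlib module; replaces the old re-export from cognite.client.utils._time

# Behavior is unchanged, since the SDK re-export was the same class:
oslo = ZoneInfo("Europe/Oslo")
print(oslo.key)  # Europe/Oslo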

cognite/client/_api/datapoints.py

Lines changed: 1 addition & 146 deletions
@@ -6,7 +6,6 @@
 import itertools
 import math
 import time
-import warnings
 from abc import ABC, abstractmethod
 from collections import Counter, defaultdict
 from collections.abc import Callable, Iterable, Iterator, MutableSequence, Sequence
@@ -22,6 +21,7 @@
     cast,
     overload,
 )
+from zoneinfo import ZoneInfo

 from typing_extensions import Self

@@ -59,13 +59,7 @@
 from cognite.client.utils._identifier import Identifier, IdentifierSequence, IdentifierSequenceCore
 from cognite.client.utils._importing import import_as_completed, local_import
 from cognite.client.utils._time import (
-    ZoneInfo,
-    align_large_granularity,
-    pandas_date_range_tz,
     timestamp_to_ms,
-    to_fixed_utc_intervals,
-    to_pandas_freq,
-    validate_timezone,
 )
 from cognite.client.utils._validation import validate_user_input_dict_with_identifier
 from cognite.client.utils.useful_types import SequenceNotStr
@@ -1233,145 +1227,6 @@ def retrieve_dataframe(
             freq = cast(str, granularity).replace("m", "min")
             return df.reindex(pd.date_range(start=start, end=end, freq=freq, inclusive="left"))

-    # TODO: Deprecated, don't add support for new features like instance_id
-    def retrieve_dataframe_in_tz(
-        self,
-        *,
-        id: int | Sequence[int] | None = None,
-        external_id: str | SequenceNotStr[str] | None = None,
-        start: datetime.datetime,
-        end: datetime.datetime,
-        aggregates: Aggregate | str | list[Aggregate | str] | None = None,
-        granularity: str | None = None,
-        target_unit: str | None = None,
-        target_unit_system: str | None = None,
-        ignore_unknown_ids: bool = False,
-        include_status: bool = False,
-        ignore_bad_datapoints: bool = True,
-        treat_uncertain_as_bad: bool = True,
-        uniform_index: bool = False,
-        include_aggregate_name: bool = True,
-        include_granularity_name: bool = False,
-        column_names: Literal["id", "external_id"] = "external_id",
-    ) -> pd.DataFrame:
-        """Get datapoints directly in a pandas dataframe in the same timezone as ``start`` and ``end``.
-
-        .. admonition:: Deprecation Warning
-
-            This SDK function is deprecated and will be removed in the next major release. Reason: Cognite Data
-            Fusion now has native support for timezone and calendar-based aggregations. Please consider migrating
-            already today: The API also supports fixed offsets, yields more accurate results and have better support
-            for exotic timezones and unusual DST offsets. You can use the normal retrieve methods instead, just
-            pass 'timezone' as a parameter.
-
-        Args:
-            id (int | Sequence[int] | None): ID or list of IDs.
-            external_id (str | SequenceNotStr[str] | None): External ID or list of External IDs.
-            start (datetime.datetime): Inclusive start, must be timezone aware.
-            end (datetime.datetime): Exclusive end, must be timezone aware and have the same timezone as start.
-            aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Available options: ``average``, ``continuous_variance``, ``count``, ``count_bad``, ``count_good``, ``count_uncertain``, ``discrete_variance``, ``duration_bad``, ``duration_good``, ``duration_uncertain``, ``interpolation``, ``max``, ``min``, ``step_interpolation``, ``sum`` and ``total_variation``. Default: None (raw datapoints returned)
-            granularity (str | None): The granularity to fetch aggregates at. Can be given as an abbreviation or spelled out for clarity: ``s/second(s)``, ``m/minute(s)``, ``h/hour(s)``, ``d/day(s)``, ``w/week(s)``, ``mo/month(s)``, ``q/quarter(s)``, or ``y/year(s)``. Examples: ``30s``, ``5m``, ``1day``, ``2weeks``. Default: None.
-            target_unit (str | None): The unit_external_id of the datapoints returned. If the time series does not have a unit_external_id that can be converted to the target_unit, an error will be returned. Cannot be used with target_unit_system.
-            target_unit_system (str | None): The unit system of the datapoints returned. Cannot be used with target_unit.
-            ignore_unknown_ids (bool): Whether to ignore missing time series rather than raising an exception. Default: False
-            include_status (bool): Also return the status code, an integer, for each datapoint in the response. Only relevant for raw datapoint queries, not aggregates.
-            ignore_bad_datapoints (bool): Treat datapoints with a bad status code as if they do not exist. If set to false, raw queries will include bad datapoints in the response, and aggregates will in general omit the time period between a bad datapoint and the next good datapoint. Also, the period between a bad datapoint and the previous good datapoint will be considered constant. Default: True.
-            treat_uncertain_as_bad (bool): Treat datapoints with uncertain status codes as bad. If false, treat datapoints with uncertain status codes as good. Used for both raw queries and aggregates. Default: True.
-            uniform_index (bool): If querying aggregates with a non-calendar granularity, specifying ``uniform_index=True`` will return a dataframe with an index with constant spacing between timestamps decided by granularity all the way from `start` to `end` (missing values will be NaNs). Default: False
-            include_aggregate_name (bool): Include 'aggregate' in the column name, e.g. `my-ts|average`. Ignored for raw time series. Default: True
-            include_granularity_name (bool): Include 'granularity' in the column name, e.g. `my-ts|12h`. Added after 'aggregate' when present. Ignored for raw time series. Default: False
-            column_names (Literal['id', 'external_id']): Use either ids or external ids as column names. Time series missing external id will use id as backup. Default: "external_id"
-
-        Returns:
-            pd.DataFrame: A pandas DataFrame containing the requested time series with a DatetimeIndex localized in the given timezone.
-        """
-        warnings.warn(
-            (
-                "This SDK method, `retrieve_dataframe_in_tz`, is deprecated and will be removed in the next major release. "
-                "Reason: Cognite Data Fusion now has native support for timezone and calendar-based aggregations. Please "
-                "consider migrating already today: The API also supports fixed offsets, yields more accurate results and "
-                "have better support for exotic timezones and unusual DST offsets. You can use the normal retrieve methods "
-                "instead, just pass 'timezone' as a parameter."
-            ),
-            UserWarning,
-        )
-        _, pd = local_import("numpy", "pandas")  # Verify that deps are available or raise CogniteImportError
-
-        if not exactly_one_is_not_none(id, external_id):
-            raise ValueError("Either input id(s) or external_id(s)")
-
-        if exactly_one_is_not_none(aggregates, granularity):
-            raise ValueError(
-                "Got only one of 'aggregates' and 'granularity'. "
-                "Pass both to get aggregates, or neither to get raw data"
-            )
-        tz = validate_timezone(start, end)
-        if aggregates is None and granularity is None:
-            # For raw data, we only need to convert the timezone:
-            return (
-                # TODO: include_outside_points is missing
-                self.retrieve_dataframe(
-                    id=id,
-                    external_id=external_id,
-                    start=start,
-                    end=end,
-                    aggregates=aggregates,
-                    granularity=granularity,
-                    target_unit=target_unit,
-                    target_unit_system=target_unit_system,
-                    ignore_unknown_ids=ignore_unknown_ids,
-                    include_status=include_status,
-                    ignore_bad_datapoints=ignore_bad_datapoints,
-                    treat_uncertain_as_bad=treat_uncertain_as_bad,
-                    uniform_index=uniform_index,
-                    include_aggregate_name=include_aggregate_name,
-                    include_granularity_name=include_granularity_name,
-                    column_names=column_names,
-                    limit=None,
-                )
-                .tz_localize("utc")
-                .tz_convert(str(tz))
-            )
-        assert isinstance(granularity, str)  # mypy
-
-        identifiers = IdentifierSequence.load(id, external_id)
-        if not identifiers.are_unique():
-            duplicated = find_duplicates(identifiers.as_primitives())
-            raise ValueError(f"The following identifiers were not unique: {duplicated}")
-
-        intervals = to_fixed_utc_intervals(start, end, granularity)
-        queries = [
-            {**ident_dct, "aggregates": aggregates, **interval}
-            for ident_dct, interval in itertools.product(identifiers.as_dicts(), intervals)
-        ]
-        arrays = self.retrieve_arrays(
-            limit=None,
-            ignore_unknown_ids=ignore_unknown_ids,
-            include_status=include_status,
-            ignore_bad_datapoints=ignore_bad_datapoints,
-            treat_uncertain_as_bad=treat_uncertain_as_bad,
-            target_unit=target_unit,
-            target_unit_system=target_unit_system,
-            **{identifiers[0].name(): queries},  # type: ignore [arg-type]
-        )
-        assert isinstance(arrays, DatapointsArrayList)  # mypy
-
-        arrays.concat_duplicate_ids()
-        for arr in arrays:
-            # In case 'include_granularity_name' is used, we don't want '2quarters' to show up as '4343h':
-            arr.granularity = granularity
-        df = (
-            arrays.to_pandas(column_names, include_aggregate_name, include_granularity_name)
-            .tz_localize("utc")
-            .tz_convert(str(tz))
-        )
-        if uniform_index:
-            freq = to_pandas_freq(granularity, start)
-            # TODO: Bug, "small" granularities like s/m/h raise here:
-            start, end = align_large_granularity(start, end, granularity)
-            return df.reindex(pandas_date_range_tz(start, end, freq, inclusive="left"))
-        return df
-
     def retrieve_latest(
         self,
         id: int | LatestDatapointQuery | list[int | LatestDatapointQuery] | None = None,
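
The deprecation notice in the removed method points users at the native timezone support in the regular retrieve methods. A rough migration sketch, assuming a configured client and a hypothetical time series external id "my-ts" (neither is part of this commit):

import datetime
from zoneinfo import ZoneInfo

from cognite.client import CogniteClient

client = CogniteClient()  # assumes credentials are configured in the environment
oslo = ZoneInfo("Europe/Oslo")

# Before (removed in this commit):
# df = client.time_series.data.retrieve_dataframe_in_tz(
#     external_id="my-ts",
#     start=datetime.datetime(2023, 1, 1, tzinfo=oslo),
#     end=datetime.datetime(2024, 1, 1, tzinfo=oslo),
#     aggregates="average",
#     granularity="1month",
# )

# After: pass 'timezone' to the normal retrieve method; the API then does the
# timezone- and calendar-aware aggregation natively:
df = client.time_series.data.retrieve_dataframe(
    external_id="my-ts",
    start=datetime.datetime(2023, 1, 1, tzinfo=oslo),
    end=datetime.datetime(2024, 1, 1, tzinfo=oslo),
    aggregates="average",
    granularity="1month",
    timezone=oslo,
)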

cognite/client/data_classes/data_modeling/instances.py

Lines changed: 2 additions & 2 deletions
@@ -66,7 +66,7 @@
 from cognite.client.utils._identifier import InstanceId
 from cognite.client.utils._importing import local_import
 from cognite.client.utils._text import convert_all_keys_to_snake_case, to_camel_case
-from cognite.client.utils._time import convert_data_modelling_timestamp
+from cognite.client.utils._time import convert_data_modeling_timestamp
 from cognite.client.utils.useful_types import SequenceNotStr

 if TYPE_CHECKING:
@@ -1697,7 +1697,7 @@ def _deserialize_value(value: Any, parameter: inspect.Parameter) -> Any:
         return value
     annotation = str(parameter.annotation)
     if "datetime" in annotation and isinstance(value, str):
-        return convert_data_modelling_timestamp(value)
+        return convert_data_modeling_timestamp(value)
     elif "date" in annotation and isinstance(value, str):
         return date.fromisoformat(value)
     elif isinstance(value, dict):
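
For reference, the renamed helper parses timestamp strings coming back from the data modeling API. A small usage sketch; the exact accepted formats live in cognite.client.utils._time, and the sample string here is an assumption:

from cognite.client.utils._time import convert_data_modeling_timestamp

# ISO 8601-style timestamp as returned by the data modeling API (assumed sample):
dt = convert_data_modeling_timestamp("2023-01-01T12:00:00.123+00:00")
print(dt.isoformat())  # timezone-aware datetime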

cognite/client/data_classes/datapoints.py

Lines changed: 1 addition & 40 deletions
@@ -18,6 +18,7 @@
     TypedDict,
     overload,
 )
+from zoneinfo import ZoneInfo

 from typing_extensions import NotRequired, Self

@@ -41,7 +42,6 @@
     to_snake_case,
 )
 from cognite.client.utils._time import (
-    ZoneInfo,
     convert_and_isoformat_timestamp,
     convert_timezone_to_str,
     parse_str_timezone,
@@ -1270,45 +1270,6 @@ def __init__(self, resources: Collection[Any], cognite_client: CogniteClient | N
         self._external_id_to_item.update(xid_dct)
         self._instance_id_to_item.update(inst_id_dct)

-    def concat_duplicate_ids(self) -> None:
-        """
-        Concatenates all arrays with duplicated IDs.
-
-        Arrays with the same ids are stacked in chronological order.
-
-        **Caveat** This method is not guaranteed to preserve the order of the list.
-        """
-        # Rebuilt list instead of removing duplicated one at a time at the cost of O(n).
-        self.data.clear()
-
-        # This implementation takes advantage of the ordering of the duplicated in the __init__ method
-        has_external_ids = set()
-        for ext_id, items in self._external_id_to_item.items():
-            if not isinstance(items, list):
-                self.data.append(items)
-                if items.id is not None:
-                    has_external_ids.add(items.id)
-                continue
-            concatenated = DatapointsArray.create_from_arrays(*items)
-            self._external_id_to_item[ext_id] = concatenated
-            if concatenated.id is not None:
-                has_external_ids.add(concatenated.id)
-                self._id_to_item[concatenated.id] = concatenated
-            self.data.append(concatenated)
-
-        if not (only_ids := set(self._id_to_item) - has_external_ids):
-            return
-
-        for id_, items in self._id_to_item.items():
-            if id_ not in only_ids:
-                continue
-            if not isinstance(items, list):
-                self.data.append(items)
-                continue
-            concatenated = DatapointsArray.create_from_arrays(*items)
-            self._id_to_item[id_] = concatenated
-            self.data.append(concatenated)
-
     def get(  # type: ignore [override]
         self,
         id: int | None = None,

cognite/client/utils/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -1,9 +1,10 @@
 from __future__ import annotations

+from zoneinfo import ZoneInfo
+
 from cognite.client.utils._time import (
     MAX_TIMESTAMP_MS,
     MIN_TIMESTAMP_MS,
-    ZoneInfo,
     datetime_to_ms,
     datetime_to_ms_iso_timestamp,
     ms_to_datetime,
@@ -13,7 +14,7 @@
 __all__ = [
     "MAX_TIMESTAMP_MS",
     "MIN_TIMESTAMP_MS",
-    "ZoneInfo",
+    "ZoneInfo",  # for backwards compat. (when we supported >=3.8)
     "datetime_to_ms",
     "datetime_to_ms_iso_timestamp",
     "ms_to_datetime",

cognite/client/utils/_pandas_helpers.py

Lines changed: 2 additions & 1 deletion
@@ -8,11 +8,12 @@
 from itertools import chain
 from numbers import Integral
 from typing import TYPE_CHECKING, Any, Literal
+from zoneinfo import ZoneInfo

 from cognite.client.exceptions import CogniteImportError
 from cognite.client.utils._importing import local_import
 from cognite.client.utils._text import to_camel_case
-from cognite.client.utils._time import TIME_ATTRIBUTES, ZoneInfo
+from cognite.client.utils._time import TIME_ATTRIBUTES

 if TYPE_CHECKING:
     import pandas as pd
