6 | 6 | import itertools
7 | 7 | import math
8 | 8 | import time
9 | | -import warnings
10 | 9 | from abc import ABC, abstractmethod
11 | 10 | from collections import Counter, defaultdict
12 | 11 | from collections.abc import Callable, Iterable, Iterator, MutableSequence, Sequence

22 | 21 |     cast,
23 | 22 |     overload,
24 | 23 | )
| 24 | +from zoneinfo import ZoneInfo
25 | 25 |
26 | 26 | from typing_extensions import Self
27 | 27 |

59 | 59 | from cognite.client.utils._identifier import Identifier, IdentifierSequence, IdentifierSequenceCore
60 | 60 | from cognite.client.utils._importing import import_as_completed, local_import
61 | 61 | from cognite.client.utils._time import (
62 | | -    ZoneInfo,
63 | | -    align_large_granularity,
64 | | -    pandas_date_range_tz,
65 | 62 |     timestamp_to_ms,
66 | | -    to_fixed_utc_intervals,
67 | | -    to_pandas_freq,
68 | | -    validate_timezone,
69 | 63 | )
70 | 64 | from cognite.client.utils._validation import validate_user_input_dict_with_identifier
71 | 65 | from cognite.client.utils.useful_types import SequenceNotStr
@@ -1233,145 +1227,6 @@ def retrieve_dataframe(
1233 | 1227 |         freq = cast(str, granularity).replace("m", "min")
1234 | 1228 |         return df.reindex(pd.date_range(start=start, end=end, freq=freq, inclusive="left"))
1235 | 1229 |
1236 | | -    # TODO: Deprecated, don't add support for new features like instance_id
1237 | | -    def retrieve_dataframe_in_tz(
1238 | | -        self,
1239 | | -        *,
1240 | | -        id: int | Sequence[int] | None = None,
1241 | | -        external_id: str | SequenceNotStr[str] | None = None,
1242 | | -        start: datetime.datetime,
1243 | | -        end: datetime.datetime,
1244 | | -        aggregates: Aggregate | str | list[Aggregate | str] | None = None,
1245 | | -        granularity: str | None = None,
1246 | | -        target_unit: str | None = None,
1247 | | -        target_unit_system: str | None = None,
1248 | | -        ignore_unknown_ids: bool = False,
1249 | | -        include_status: bool = False,
1250 | | -        ignore_bad_datapoints: bool = True,
1251 | | -        treat_uncertain_as_bad: bool = True,
1252 | | -        uniform_index: bool = False,
1253 | | -        include_aggregate_name: bool = True,
1254 | | -        include_granularity_name: bool = False,
1255 | | -        column_names: Literal["id", "external_id"] = "external_id",
1256 | | -    ) -> pd.DataFrame:
1257 | | -        """Get datapoints directly in a pandas dataframe in the same timezone as ``start`` and ``end``.
1258 | | -
1259 | | -        .. admonition:: Deprecation Warning
1260 | | -
1261 | | -            This SDK function is deprecated and will be removed in the next major release. Reason: Cognite Data
1262 | | -            Fusion now has native support for timezone and calendar-based aggregations. Please consider migrating
1263 | | -            today: the API also supports fixed offsets, yields more accurate results and has better support
1264 | | -            for exotic timezones and unusual DST offsets. You can use the normal retrieve methods instead, just
1265 | | -            pass 'timezone' as a parameter.
1266 | | -
1267 | | -        Args:
1268 | | -            id (int | Sequence[int] | None): ID or list of IDs.
1269 | | -            external_id (str | SequenceNotStr[str] | None): External ID or list of External IDs.
1270 | | -            start (datetime.datetime): Inclusive start, must be timezone aware.
1271 | | -            end (datetime.datetime): Exclusive end, must be timezone aware and have the same timezone as start.
1272 | | -            aggregates (Aggregate | str | list[Aggregate | str] | None): Single aggregate or list of aggregates to retrieve. Available options: ``average``, ``continuous_variance``, ``count``, ``count_bad``, ``count_good``, ``count_uncertain``, ``discrete_variance``, ``duration_bad``, ``duration_good``, ``duration_uncertain``, ``interpolation``, ``max``, ``min``, ``step_interpolation``, ``sum`` and ``total_variation``. Default: None (raw datapoints returned)
1273 | | -            granularity (str | None): The granularity to fetch aggregates at. Can be given as an abbreviation or spelled out for clarity: ``s/second(s)``, ``m/minute(s)``, ``h/hour(s)``, ``d/day(s)``, ``w/week(s)``, ``mo/month(s)``, ``q/quarter(s)``, or ``y/year(s)``. Examples: ``30s``, ``5m``, ``1day``, ``2weeks``. Default: None.
1274 | | -            target_unit (str | None): The unit_external_id of the datapoints returned. If the time series does not have a unit_external_id that can be converted to the target_unit, an error will be returned. Cannot be used with target_unit_system.
1275 | | -            target_unit_system (str | None): The unit system of the datapoints returned. Cannot be used with target_unit.
1276 | | -            ignore_unknown_ids (bool): Whether to ignore missing time series rather than raising an exception. Default: False
1277 | | -            include_status (bool): Also return the status code, an integer, for each datapoint in the response. Only relevant for raw datapoint queries, not aggregates.
1278 | | -            ignore_bad_datapoints (bool): Treat datapoints with a bad status code as if they do not exist. If set to false, raw queries will include bad datapoints in the response, and aggregates will in general omit the time period between a bad datapoint and the next good datapoint. Also, the period between a bad datapoint and the previous good datapoint will be considered constant. Default: True.
1279 | | -            treat_uncertain_as_bad (bool): Treat datapoints with uncertain status codes as bad. If false, treat datapoints with uncertain status codes as good. Used for both raw queries and aggregates. Default: True.
1280 | | -            uniform_index (bool): If querying aggregates with a non-calendar granularity, specifying ``uniform_index=True`` will return a dataframe whose index has constant spacing between timestamps, decided by the granularity, all the way from `start` to `end` (missing values will be NaNs). Default: False
1281 | | -            include_aggregate_name (bool): Include 'aggregate' in the column name, e.g. `my-ts|average`. Ignored for raw time series. Default: True
1282 | | -            include_granularity_name (bool): Include 'granularity' in the column name, e.g. `my-ts|12h`. Added after 'aggregate' when present. Ignored for raw time series. Default: False
1283 | | -            column_names (Literal['id', 'external_id']): Use either ids or external ids as column names. Time series missing external id will use id as backup. Default: "external_id"
1284 | | -
1285 | | -        Returns:
1286 | | -            pd.DataFrame: A pandas DataFrame containing the requested time series with a DatetimeIndex localized in the given timezone.
1287 | | -        """
1288 | | -        warnings.warn(
1289 | | -            (
1290 | | -                "This SDK method, `retrieve_dataframe_in_tz`, is deprecated and will be removed in the next major release. "
1291 | | -                "Reason: Cognite Data Fusion now has native support for timezone and calendar-based aggregations. Please "
1292 | | -                "consider migrating today: the API also supports fixed offsets, yields more accurate results and "
1293 | | -                "has better support for exotic timezones and unusual DST offsets. You can use the normal retrieve "
1294 | | -                "methods instead, just pass 'timezone' as a parameter."
1295 | | -            ),
1296 | | -            UserWarning,
1297 | | -        )
1298 | | -        _, pd = local_import("numpy", "pandas")  # Verify that deps are available or raise CogniteImportError
1299 | | -
1300 | | -        if not exactly_one_is_not_none(id, external_id):
1301 | | -            raise ValueError("Either input id(s) or external_id(s)")
1302 | | -
1303 | | -        if exactly_one_is_not_none(aggregates, granularity):
1304 | | -            raise ValueError(
1305 | | -                "Got only one of 'aggregates' and 'granularity'. "
1306 | | -                "Pass both to get aggregates, or neither to get raw data"
1307 | | -            )
1308 | | -        tz = validate_timezone(start, end)
1309 | | -        if aggregates is None and granularity is None:
1310 | | -            # For raw data, we only need to convert the timezone:
1311 | | -            return (
1312 | | -                # TODO: include_outside_points is missing
1313 | | -                self.retrieve_dataframe(
1314 | | -                    id=id,
1315 | | -                    external_id=external_id,
1316 | | -                    start=start,
1317 | | -                    end=end,
1318 | | -                    aggregates=aggregates,
1319 | | -                    granularity=granularity,
1320 | | -                    target_unit=target_unit,
1321 | | -                    target_unit_system=target_unit_system,
1322 | | -                    ignore_unknown_ids=ignore_unknown_ids,
1323 | | -                    include_status=include_status,
1324 | | -                    ignore_bad_datapoints=ignore_bad_datapoints,
1325 | | -                    treat_uncertain_as_bad=treat_uncertain_as_bad,
1326 | | -                    uniform_index=uniform_index,
1327 | | -                    include_aggregate_name=include_aggregate_name,
1328 | | -                    include_granularity_name=include_granularity_name,
1329 | | -                    column_names=column_names,
1330 | | -                    limit=None,
1331 | | -                )
1332 | | -                .tz_localize("utc")
1333 | | -                .tz_convert(str(tz))
1334 | | -            )
1335 | | -        assert isinstance(granularity, str)  # mypy
1336 | | -
1337 | | -        identifiers = IdentifierSequence.load(id, external_id)
1338 | | -        if not identifiers.are_unique():
1339 | | -            duplicated = find_duplicates(identifiers.as_primitives())
1340 | | -            raise ValueError(f"The following identifiers were not unique: {duplicated}")
1341 | | -
1342 | | -        intervals = to_fixed_utc_intervals(start, end, granularity)
1343 | | -        queries = [
1344 | | -            {**ident_dct, "aggregates": aggregates, **interval}
1345 | | -            for ident_dct, interval in itertools.product(identifiers.as_dicts(), intervals)
1346 | | -        ]
1347 | | -        arrays = self.retrieve_arrays(
1348 | | -            limit=None,
1349 | | -            ignore_unknown_ids=ignore_unknown_ids,
1350 | | -            include_status=include_status,
1351 | | -            ignore_bad_datapoints=ignore_bad_datapoints,
1352 | | -            treat_uncertain_as_bad=treat_uncertain_as_bad,
1353 | | -            target_unit=target_unit,
1354 | | -            target_unit_system=target_unit_system,
1355 | | -            **{identifiers[0].name(): queries},  # type: ignore [arg-type]
1356 | | -        )
1357 | | -        assert isinstance(arrays, DatapointsArrayList)  # mypy
1358 | | -
1359 | | -        arrays.concat_duplicate_ids()
1360 | | -        for arr in arrays:
1361 | | -            # In case 'include_granularity_name' is used, we don't want '2quarters' to show up as '4343h':
1362 | | -            arr.granularity = granularity
1363 | | -        df = (
1364 | | -            arrays.to_pandas(column_names, include_aggregate_name, include_granularity_name)
1365 | | -            .tz_localize("utc")
1366 | | -            .tz_convert(str(tz))
1367 | | -        )
1368 | | -        if uniform_index:
1369 | | -            freq = to_pandas_freq(granularity, start)
1370 | | -            # TODO: Bug, "small" granularities like s/m/h raise here:
1371 | | -            start, end = align_large_granularity(start, end, granularity)
1372 | | -            return df.reindex(pandas_date_range_tz(start, end, freq, inclusive="left"))
1373 | | -        return df
1374 | | -
1375 | 1230 |     def retrieve_latest(
1376 | 1231 |         self,
1377 | 1232 |         id: int | LatestDatapointQuery | list[int | LatestDatapointQuery] | None = None,
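Separately, the raw-data branch of the removed method relied on datapoint timestamps being epoch-based (UTC), so its result only needed `.tz_localize("utc").tz_convert(...)`. The same two-step conversion in plain pandas, with made-up data:

```python
import pandas as pd

# Epoch-millisecond timestamps as returned for raw datapoints (illustrative values):
idx = pd.to_datetime([1672531200000, 1672534800000], unit="ms")  # tz-naive, in UTC
df = pd.DataFrame({"my-ts": [1.0, 2.0]}, index=idx)

# First declare the index to be UTC, then convert it to the target timezone:
df = df.tz_localize("utc").tz_convert("Europe/Oslo")
print(df.index)  # 2023-01-01 01:00 and 02:00 (+01:00 Oslo time)
```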