Skip to content

Commit b5a70f7

Browse files
committed
Fix freq string issues in datasets (awslabs#3232)
*Issue #, if available:* Fixes awslabs#3229: pandas changes in frequency strings broke some of our logic.

*Description of changes:* Add missing frequency strings in `_tsf_datasets.py`, and get rid of other frequency-related warnings with other datasets. I tested the change by running the following script:

```python
from gluonts.dataset.repository import get_dataset, dataset_names

skip = [
    "m3_monthly",
    "m3_yearly",
    "m3_quarterly",
    "m3_other",
    "m5",
]

for dataset_name in dataset_names:
    if dataset_name in skip:
        continue
    print(dataset_name)
    dataset = get_dataset(dataset_name, regenerate=True)
```

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.

**Please tag this PR with at least one of these labels to make our release process faster:** BREAKING, new feature, bug fix, other change, dev setup
1 parent 4388656 commit b5a70f7

File tree

7 files changed

+20
-16
lines changed

7 files changed

+20
-16
lines changed

src/gluonts/dataset/repository/_ercot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def generate_ercot_dataset(dataset_path: Path, dataset_writer: DatasetWriter):
2626
df.ffill(inplace=True)
2727
regions = [col for col in df.columns if col not in ["ds", "y"]]
2828

29-
freq = "1H"
29+
freq = "1h"
3030
prediction_length = 24
3131

3232
start = pd.Period(df["ds"][0], freq=freq)

src/gluonts/dataset/repository/_gp_copula_2019.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class GPCopulaDataset(NamedTuple):
6363
# original dataset can be found at https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014#
6464
num_series=370,
6565
prediction_length=24,
66-
freq="H",
66+
freq="h",
6767
rolling_evaluations=7,
6868
max_target_dim=None,
6969
),
@@ -73,7 +73,7 @@ class GPCopulaDataset(NamedTuple):
7373
# note there are 963 in the original dataset from https://archive.ics.uci.edu/ml/datasets/PEMS-SF
7474
num_series=963,
7575
prediction_length=24,
76-
freq="H",
76+
freq="h",
7777
rolling_evaluations=7,
7878
max_target_dim=None,
7979
),
@@ -82,7 +82,7 @@ class GPCopulaDataset(NamedTuple):
8282
url=root + "solar_nips.tar.gz",
8383
num_series=137,
8484
prediction_length=24,
85-
freq="H",
85+
freq="h",
8686
rolling_evaluations=7,
8787
max_target_dim=None,
8888
),

src/gluonts/dataset/repository/_lstnet.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ class LstnetDataset(NamedTuple):
9191
prediction_length=24,
9292
rolling_evaluations=7,
9393
start_date="2012-01-01",
94-
freq="1H",
94+
freq="1h",
9595
agg_freq=None,
9696
),
9797
"traffic": LstnetDataset(
@@ -105,7 +105,7 @@ class LstnetDataset(NamedTuple):
105105
prediction_length=24,
106106
rolling_evaluations=7,
107107
start_date="2015-01-01",
108-
freq="H",
108+
freq="h",
109109
agg_freq=None,
110110
),
111111
"solar-energy": LstnetDataset(
@@ -117,7 +117,7 @@ class LstnetDataset(NamedTuple):
117117
rolling_evaluations=7,
118118
start_date="2006-01-01",
119119
freq="10min",
120-
agg_freq="1H",
120+
agg_freq="1h",
121121
),
122122
}
123123

src/gluonts/dataset/repository/_tsf_datasets.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,15 @@ def generate_forecasting_dataset(
278278
def default_prediction_length_from_frequency(freq: str) -> int:
279279
prediction_length_map = {
280280
"T": 60,
281+
"min": 60,
281282
"H": 48,
283+
"h": 48,
282284
"D": 30,
283285
"W-SUN": 8,
284286
"M": 12,
287+
"ME": 12,
285288
"Y": 4,
289+
"YE": 4,
286290
}
287291
try:
288292
freq = to_offset(freq).name

src/gluonts/dataset/repository/_tsf_reader.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ def frequency_converter(freq: str):
4949

5050
BASE_FREQ_TO_PANDAS_OFFSET: Dict[str, str] = {
5151
"seconds": "S",
52-
"minutely": "T",
53-
"minutes": "T",
54-
"hourly": "H",
55-
"hours": "H",
52+
"minutely": "min",
53+
"minutes": "min",
54+
"hourly": "h",
55+
"hours": "h",
5656
"daily": "D",
5757
"days": "D",
5858
"weekly": "W",

src/gluonts/dataset/repository/_uber_tlc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def generate_uber_dataset(
2828
prediction_length: int,
2929
dataset_writer: DatasetWriter,
3030
):
31-
subsets = {"daily": "1D", "hourly": "1H"}
31+
subsets = {"daily": "1D", "hourly": "1h"}
3232
assert (
3333
uber_freq.lower() in subsets
3434
), f"invalid uber_freq='{uber_freq}'. Allowed values: {subsets.keys()}"

test/dataset/test_tsf_reader.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
"input_freq_str, output_freq_str",
2121
[
2222
("30_seconds", "30S"),
23-
("minutely", "T"),
24-
("10_minutes", "10T"),
25-
("hourly", "H"),
26-
("half_hourly", "0.5H"),
23+
("minutely", "min"),
24+
("10_minutes", "10min"),
25+
("hourly", "h"),
26+
("half_hourly", "0.5h"),
2727
("daily", "D"),
2828
("7_days", "7D"),
2929
("weekly", "W"),

0 commit comments

Comments
 (0)