Skip to content

Commit f8448a7

Browse files
committed
minor adjustments
1 parent e49b8f7 commit f8448a7

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

src/datasets/arrow_dataset.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5844,7 +5844,7 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
58445844
# get the info from the README to update them
58455845
if repo_with_dataset_card:
58465846
dataset_card_path = api.hf_hub_download(
5847-
repo_id, config.REPOCARD_FILENAME, repo_type="dataset", revision=revision
5847+
repo_id, config.REPOCARD_FILENAME, repo_type="dataset", revision=parent_commit
58485848
)
58495849
dataset_card = DatasetCard.load(Path(dataset_card_path))
58505850
dataset_card_data = dataset_card.data
@@ -5860,7 +5860,7 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
58605860
dataset_card_data = DatasetCardData()
58615861
metadata_configs = MetadataConfigs()
58625862
dataset_infos_path = api.hf_hub_download(
5863-
repo_id, config.DATASETDICT_INFOS_FILENAME, repo_type="dataset", revision=revision
5863+
repo_id, config.DATASETDICT_INFOS_FILENAME, repo_type="dataset", revision=parent_commit
58645864
)
58655865
with open(dataset_infos_path, encoding="utf-8") as f:
58665866
dataset_infos: dict = json.load(f)
@@ -5935,7 +5935,7 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
59355935
# push to the deprecated dataset_infos.json
59365936
if repo_with_dataset_infos:
59375937
dataset_infos_path = api.hf_hub_download(
5938-
repo_id, config.DATASETDICT_INFOS_FILENAME, repo_type="dataset", revision=revision
5938+
repo_id, config.DATASETDICT_INFOS_FILENAME, repo_type="dataset", revision=parent_commit
59395939
)
59405940
with open(dataset_infos_path, encoding="utf-8") as f:
59415941
dataset_infos: dict = json.load(f)
@@ -5975,9 +5975,10 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
59755975
+ (f" (still {num_commits - i - 1} to go)" if num_commits - i - 1 else "")
59765976
+ "."
59775977
)
5978-
additions = deletions = []
5978+
additions = []
5979+
deletions = []
59795980

5980-
for sleep_time in itertools.chain(range(10), itertools.repeat(30)):
5981+
for retry, sleep_time in enumerate(itertools.chain(range(10), itertools.repeat(30)), start=1):
59815982
# We need to retry if there was a commit in between in case it touched the dataset card data
59825983
sleep_time *= 1 + random.random()
59835984
parent_commit, dataset_card, dataset_infos = get_new_dataset_card_data()
@@ -6005,8 +6006,8 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
60056006
)
60066007
except HfHubHTTPError as err:
60076008
if "Precondition Failed" in str(err):
6008-
print("RETRY")
60096009
time.sleep(sleep_time)
6010+
logger.warning(f"Retry #{retry} for {repo_id}, {config_name}")
60106011
continue
60116012
else:
60126013
raise

0 commit comments

Comments (0)