@@ -5844,7 +5844,7 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
58445844 # get the info from the README to update them
58455845 if repo_with_dataset_card :
58465846 dataset_card_path = api .hf_hub_download (
5847- repo_id , config .REPOCARD_FILENAME , repo_type = "dataset" , revision = revision
5847+ repo_id , config .REPOCARD_FILENAME , repo_type = "dataset" , revision = parent_commit
58485848 )
58495849 dataset_card = DatasetCard .load (Path (dataset_card_path ))
58505850 dataset_card_data = dataset_card .data
@@ -5860,7 +5860,7 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
58605860 dataset_card_data = DatasetCardData ()
58615861 metadata_configs = MetadataConfigs ()
58625862 dataset_infos_path = api .hf_hub_download (
5863- repo_id , config .DATASETDICT_INFOS_FILENAME , repo_type = "dataset" , revision = revision
5863+ repo_id , config .DATASETDICT_INFOS_FILENAME , repo_type = "dataset" , revision = parent_commit
58645864 )
58655865 with open (dataset_infos_path , encoding = "utf-8" ) as f :
58665866 dataset_infos : dict = json .load (f )
@@ -5935,7 +5935,7 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
59355935 # push to the deprecated dataset_infos.json
59365936 if repo_with_dataset_infos :
59375937 dataset_infos_path = api .hf_hub_download (
5938- repo_id , config .DATASETDICT_INFOS_FILENAME , repo_type = "dataset" , revision = revision
5938+ repo_id , config .DATASETDICT_INFOS_FILENAME , repo_type = "dataset" , revision = parent_commit
59395939 )
59405940 with open (dataset_infos_path , encoding = "utf-8" ) as f :
59415941 dataset_infos : dict = json .load (f )
@@ -5975,9 +5975,10 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
59755975 + (f" (still { num_commits - i - 1 } to go)" if num_commits - i - 1 else "" )
59765976 + "."
59775977 )
5978- additions = deletions = []
5978+ additions = []
5979+ deletions = []
59795980
5980- for sleep_time in itertools .chain (range (10 ), itertools .repeat (30 )):
5981+ for retry , sleep_time in enumerate ( itertools .chain (range (10 ), itertools .repeat (30 )), start = 1 ):
59815982 # We need to retry if there was a commit in between in case it touched the dataset card data
59825983 sleep_time *= 1 + random .random ()
59835984 parent_commit , dataset_card , dataset_infos = get_new_dataset_card_data ()
@@ -6005,8 +6006,8 @@ def get_new_dataset_card_data() -> tuple[str, str, Optional[str]]:
60056006 )
60066007 except HfHubHTTPError as err :
60076008 if "Precondition Failed" in str (err ):
6008- print ("RETRY" )
60096009 time .sleep (sleep_time )
6010+ logger .warning (f"Retry #{ retry } for { repo_id } , { config_name } " )
60106011 continue
60116012 else :
60126013 raise
0 commit comments