@@ -1895,15 +1895,34 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
18951895 operations = additions [
18961896 i * config .UPLOADS_MAX_NUMBER_PER_COMMIT : (i + 1 ) * config .UPLOADS_MAX_NUMBER_PER_COMMIT
18971897 ]
1898- commit_info = api .create_commit (
1899- repo_id ,
1900- operations = operations ,
1901- commit_message = commit_message + f" (part { i :05d} -of-{ num_commits :05d} )" ,
1902- commit_description = commit_description ,
1903- repo_type = "dataset" ,
1904- revision = revision ,
1905- create_pr = create_pr ,
1906- )
1898+ for retry , sleep_time in enumerate (itertools .chain (range (10 ), itertools .repeat (30 )), start = 1 ):
1899+ # We need to retry if another commit happens at the same time
1900+ sleep_time *= 1 + random .random ()
1901+ try :
1902+ commit_info = api .create_commit (
1903+ repo_id ,
1904+ operations = operations ,
1905+ commit_message = commit_message + f" (part { i :05d} -of-{ num_commits :05d} )" ,
1906+ commit_description = commit_description ,
1907+ repo_type = "dataset" ,
1908+ revision = revision ,
1909+ create_pr = create_pr ,
1910+ )
1911+ except HfHubHTTPError as err :
1912+ if (
1913+ err .__context__
1914+ and isinstance (err .__context__ , HTTPError )
1915+ and err .__context__ .response .status_code == 409
1916+ ):
1917+ # 409 is Conflict (another commit is in progress)
1918+ time .sleep (sleep_time )
1919+ logger .info (
1920+ f"Retrying intermediate commit for { repo_id } , { config_name } ({ retry } /n with status_code { err .__context__ .response .status_code } )"
1921+ )
1922+ continue
1923+ else :
1924+ raise
1925+ break
19071926 logger .info (
19081927 f"Commit #{ i + 1 } completed"
19091928 + (f" (still { num_commits - i - 1 } to go)" if num_commits - i - 1 else "" )
@@ -2745,15 +2764,34 @@ def get_deletions_and_dataset_card() -> tuple[str, list[CommitOperationDelete],
27452764 operations = additions [
27462765 i * config .UPLOADS_MAX_NUMBER_PER_COMMIT : (i + 1 ) * config .UPLOADS_MAX_NUMBER_PER_COMMIT
27472766 ]
2748- commit_info = api .create_commit (
2749- repo_id ,
2750- operations = operations ,
2751- commit_message = commit_message + f" (part { i :05d} -of-{ num_commits :05d} )" ,
2752- commit_description = commit_description ,
2753- repo_type = "dataset" ,
2754- revision = revision ,
2755- create_pr = create_pr ,
2756- )
2767+ for retry , sleep_time in enumerate (itertools .chain (range (10 ), itertools .repeat (30 )), start = 1 ):
2768+ # We need to retry if another commit happens at the same time
2769+ sleep_time *= 1 + random .random ()
2770+ try :
2771+ commit_info = api .create_commit (
2772+ repo_id ,
2773+ operations = operations ,
2774+ commit_message = commit_message + f" (part { i :05d} -of-{ num_commits :05d} )" ,
2775+ commit_description = commit_description ,
2776+ repo_type = "dataset" ,
2777+ revision = revision ,
2778+ create_pr = create_pr ,
2779+ )
2780+ except HfHubHTTPError as err :
2781+ if (
2782+ err .__context__
2783+ and isinstance (err .__context__ , HTTPError )
2784+ and err .__context__ .response .status_code == 409
2785+ ):
2786+ # 409 is Conflict (another commit is in progress)
2787+ time .sleep (sleep_time )
2788+ logger .info (
2789+ f"Retrying intermediate commit for { repo_id } , { config_name } ({ retry } /n with status_code { err .__context__ .response .status_code } )"
2790+ )
2791+ continue
2792+ else :
2793+ raise
2794+ break
27572795 logger .info (
27582796 f"Commit #{ i + 1 } completed"
27592797 + (f" (still { num_commits - i - 1 } to go)" if num_commits - i - 1 else "" )
0 commit comments