We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 8b252d8 + 0310cae · commit 8e6c160 · Copy full SHA for 8e6c160
src/instructlab/training/utils.py
@@ -870,8 +870,13 @@ def load_latest_full_state(args, accelerator) -> None:
870
if not output_dir.is_dir():
871
return
872
873
- # picks checkpoint with the largest number of samples seen, by name.
874
- checkpoint_list = sorted(list(output_dir.iterdir()), reverse=True)
+ # picks checkpoint with the largest number of samples by splitting the "samples_NNNN" string on _
+ # and comparing the number at the end of the string
875
+ checkpoint_list = sorted(
876
+ list(output_dir.iterdir()),
877
+ reverse=True,
878
+ key=lambda x: int(str(x).rsplit("_", maxsplit=1)[-1]),
879
+ )
880
881
if len(checkpoint_list) == 0:
882
log_rank_0(
0 commit comments