Skip to content

Commit 1449822

Browse files
authored
Merge pull request #4797 from lexming/fix-git-clone
clean up `get_source_tarball_from_git` to use the same checkout mechanism when creating tarballs for Git repos with tags and commits
2 parents ee3f7a8 + 4ba02d6 commit 1449822

File tree

2 files changed

+126
-93
lines changed

2 files changed

+126
-93
lines changed

easybuild/tools/filetools.py

Lines changed: 28 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -2700,27 +2700,16 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
27002700
# prepare target directory and clone repository
27012701
mkdir(target_dir, parents=True)
27022702

2703+
# compose base git command
2704+
git_cmd = 'git'
2705+
if extra_config_params is not None:
2706+
git_cmd_params = [f"-c {param}" for param in extra_config_params]
2707+
git_cmd += f" {' '.join(git_cmd_params)}"
2708+
27032709
# compose 'git clone' command, and run it
2704-
if extra_config_params:
2705-
git_cmd = 'git ' + ' '.join(['-c %s' % param for param in extra_config_params])
2706-
else:
2707-
git_cmd = 'git'
27082710
clone_cmd = [git_cmd, 'clone']
2709-
2710-
if not keep_git_dir and not commit:
2711-
# Speed up cloning by only fetching the most recent commit, not the whole history
2712-
# When we don't want to keep the .git folder there won't be a difference in the result
2713-
clone_cmd.extend(['--depth', '1'])
2714-
2715-
if tag:
2716-
clone_cmd.extend(['--branch', tag])
2717-
if recursive:
2718-
clone_cmd.append('--recursive')
2719-
if recurse_submodules:
2720-
clone_cmd.extend(["--recurse-submodules='%s'" % pat for pat in recurse_submodules])
2721-
else:
2722-
# checkout is done separately below for specific commits
2723-
clone_cmd.append('--no-checkout')
2711+
# checkout is done separately below for specific commits
2712+
clone_cmd.append('--no-checkout')
27242713

27252714
clone_cmd.append(f'{url}/{repo_name}.git')
27262715

@@ -2735,56 +2724,31 @@ def get_source_tarball_from_git(filename, target_dir, git_config):
27352724
if clone_into:
27362725
repo_name = clone_into
27372726

2727+
repo_dir = os.path.join(tmpdir, repo_name)
2728+
2729+
# compose checkout command
2730+
checkout_cmd = [git_cmd, 'checkout']
27382731
# if a specific commit is asked for, check it out
27392732
if commit:
2740-
checkout_cmd = [git_cmd, 'checkout', commit]
2741-
2742-
if recursive or recurse_submodules:
2743-
checkout_cmd.extend(['&&', git_cmd, 'submodule', 'update', '--init'])
2744-
if recursive:
2745-
checkout_cmd.append('--recursive')
2746-
if recurse_submodules:
2747-
checkout_cmd.extend(["--recurse-submodules='%s'" % pat for pat in recurse_submodules])
2748-
2749-
work_dir = os.path.join(tmpdir, repo_name) if repo_name else tmpdir
2750-
run_shell_cmd(' '.join(checkout_cmd), work_dir=work_dir, hidden=True, verbose_dry_run=True)
2751-
2752-
elif not build_option('extended_dry_run'):
2753-
# If we wanted to get a tag make sure we actually got a tag and not a branch with the same name
2754-
# This doesn't make sense in dry-run mode as we don't have anything to check
2755-
cmd = f"{git_cmd} describe --exact-match --tags HEAD"
2756-
work_dir = os.path.join(tmpdir, repo_name) if repo_name else tmpdir
2757-
res = run_shell_cmd(cmd, fail_on_error=False, work_dir=work_dir, hidden=True, verbose_dry_run=True)
2758-
2759-
if res.exit_code != EasyBuildExit.SUCCESS or tag not in res.output.splitlines():
2760-
msg = f"Tag {tag} was not downloaded in the first try due to {url}/{repo_name} containing a branch"
2761-
msg += f" with the same name. You might want to alert the maintainers of {repo_name} about that issue."
2762-
print_warning(msg)
2763-
2764-
cmds = []
2765-
2766-
if not keep_git_dir:
2767-
# make the repo unshallow first;
2768-
# this is equivalent with 'git fetch -unshallow' in Git 1.8.3+
2769-
# (first fetch seems to do nothing, unclear why)
2770-
cmds.append(f"{git_cmd} fetch --depth=2147483647 && git fetch --depth=2147483647")
2771-
2772-
cmds.append(f"{git_cmd} checkout refs/tags/{tag}")
2773-
# Clean all untracked files, e.g. from left-over submodules
2774-
cmds.append(f"{git_cmd} clean --force -d -x")
2775-
if recursive:
2776-
cmds.append(f"{git_cmd} submodule update --init --recursive")
2777-
elif recurse_submodules:
2778-
cmds.append(f"{git_cmd} submodule update --init ")
2779-
cmds[-1] += ' '.join(["--recurse-submodules='%s'" % pat for pat in recurse_submodules])
2780-
2781-
for cmd in cmds:
2782-
run_shell_cmd(cmd, work_dir=work_dir, hidden=True, verbose_dry_run=True)
2733+
checkout_cmd.append(f"{commit}")
2734+
elif tag:
2735+
checkout_cmd.append(f"refs/tags/{tag}")
2736+
2737+
run_shell_cmd(' '.join(checkout_cmd), work_dir=repo_dir, hidden=True, verbose_dry_run=True)
2738+
2739+
if recursive or recurse_submodules:
2740+
submodule_cmd = [git_cmd, 'submodule', 'update', '--init']
2741+
if recursive:
2742+
submodule_cmd.append('--recursive')
2743+
if recurse_submodules:
2744+
submodule_pathspec = [f"':{submod_path}'" for submod_path in recurse_submodules]
2745+
submodule_cmd.extend(['--'] + submodule_pathspec)
2746+
2747+
run_shell_cmd(' '.join(submodule_cmd), work_dir=repo_dir, hidden=True, verbose_dry_run=True)
27832748

27842749
# Create archive
2785-
repo_path = os.path.join(tmpdir, repo_name)
27862750
reproducible = not keep_git_dir # presence of .git directory renders repo unreproducible
2787-
archive_path = make_archive(repo_path, archive_file=filename, archive_dir=target_dir, reproducible=reproducible)
2751+
archive_path = make_archive(repo_dir, archive_file=filename, archive_dir=target_dir, reproducible=reproducible)
27882752

27892753
# cleanup (repo_name dir does not exist in dry run mode)
27902754
remove(tmpdir)

test/framework/filetools.py

Lines changed: 98 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2978,38 +2978,50 @@ def run_check():
29782978
}
29792979
string_args = {
29802980
'git_repo': 'git@github.com:easybuilders/testrepository.git',
2981+
'git_clone_cmd': 'git clone --no-checkout',
29812982
'test_prefix': self.test_prefix,
29822983
}
29832984

29842985
expected = '\n'.join([
2985-
r' running shell command "git clone --depth 1 --branch tag_for_tests {git_repo}"',
2986+
r' running shell command "{git_clone_cmd} {git_repo}"',
29862987
r" \(in .*/tmp.*\)",
2988+
r' running shell command "git checkout refs/tags/tag_for_tests"',
2989+
r" \(in .*/{repo_name}\)",
29872990
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
29882991
]).format(**string_args, repo_name='testrepository')
29892992
run_check()
29902993

29912994
git_config['clone_into'] = 'test123'
29922995
expected = '\n'.join([
2993-
r' running shell command "git clone --depth 1 --branch tag_for_tests {git_repo} test123"',
2996+
r' running shell command "{git_clone_cmd} {git_repo} test123"',
29942997
r" \(in .*/tmp.*\)",
2998+
r' running shell command "git checkout refs/tags/tag_for_tests"',
2999+
r" \(in .*/{repo_name}\)",
29953000
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
29963001
]).format(**string_args, repo_name='test123')
29973002
run_check()
29983003
del git_config['clone_into']
29993004

30003005
git_config['recursive'] = True
30013006
expected = '\n'.join([
3002-
r' running shell command "git clone --depth 1 --branch tag_for_tests --recursive {git_repo}"',
3007+
r' running shell command "{git_clone_cmd} {git_repo}"',
30033008
r" \(in .*/tmp.*\)",
3009+
r' running shell command "git checkout refs/tags/tag_for_tests"',
3010+
r" \(in .*/{repo_name}\)",
3011+
r' running shell command "git submodule update --init --recursive"',
3012+
r" \(in .*/{repo_name}\)",
30043013
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
30053014
]).format(**string_args, repo_name='testrepository')
30063015
run_check()
30073016

30083017
git_config['recurse_submodules'] = ['!vcflib', '!sdsl-lite']
30093018
expected = '\n'.join([
3010-
' running shell command "git clone --depth 1 --branch tag_for_tests --recursive'
3011-
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' {git_repo}"',
3019+
r' running shell command "{git_clone_cmd} {git_repo}"',
30123020
r" \(in .*/tmp.*\)",
3021+
r' running shell command "git checkout refs/tags/tag_for_tests"',
3022+
r" \(in .*/{repo_name}\)",
3023+
r' running shell command "git submodule update --init --recursive -- \':!vcflib\' \':!sdsl-lite\'"',
3024+
r" \(in .*/{repo_name}\)",
30133025
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
30143026
]).format(**string_args, repo_name='testrepository')
30153027
run_check()
@@ -3018,33 +3030,30 @@ def run_check():
30183030
'submodule."fastahack".active=false',
30193031
'submodule."sha1".active=false',
30203032
]
3033+
git_cmd_extra = 'git -c submodule."fastahack".active=false -c submodule."sha1".active=false'
30213034
expected = '\n'.join([
3022-
' running shell command "git -c submodule."fastahack".active=false -c submodule."sha1".active=false'
3023-
+ ' clone --depth 1 --branch tag_for_tests --recursive'
3024-
+ ' --recurse-submodules=\'!vcflib\' --recurse-submodules=\'!sdsl-lite\' {git_repo}"',
3035+
r' running shell command "{git_cmd_extra} clone --no-checkout {git_repo}"',
30253036
r" \(in .*/tmp.*\)",
3037+
r' running shell command "{git_cmd_extra} checkout refs/tags/tag_for_tests"',
3038+
r" \(in .*/{repo_name}\)",
3039+
r' running shell command "{git_cmd_extra} submodule update --init --recursive --'
3040+
+ ' \':!vcflib\' \':!sdsl-lite\'"',
3041+
r" \(in .*/{repo_name}\)",
30263042
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
3027-
]).format(**string_args, repo_name='testrepository')
3043+
]).format(**string_args, repo_name='testrepository', git_cmd_extra=git_cmd_extra)
30283044
run_check()
30293045
del git_config['recurse_submodules']
30303046
del git_config['extra_config_params']
30313047

3032-
git_config['keep_git_dir'] = True
3033-
expected = '\n'.join([
3034-
r' running shell command "git clone --branch tag_for_tests --recursive {git_repo}"',
3035-
r" \(in .*/tmp.*\)",
3036-
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
3037-
]).format(**string_args, repo_name='testrepository')
3038-
run_check()
3039-
del git_config['keep_git_dir']
3040-
30413048
del git_config['tag']
30423049
git_config['commit'] = '8456f86'
30433050
expected = '\n'.join([
30443051
r' running shell command "git clone --no-checkout {git_repo}"',
30453052
r" \(in .*/tmp.*\)",
3046-
r' running shell command "git checkout 8456f86 && git submodule update --init --recursive"',
3047-
r" \(in .*/testrepository\)",
3053+
r' running shell command "git checkout 8456f86"',
3054+
r" \(in .*/{repo_name}\)",
3055+
r' running shell command "git submodule update --init --recursive"',
3056+
r" \(in .*/{repo_name}\)",
30483057
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
30493058
]).format(**string_args, repo_name='testrepository')
30503059
run_check()
@@ -3053,9 +3062,10 @@ def run_check():
30533062
expected = '\n'.join([
30543063
r' running shell command "git clone --no-checkout {git_repo}"',
30553064
r" \(in .*/tmp.*\)",
3056-
r' running shell command "git checkout 8456f86 && git submodule update --init '
3057-
r"--recursive --recurse-submodules='!vcflib' --recurse-submodules='!sdsl-lite'\"",
3058-
r" \(in .*/testrepository\)",
3065+
r' running shell command "git checkout 8456f86"',
3066+
r" \(in .*/{repo_name}\)",
3067+
r' running shell command "git submodule update --init --recursive -- \':!vcflib\' \':!sdsl-lite\'"',
3068+
r" \(in .*/{repo_name}\)",
30593069
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
30603070
]).format(**string_args, repo_name='testrepository')
30613071
run_check()
@@ -3064,9 +3074,9 @@ def run_check():
30643074
del git_config['recurse_submodules']
30653075
expected = '\n'.join([
30663076
r' running shell command "git clone --no-checkout {git_repo}"',
3067-
r" \(in /.*\)",
3077+
r" \(in .*\)",
30683078
r' running shell command "git checkout 8456f86"',
3069-
r" \(in /.*/testrepository\)",
3079+
r" \(in .*/{repo_name}\)",
30703080
r"Archiving '.*/{repo_name}' into '{test_prefix}/target/test.tar.xz'...",
30713081
]).format(**string_args, repo_name='testrepository')
30723082
run_check()
@@ -3116,21 +3126,21 @@ def run_check():
31163126
with self.mocked_stdout_stderr():
31173127
extracted_repo_dir = ft.extract_file(test_file, extracted_dir, change_into_dir=False)
31183128
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'this-is-a-branch.txt')))
3129+
self.assertFalse(os.path.isdir(os.path.join(extracted_repo_dir, '.git')))
31193130
os.remove(test_file)
31203131

31213132
# use a tag that clashes with a branch name and make sure this is handled correctly
31223133
git_config['tag'] = 'tag_for_tests'
31233134
with self.mocked_stdout_stderr():
31243135
res = ft.get_source_tarball_from_git('test', target_dir, git_config)
3125-
stderr = self.get_stderr()
3126-
self.assertIn('Tag tag_for_tests was not downloaded in the first try', stderr)
31273136
self.assertEqual(res, test_file)
31283137
self.assertTrue(os.path.isfile(test_file))
31293138
# Check that we indeed downloaded the tag and not the branch
31303139
extracted_dir = tempfile.mkdtemp(prefix='extracted_dir')
31313140
with self.mocked_stdout_stderr():
31323141
extracted_repo_dir = ft.extract_file(test_file, extracted_dir, change_into_dir=False)
31333142
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'this-is-a-tag.txt')))
3143+
self.assertFalse(os.path.isdir(os.path.join(extracted_repo_dir, '.git')))
31343144

31353145
del git_config['tag']
31363146
git_config['commit'] = '90366ea'
@@ -3139,15 +3149,74 @@ def run_check():
31393149
self.assertEqual(res, test_file)
31403150
self.assertTrue(os.path.isfile(test_file))
31413151
test_tar_files.append(os.path.basename(test_file))
3142-
self.assertEqual(sorted(os.listdir(target_dir)), test_tar_files)
3152+
self.assertCountEqual(sorted(os.listdir(target_dir)), test_tar_files)
3153+
extracted_dir = tempfile.mkdtemp(prefix='extracted_dir')
3154+
with self.mocked_stdout_stderr():
3155+
extracted_repo_dir = ft.extract_file(test_file, extracted_dir, change_into_dir=False)
3156+
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'README.md')))
3157+
self.assertFalse(os.path.isdir(os.path.join(extracted_repo_dir, '.git')))
31433158

31443159
git_config['keep_git_dir'] = True
31453160
res = ft.get_source_tarball_from_git('test3', target_dir, git_config)
31463161
test_file = os.path.join(target_dir, 'test3.tar.xz')
31473162
self.assertEqual(res, test_file)
31483163
self.assertTrue(os.path.isfile(test_file))
31493164
test_tar_files.append(os.path.basename(test_file))
3150-
self.assertEqual(sorted(os.listdir(target_dir)), test_tar_files)
3165+
self.assertCountEqual(sorted(os.listdir(target_dir)), test_tar_files)
3166+
extracted_dir = tempfile.mkdtemp(prefix='extracted_dir')
3167+
with self.mocked_stdout_stderr():
3168+
extracted_repo_dir = ft.extract_file(test_file, extracted_dir, change_into_dir=False)
3169+
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'README.md')))
3170+
self.assertTrue(os.path.isdir(os.path.join(extracted_repo_dir, '.git')))
3171+
3172+
del git_config['keep_git_dir']
3173+
git_config['commit'] = '17a551c'
3174+
git_config['recursive'] = True
3175+
res = ft.get_source_tarball_from_git('test_recursive', target_dir, git_config)
3176+
test_file = os.path.join(target_dir, 'test_recursive.tar.xz')
3177+
self.assertEqual(res, test_file)
3178+
self.assertTrue(os.path.isfile(test_file))
3179+
test_tar_files.append(os.path.basename(test_file))
3180+
self.assertCountEqual(sorted(os.listdir(target_dir)), test_tar_files)
3181+
extracted_dir = tempfile.mkdtemp(prefix='extracted_dir')
3182+
with self.mocked_stdout_stderr():
3183+
extracted_repo_dir = ft.extract_file(test_file, extracted_dir, change_into_dir=False)
3184+
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'README.md')))
3185+
self.assertFalse(os.path.isdir(os.path.join(extracted_repo_dir, '.git')))
3186+
self.assertTrue(os.path.isdir(os.path.join(extracted_repo_dir, 'easybuilders.github.io')))
3187+
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'easybuilders.github.io', 'index.html')))
3188+
3189+
git_config['commit'] = '17a551c'
3190+
git_config['recurse_submodules'] = ['easybuilders.github.io']
3191+
res = ft.get_source_tarball_from_git('test_submodules', target_dir, git_config)
3192+
test_file = os.path.join(target_dir, 'test_submodules.tar.xz')
3193+
self.assertEqual(res, test_file)
3194+
self.assertTrue(os.path.isfile(test_file))
3195+
test_tar_files.append(os.path.basename(test_file))
3196+
self.assertCountEqual(sorted(os.listdir(target_dir)), test_tar_files)
3197+
extracted_dir = tempfile.mkdtemp(prefix='extracted_dir')
3198+
with self.mocked_stdout_stderr():
3199+
extracted_repo_dir = ft.extract_file(test_file, extracted_dir, change_into_dir=False)
3200+
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'README.md')))
3201+
self.assertFalse(os.path.isdir(os.path.join(extracted_repo_dir, '.git')))
3202+
self.assertTrue(os.path.isdir(os.path.join(extracted_repo_dir, 'easybuilders.github.io')))
3203+
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'easybuilders.github.io', 'index.html')))
3204+
3205+
git_config['commit'] = '17a551c'
3206+
git_config['recurse_submodules'] = ['!easybuilders.github.io']
3207+
res = ft.get_source_tarball_from_git('test_exclude_submodules', target_dir, git_config)
3208+
test_file = os.path.join(target_dir, 'test_exclude_submodules.tar.xz')
3209+
self.assertEqual(res, test_file)
3210+
self.assertTrue(os.path.isfile(test_file))
3211+
test_tar_files.append(os.path.basename(test_file))
3212+
self.assertCountEqual(sorted(os.listdir(target_dir)), test_tar_files)
3213+
extracted_dir = tempfile.mkdtemp(prefix='extracted_dir')
3214+
with self.mocked_stdout_stderr():
3215+
extracted_repo_dir = ft.extract_file(test_file, extracted_dir, change_into_dir=False)
3216+
self.assertTrue(os.path.isfile(os.path.join(extracted_repo_dir, 'README.md')))
3217+
self.assertFalse(os.path.isdir(os.path.join(extracted_repo_dir, '.git')))
3218+
self.assertTrue(os.path.isdir(os.path.join(extracted_repo_dir, 'easybuilders.github.io')))
3219+
self.assertFalse(os.path.isfile(os.path.join(extracted_repo_dir, 'easybuilders.github.io', 'index.html')))
31513220

31523221
except EasyBuildError as err:
31533222
if "Network is down" in str(err):

0 commit comments

Comments
 (0)