Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
67e8751
adding easyconfigs: RoseTTAFold-1.0.0-fosscuda-2019b-Python-3.7.4-PyT…
zemu-unile Aug 23, 2021
f43d1c8
updated dependency names
zemu-unile Aug 24, 2021
61bcebe
Merge branch 'develop' into 20210823110414_new_pr_RoseTTAFold100
zemu-unile Aug 27, 2021
44202c1
update name of dependency
zemu-unile Aug 27, 2021
756f480
add mention of protein databases
zemu-unile Aug 27, 2021
a1356e3
add exception for duplicate TensorFlow for RoseTTAFold-1.0.0
zemu-unile Aug 27, 2021
bfeb44e
add exception for duplicate LLVM for RoseTTAFold-1.0.0 due to transit…
zemu-unile Aug 27, 2021
7e47af2
Merge branch 'develop' into 20210823110414_new_pr_RoseTTAFold100
zemu-unile Aug 27, 2021
ea9370d
add how to download weights.tar.gz
zemu-unile Sep 2, 2021
ac38ab9
remove unused dependency
zemu-unile Sep 8, 2021
0c69b07
add TensorFlow to versionsuffix
zemu-unile Sep 8, 2021
b8ccf52
add HH-suite 3.3.0 for 2019b
zemu-unile Oct 22, 2021
b59ede4
update HH-Suite version to 3.3.0
zemu-unile Oct 25, 2021
cae5582
add exception for HH-Suite
zemu-unile Oct 25, 2021
ebbc4c4
Merge branch 'develop' of https://github.com/easybuilders/easybuild-e…
SebastianAchilles Oct 25, 2021
1fc0868
Merge branch 'develop' into 20210823110414_new_pr_RoseTTAFold100
zemu-unile Oct 25, 2021
7a765b2
remove duplicate HH-suite easyconfig
zemu-unile Oct 25, 2021
140e44c
Merge branch 'develop' into 20210823110414_new_pr_RoseTTAFold100
boegel Aug 30, 2022
b95dcd3
fix conflict in test exceptions
zemu-unile Oct 21, 2022
0d4a789
use SYSTEM constant for dependencies
zemu-unile Oct 21, 2022
f216371
remove leftover lines from merge conflict
zemu-unile Oct 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
easyblock = 'Tarball'

name = 'RoseTTAFold'
version = '1.0.0'

# framework versions this installation is tied to; both are encoded in the versionsuffix
local_pytorch_version = '1.8.1'
local_tf_version = '1.15.2'

# individual versionsuffix components, reused below to select the matching dependency builds
local_python_suffix = '-Python-%(pyver)s'
local_pytorch_suffix = '-PyTorch-' + local_pytorch_version
local_tf_suffix = '-TensorFlow-' + local_tf_version
versionsuffix = ''.join([local_python_suffix, local_pytorch_suffix, local_tf_suffix])

homepage = 'https://github.com/RosettaCommons/RoseTTAFold'
description = """Official implementation of RoseTTAFold: Accurate prediction
of protein structures and interactions using a 3-track network."""

toolchain = {'name': 'fosscuda', 'version': '2019b'}
toolchainopts = {'openmp': True}

github_account = 'RosettaCommons'
source_urls = [GITHUB_SOURCE]

# weights.tar.gz: see https://github.com/RosettaCommons/RoseTTAFold/#installation for how to download it
#
# Running RoseTTAFold additionally requires several protein databases,
# about 2.5 TB in total (~500 GB with file system compression).
# Download links are listed in https://github.com/RosettaCommons/RoseTTAFold/blob/main/README.md;
# as of August 2021 these were:
# http://wwwuser.gwdg.de/~compbiol/uniclust/2020_06/UniRef30_2020_06_hhsuite.tar.gz
# https://bfd.mmseqs.com/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz
# https://files.ipd.uw.edu/pub/RoseTTAFold/pdb100_2021Mar03.tar.gz
sources = [
    'v%(version)s.tar.gz',
    'weights.tar.gz',
]

patches = [
    'RoseTTAFold-1.0.0_cpu_mem_from_env.patch',
    'RoseTTAFold-1.0.0_db_paths_from_env.patch',
    'RoseTTAFold-1.0.0_no_conda.patch',
    'RoseTTAFold-1.0.0_use_eb_paths.patch',
    'RoseTTAFold-1.0.0_fix_cache_directory.patch',
    'RoseTTAFold-1.0.0_lddt_path.patch',
]

# SHA256 checksums, listed in the same order as sources followed by patches
checksums = [
    'c3db4ec7a0d933c5877a76112d51aa0466d86698807603614c419286261df524',  # v1.0.0.tar.gz
    '6379adffe1e0a26fcaa33a228f695be9a10deeba565b38cb0b1b9231d8b5c369',  # weights.tar.gz
    '06de539538fb48ce2efdf3a989f50f02fd360556b76df18f3e0083af5d573461',  # RoseTTAFold-1.0.0_cpu_mem_from_env.patch
    'f7a163cdf7d2c62e18a691a322c81c2fd20a2f92bdbd6e806f93246842092e3e',  # RoseTTAFold-1.0.0_db_paths_from_env.patch
    'a1ea483525cd94712e56107de6513c0c27be8942fd9f3ad627482d787b3c3a50',  # RoseTTAFold-1.0.0_no_conda.patch
    '9d4d56993e5e1bc52ecb3c67b3f71e3b869d559c0c85281750bc0a708b271d55',  # RoseTTAFold-1.0.0_use_eb_paths.patch
    '6e7abdce5c677803aeb174b527d1a360212ef4cc494cf949f75f6baeaffc29b5',  # RoseTTAFold-1.0.0_fix_cache_directory.patch
    '39c56e08b27b2a570d0c920cf1150326ddd583d467ae530a6ba559c183f110d6',  # RoseTTAFold-1.0.0_lddt_path.patch
]

dependencies = [
    ('Biopython', '1.75', local_python_suffix),
    ('BLAST', '2.2.26', '-Linux_x86_64', SYSTEM),
    ('HH-suite', '3.3.0', local_python_suffix),
    ('lDDT', '1.2', '', SYSTEM),
    ('PyRosetta', '4.release-292', local_python_suffix),
    ('PyTorch-Geometric', '1.6.3', local_python_suffix + local_pytorch_suffix),
    ('Python', '3.7.4'),
    ('SciPy-bundle', '2019.10', local_python_suffix),
    ('TensorFlow', local_tf_version, local_python_suffix),
    ('CSBLAST', '2.2.3'),
    ('DGL', '0.6.1', local_python_suffix + local_pytorch_suffix),
    ('lie_learn', '0.0.1.post1', local_python_suffix),
    ('parallel', '20190922'),
    ('PSIPRED', '4.02'),
]

# before installing: copy the separately unpacked weights into the source tree
# and delete the example model files listed as broken symlinks (model_1.pdb ... model_5.pdb)
local_broken_symlinks = ['example/pyrosetta/model/model_%s.pdb' % n for n in range(1, 6)]
local_broken_symlinks_args = ' '.join(local_broken_symlinks)
preinstallopts = 'cp -r ../weights . && rm %s && ' % local_broken_symlinks_args

# the run_*.sh entry points sit in the top-level installation directory, so put that on $PATH
modextrapaths = {'PATH': ''}

sanity_check_paths = {
    'files': [
        'run_e2e_ver.sh',
        'run_pyrosetta_ver.sh',
        'input_prep/make_ss.sh',
        'input_prep/make_msa.sh',
        'network/predict_e2e.py',
        'network/predict_pyRosetta.py',
    ],
    'dirs': [
        'DAN-msa',
        'folding',
        'network',
        'weights',
    ],
}

moduleclass = 'bio'
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
Read the number of cores and maximum memory to use from environment instead of hard coding
author: Christoph Siegert (Leipzig University)

diff -ruN RoseTTAFold.orig/run_e2e_ver.sh RoseTTAFold/run_e2e_ver.sh
--- RoseTTAFold.orig/run_e2e_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_e2e_ver.sh 2021-08-16 13:06:25.547251482 +0200
@@ -15,8 +15,8 @@
SCRIPT=`realpath -s $0`
export PIPEDIR=`dirname $SCRIPT`

-CPU="8" # number of CPUs to use
-MEM="64" # max memory (in GB)
+CPU=${CPU:-"8"} # number of CPUs to use
+MEM=${MEM:-"64"} # max memory (in GB)

# Inputs:
IN="$1" # input.fasta
diff -ruN RoseTTAFold.orig/run_pyrosetta_ver.sh RoseTTAFold/run_pyrosetta_ver.sh
--- RoseTTAFold.orig/run_pyrosetta_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_pyrosetta_ver.sh 2021-08-16 13:07:02.619152279 +0200
@@ -15,8 +15,8 @@
SCRIPT=`realpath -s $0`
export PIPEDIR=`dirname $SCRIPT`

-CPU="8" # number of CPUs to use
-MEM="64" # max memory (in GB)
+CPU=${CPU:-"8"} # number of CPUs to use
+MEM=${MEM:-"64"} # max memory (in GB)

# Inputs:
IN="$1" # input.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
Read the database paths from the environment instead of assuming they are in the installation directory
author: Christoph Siegert (Leipzig University)

diff -ruN RoseTTAFold.orig/input_prep/make_msa.sh RoseTTAFold/input_prep/make_msa.sh
--- RoseTTAFold.orig/input_prep/make_msa.sh 2021-08-16 12:54:05.769231102 +0200
+++ RoseTTAFold/input_prep/make_msa.sh 2021-08-16 13:22:02.508663769 +0200
@@ -9,8 +9,8 @@
MEM="$4"

# sequence databases
-DB="$PIPEDIR/UniRef30_2020_06/UniRef30_2020_06"
-MYDB="$PIPEDIR/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"
+DB="$UNIREF30PATH/UniRef30_2020_06"
+MYDB="$BFDPATH/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt"

# setup hhblits command
HHBLITS="hhblits -o /dev/null -mact 0.35 -maxfilt 100000000 -neffmax 20 -cov 25 -cpu $CPU -nodiff -realign_max 100000000 -maxseq 1000000 -maxmem $MEM -n 4 -d $DB -d $MYDB"
diff -ruN RoseTTAFold.orig/run_e2e_ver.sh RoseTTAFold/run_e2e_ver.sh
--- RoseTTAFold.orig/run_e2e_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_e2e_ver.sh 2021-08-16 13:21:02.188832795 +0200
@@ -22,6 +22,9 @@
IN="$1" # input.fasta
WDIR=`realpath -s $2` # working folder

+BFDPATH=${BFDPATH:-"$PIPEDIR/bfd/"}
+PDB100PATH=${PDB100PATH:-"$PIPEDIR/pdb100_2021Mar03/"}
+UNIREF30PATH=${UNIREF30PATH:-"$PIPEDIR/UniRef30_2020_06/"}

LEN=`tail -n1 $IN | wc -m`

@@ -51,7 +54,7 @@
############################################################
# 3. search for templates
############################################################
-DB="$PIPEDIR/pdb100_2021Mar03/pdb100_2021Mar03"
+DB="$PDB100PATH/pdb100_2021Mar03"
if [ ! -s $WDIR/t000_.hhr ]
then
echo "Running hhsearch"
diff -ruN RoseTTAFold.orig/run_pyrosetta_ver.sh RoseTTAFold/run_pyrosetta_ver.sh
--- RoseTTAFold.orig/run_pyrosetta_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_pyrosetta_ver.sh 2021-08-16 13:21:37.380734181 +0200
@@ -22,6 +22,9 @@
IN="$1" # input.fasta
WDIR=`realpath -s $2` # working folder

+BFDPATH=${BFDPATH:-"$PIPEDIR/bfd/"}
+PDB100PATH=${PDB100PATH:-"$PIPEDIR/pdb100_2021Mar03/"}
+UNIREF30PATH=${UNIREF30PATH:-"$PIPEDIR/UniRef30_2020_06/"}

LEN=`tail -n1 $IN | wc -m`

@@ -51,7 +54,7 @@
############################################################
# 3. search for templates
############################################################
-DB="$PIPEDIR/pdb100_2021Mar03/pdb100_2021Mar03"
+DB="$PDB100PATH/pdb100_2021Mar03"
if [ ! -s $WDIR/t000_.hhr ]
then
echo "Running hhsearch"
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Changes the cache directory path from the installation directory to the user's home directory
to avoid a crash due to insufficient permissions.
author: Christoph Siegert (Leipzig University)

diff -ruN RoseTTAFold.orig/network/equivariant_attention/from_se3cnn/utils_steerable.py RoseTTAFold/network/equivariant_attention/from_se3cnn/utils_steerable.py
--- RoseTTAFold.orig/network/equivariant_attention/from_se3cnn/utils_steerable.py 2021-08-16 12:54:05.769231102 +0200
+++ RoseTTAFold/network/equivariant_attention/from_se3cnn/utils_steerable.py 2021-08-18 16:17:10.653216846 +0200
@@ -33,7 +33,7 @@
return get_matrix_kernel(torch.cat(As, dim=0), eps)


-@cached_dirpklgz("%s/cache/trans_Q"%os.path.dirname(os.path.realpath(__file__)))
+@cached_dirpklgz("%s/.cache/trans_Q"%os.path.expanduser('~'))
def _basis_transformation_Q_J(J, order_in, order_out, version=3): # pylint: disable=W0613
"""
:param J: order of the spherical harmonics
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
Pick lddt executable from PATH instead of script directory
author: Christoph Siegert (Leipzig University)

diff --git a/DAN-msa/pick_final_models.div.py b/DAN-msa/pick_final_models.div.py
index c1f2e9d..4600837 100644
--- a/DAN-msa/pick_final_models.div.py
+++ b/DAN-msa/pick_final_models.div.py
@@ -7,8 +7,6 @@ import numpy as np
from sklearn.cluster import AgglomerativeClustering
import multiprocessing as mp

-script_dir = "/".join(os.path.dirname(os.path.realpath(__file__)).split('/')[:-1])
-
def smooth(x, window_len=13, window='hanning'):
s = np.r_[[x[0]]*(window_len//2), x, [x[-1]]*(window_len//2)]
if window == 'flat': #moving average
@@ -69,8 +67,8 @@ def calc_lddt_dist(args):
pose_i = pose_s[i]
pose_j = pose_s[j]
#
- lddt_1 = float(os.popen("%s/lddt/lddt -c %s %s | grep Glob"%(script_dir, pose_i, pose_j)).readlines()[-1].split()[-1])
- lddt_2 = float(os.popen("%s/lddt/lddt -c %s %s | grep Glob"%(script_dir, pose_j, pose_i)).readlines()[-1].split()[-1])
+ lddt_1 = float(os.popen("lddt -c %s %s | grep Glob"%(pose_i, pose_j)).readlines()[-1].split()[-1])
+ lddt_2 = float(os.popen("lddt -c %s %s | grep Glob"%(pose_j, pose_i)).readlines()[-1].split()[-1])
lddt = (lddt_1 + lddt_2) / 2.0
return 1 - lddt

Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
Do not attempt to use the conda environment
author: Christoph Siegert (Leipzig University)

diff -ruN RoseTTAFold.orig/run_e2e_ver.sh RoseTTAFold/run_e2e_ver.sh
--- RoseTTAFold.orig/run_e2e_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_e2e_ver.sh 2021-08-16 12:55:10.969056632 +0200
@@ -3,15 +3,6 @@
# make the script stop when error (non-true exit code) is occured
set -e

-############################################################
-# >>> conda initialize >>>
-# !! Contents within this block are managed by 'conda init' !!
-__conda_setup="$('conda' 'shell.bash' 'hook' 2> /dev/null)"
-eval "$__conda_setup"
-unset __conda_setup
-# <<< conda initialize <<<
-############################################################
-
SCRIPT=`realpath -s $0`
export PIPEDIR=`dirname $SCRIPT`

@@ -27,7 +18,6 @@

mkdir -p $WDIR/log

-conda activate RoseTTAFold
############################################################
# 1. generate MSAs
############################################################
diff -ruN RoseTTAFold.orig/run_pyrosetta_ver.sh RoseTTAFold/run_pyrosetta_ver.sh
--- RoseTTAFold.orig/run_pyrosetta_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_pyrosetta_ver.sh 2021-08-16 12:55:43.860968614 +0200
@@ -3,15 +3,6 @@
# make the script stop when error (non-true exit code) is occured
set -e

-############################################################
-# >>> conda initialize >>>
-# !! Contents within this block are managed by 'conda init' !!
-__conda_setup="$('conda' 'shell.bash' 'hook' 2> /dev/null)"
-eval "$__conda_setup"
-unset __conda_setup
-# <<< conda initialize <<<
-############################################################
-
SCRIPT=`realpath -s $0`
export PIPEDIR=`dirname $SCRIPT`

@@ -27,7 +18,6 @@

mkdir -p $WDIR/log

-conda activate RoseTTAFold
############################################################
# 1. generate MSAs
############################################################
@@ -81,9 +71,6 @@
############################################################
mkdir -p $WDIR/pdb-3track

-conda deactivate
-conda activate folding
-
for m in 0 1 2
do
for p in 0.05 0.15 0.25 0.35 0.45
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
Use EasyBuild installation roots
author: Christoph Siegert (Leipzig University)

diff -ruN RoseTTAFold.orig/input_prep/make_ss.sh RoseTTAFold/input_prep/make_ss.sh
--- RoseTTAFold.orig/input_prep/make_ss.sh 2021-08-16 12:54:05.769231102 +0200
+++ RoseTTAFold/input_prep/make_ss.sh 2021-08-16 13:03:22.003742639 +0200
@@ -1,6 +1,6 @@
#!/bin/bash

-DATADIR="$CONDA_PREFIX/share/psipred_4.01/data"
+DATADIR="$EBROOTPSIPRED/data"
echo $DATADIR

i_a3m="$1"
@@ -8,7 +8,7 @@

ID=$(basename $i_a3m .a3m).tmp

-$PIPEDIR/csblast-2.2.3/bin/csbuild -i $i_a3m -I a3m -D $PIPEDIR/csblast-2.2.3/data/K4000.crf -o $ID.chk -O chk
+csbuild -i $i_a3m -I a3m -D $EBROOTCSBLAST/data/K4000.crf -o $ID.chk -O chk

head -n 2 $i_a3m > $ID.fasta
echo $ID.chk > $ID.pn
diff -ruN RoseTTAFold.orig/run_e2e_ver.sh RoseTTAFold/run_e2e_ver.sh
--- RoseTTAFold.orig/run_e2e_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_e2e_ver.sh 2021-08-16 13:01:01.476118686 +0200
@@ -12,8 +12,7 @@
# <<< conda initialize <<<
############################################################

-SCRIPT=`realpath -s $0`
-export PIPEDIR=`dirname $SCRIPT`
+export PIPEDIR=$EBROOTROSETTAFOLD

CPU="8" # number of CPUs to use
MEM="64" # max memory (in GB)
diff -ruN RoseTTAFold.orig/run_pyrosetta_ver.sh RoseTTAFold/run_pyrosetta_ver.sh
--- RoseTTAFold.orig/run_pyrosetta_ver.sh 2021-08-16 12:54:04.081235616 +0200
+++ RoseTTAFold/run_pyrosetta_ver.sh 2021-08-16 13:00:45.388161737 +0200
@@ -12,8 +12,7 @@
# <<< conda initialize <<<
############################################################

-SCRIPT=`realpath -s $0`
-export PIPEDIR=`dirname $SCRIPT`
+export PIPEDIR=$EBROOTROSETTAFOLD

CPU="8" # number of CPUs to use
MEM="64" # max memory (in GB)
5 changes: 4 additions & 1 deletion test/easyconfigs/easyconfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,9 @@ def check_dep_vars(self, gen, dep, dep_vars):
# egl variant of glew is required by libwpe
'glew': [('2.2.0; versionsuffix: -egl', [r'libwpe-1\.13\.3-GCCcore-11\.2\.0'])],
'Geant4': [('11.0.1;', [r'GATE-9\.2-foss-2021b'])],
# ncbi-vdb v2.x require HDF5 v1.10.x (HISAT2, SKESA, shovill depend on ncbi-vdb)
# RoseTTAFold on Ivy Bridge needs HH-Suite-3.3.0 or newer
'HH-Suite': [('3.3.0;', ['RoseTTAFold-1.0.0-'])],
# ncbi-vdb v2.x and v3.0.0 require HDF5 v1.10.x (HISAT2, SKESA, shovill depend on ncbi-vdb)
'HDF5': [
(r'1\.10\.', [r'ncbi-vdb-2\.11\.', r'ncbi-vdb-3\.0\.0', r'HISAT2-2\.2\.', r'SKESA-2\.4\.',
Expand All @@ -521,7 +524,7 @@ def check_dep_vars(self, gen, dep, dep_vars):
'LLVM': [
# numba 0.47.x requires LLVM 7.x or 8.x (see https://github.com/numba/llvmlite#compatibility)
(r'8\.', [r'numba-0\.47\.0-', r'librosa-0\.7\.2-', r'BirdNET-20201214-',
r'scVelo-0\.1\.24-', r'PyTorch-Geometric-1\.[346]\.[23]']),
r'scVelo-0\.1\.24-', r'PyTorch-Geometric-1\.[346]\.[23]', 'RoseTTAFold-1.0.0-']),
(r'10\.0\.1', [r'cell2location-0\.05-alpha-', r'cryoDRGN-0\.3\.2-', r'loompy-3\.0\.6-',
r'numba-0\.52\.0-', r'PyOD-0\.8\.7-', r'PyTorch-Geometric-1\.6\.3',
r'scanpy-1\.7\.2-', r'umap-learn-0\.4\.6-']),
Expand Down