@@ -124,7 +124,6 @@ steps:
124124 - black_box_warning
125125 - molecule_synonyms
126126 - cross_references
127- - chebi_par_id
128127 - molecule_structures
129128 - name : elasticsearch chembl target
130129 url : https://www.ebi.ac.uk/chembl/elk/es
@@ -141,7 +140,7 @@ steps:
141140 # : IMPC STEP :####################################################################################
142141 impc :
143142 - name : solr fetch gene_gene
144- destination : input/evidence/impc
143+ destination : input/impc
145144 url : http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
146145 queries :
147146 - data_type : gene_gene
@@ -150,7 +149,7 @@ steps:
150149 - hgnc_gene_id
151150
152151 - name : solr fetch ontology_ontology
153- destination : input/evidence/impc
152+ destination : input/impc
154153 url : http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
155154 queries :
156155 - data_type : ontology_ontology
@@ -159,7 +158,7 @@ steps:
159158 - hp_id
160159
161160 - name : solr fetch mouse_model
162- destination : input/evidence/impc
161+ destination : input/impc
163162 url : http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
164163 queries :
165164 - data_type : mouse_model
@@ -168,7 +167,7 @@ steps:
168167 - model_phenotypes
169168
170169 - name : solr fetch disease
171- destination : input/evidence/impc
170+ destination : input/impc
172171 url : http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
173172 queries :
174173 - data_type : disease
@@ -177,7 +176,7 @@ steps:
177176 - disease_phenotypes
178177
179178 - name : solr fetch disease_model_summary
180- destination : input/evidence/impc
179+ destination : input/impc
181180 url : http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
182181 batch_size : 100000000
183182 queries :
@@ -193,7 +192,7 @@ steps:
193192 - marker_id
194193
195194 - name : solr fetch ontology
196- destination : input/evidence/impc
195+ destination : input/impc
197196 url : http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
198197 queries :
199198 - data_type : ontology
@@ -210,7 +209,7 @@ steps:
210209 do :
211210 - name : copy ${each}
212211 source : http://www.informatics.jax.org/downloads/reports/${each}
213- destination : input/evidence/impc/${each}
212+ destination : input/impc/${each}
214213
215214 - name : copy gene name
216215 source : https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt
@@ -313,7 +312,7 @@ steps:
313312 validate_version : 1
314313
315314 - name : copy crispr_brain mappings
316- source : https://raw.githubusercontent.com/opentargets/curation/refs/heads/master/mappings/disease/brain_crispr_studies.tsv
315+ source : https://raw.githubusercontent.com/opentargets/curation/refs/tags/${ot_curation}/mappings/disease/brain_crispr_studies.tsv
317316 destination : input/ot_curation/mappings/disease/brain_crispr_studies.tsv
318317 # #################################################################################################
319318
@@ -379,7 +378,7 @@ steps:
379378 - gene_burden/curated_evidence.tsv
380379 do :
381380 - name : copy ${each}
382- source : https://raw.githubusercontent.com/opentargets/curation/refs/heads/${ot_curation}/${each}
381+ source : https://raw.githubusercontent.com/opentargets/curation/refs/tags/${ot_curation}/${each}
383382 destination : input/ot_curation/${each}
384383 # #################################################################################################
385384
@@ -546,7 +545,7 @@ steps:
546545 destination : input/expression/map_with_efos.json
547546
548547 - name : copy normal tissues
549- source : https://www.proteinatlas.org/download/tsv/normal_tissue.tsv.zip
548+ source : https://www.proteinatlas.org/download/tsv/normal_ihc_data.tsv.zip
550549 destination : input/expression/normal_tissue.tsv.zip
551550
552551 - name : find_latest baseline expression binned
@@ -612,11 +611,18 @@ steps:
612611 destination : input/interaction/string-interactions.txt.gz
613612 # #################################################################################################
614613
614+ # : ENHANCER TO GENE STEP
615+ enhancer_to_gene :
616+ - name : explode_glob rE2G interval
617+ glob : gs://interval_data/datasets/ENCODE-rE2G/processed_E2G_2509_filtered.parquet/*.parquet
618+ do :
619+ - name : copy rE2G interval ${match_stem}
620+ source : ${match_prefix}/${match_path}${match_stem}.${match_ext}
621+ destination : output/enhancer_to_gene/${uuid}.parquet
622+ # #################################################################################################
623+
615624 # : L2G STEP :#####################################################################################
616625 l2g :
617- - name : copy l2g model
618- source : gs://ot_orchestration/benchmarks/l2g/fm0/v5.1_best_cv/locus_to_gene_model/classifier.skops
619- destination : etc/model/l2g_prediction/classifier.skops
620626 - name : explode_glob l2g gold_standard
621627 glob : gs://genetics-portal-dev-analysis/yt4/2506_release/training_set/20250625_gentropy_paper_v1.json/*.json
622628 do :
@@ -630,6 +636,14 @@ steps:
630636 - name : copy literature
631637 source : https://ftp.ebi.ac.uk/pub/databases/pmc/DOI/PMID_PMCID_DOI.csv.gz
632638 destination : input/literature/PMID_PMCID_DOI.csv.gz
639+
640+ - name : explode_glob literature dating lut
641+ glob : gs://otar000-evidence_input/literature_export/*.json.gz
642+ do :
643+ - name : copy literature dating lut ${match_stem}
644+ source : ${match_prefix}/${match_path}${match_stem}.${match_ext}
645+ destination : input/literature/literature_export/${uuid}.json.gz
646+
633647 # #################################################################################################
634648
635649 # : ONTOMA STEP :##################################################################################
@@ -752,16 +766,20 @@ steps:
752766 destination : input/target/gene-essentiality/depmap/${match_stem}.${match_ext}
753767
754768 - name : copy ot_curation depmap_tissue_mapping
755- source : https://raw.githubusercontent.com/opentargets/otar/refs/tags/${ot_curation}/gene-essentiality/depmap_uberon_mapping.csv
769+ source : https://raw.githubusercontent.com/opentargets/curation/refs/tags/${ot_curation}/mappings/biosystem/depmap_uberon_mapping.csv
756770 destination : input/ot_curation/mappings/biosystem/depmap_uberon_mapping.csv
757771
758- - name : copy gene name
772+ - name : copy gene name tsv
759773 source : https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt
760774 destination : input/target/genenames/hgnc_complete_set.tsv
761775
776+ - name : copy gene name json
777+ source : https://storage.googleapis.com/public-download-files/hgnc/json/json/hgnc_complete_set.json
778+ destination : input/target/genenames/hgnc_complete_set.json
779+
762780 - name : copy gnomad
763- source : https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz
764- destination : input/target/gnomad/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz
781+ source : https://storage.googleapis.com/gcp-public-data--gnomad/release/${gnomad_version}/constraint/gnomad.v${gnomad_version}.constraint_metrics.tsv
782+ destination : input/target/gnomad/gnomad_constraint_metrics.tsv
765783
766784 - name : copy go annotation human eco gpa
767785 source : https://ftp.ebi.ac.uk/pub/databases/GO/goa/HUMAN/goa_human.gpa.gz
@@ -810,10 +828,10 @@ steps:
810828 destination : input/target/homologue/gene_dictionary/${each}.json
811829 - name : copy ${each} protein homology
812830 source : https://ftp.ensembl.org/pub/release-${ensembl_version}/tsv/ensembl-compara/homologies/${each}/Compara.${ensembl_version}.protein_default.homologies.tsv.gz
813- destination : input/target/homologue/homologies/protein-${each}.tsv
831+ destination : input/target/homologue/homologies/protein-${each}.tsv.gz
814832 - name : copy ${each} ncrna homology
815833 source : https://ftp.ensembl.org/pub/release-${ensembl_version}/tsv/ensembl-compara/homologies/${each}/Compara.${ensembl_version}.ncrna_default.homologies.tsv.gz
816- destination : input/target/homologue/homologies/ncrna-${each}.tsv
834+ destination : input/target/homologue/homologies/ncrna-${each}.tsv.gz
817835
818836 - name : copy homology ensembl vertebrates
819837 source : https://ftp.ensembl.org/pub/release-${ensembl_version}/species_EnsemblVertebrates.txt
0 commit comments