Skip to content

Commit 378f1b0

Browse files
committed
25.12 config matching orchestration
1 parent 692798c commit 378f1b0

File tree

2 files changed

+39
-21
lines changed

2 files changed

+39
-21
lines changed

config.yaml

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,6 @@ steps:
124124
- black_box_warning
125125
- molecule_synonyms
126126
- cross_references
127-
- chebi_par_id
128127
- molecule_structures
129128
- name: elasticsearch chembl target
130129
url: https://www.ebi.ac.uk/chembl/elk/es
@@ -141,7 +140,7 @@ steps:
141140
#: IMPC STEP :####################################################################################
142141
impc:
143142
- name: solr fetch gene_gene
144-
destination: input/evidence/impc
143+
destination: input/impc
145144
url: http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
146145
queries:
147146
- data_type: gene_gene
@@ -150,7 +149,7 @@ steps:
150149
- hgnc_gene_id
151150

152151
- name: solr fetch ontology_ontology
153-
destination: input/evidence/impc
152+
destination: input/impc
154153
url: http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
155154
queries:
156155
- data_type: ontology_ontology
@@ -159,7 +158,7 @@ steps:
159158
- hp_id
160159

161160
- name: solr fetch mouse_model
162-
destination: input/evidence/impc
161+
destination: input/impc
163162
url: http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
164163
queries:
165164
- data_type: mouse_model
@@ -168,7 +167,7 @@ steps:
168167
- model_phenotypes
169168

170169
- name: solr fetch disease
171-
destination: input/evidence/impc
170+
destination: input/impc
172171
url: http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
173172
queries:
174173
- data_type: disease
@@ -177,7 +176,7 @@ steps:
177176
- disease_phenotypes
178177

179178
- name: solr fetch disease_model_summary
180-
destination: input/evidence/impc
179+
destination: input/impc
181180
url: http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
182181
batch_size: 100000000
183182
queries:
@@ -193,7 +192,7 @@ steps:
193192
- marker_id
194193

195194
- name: solr fetch ontology
196-
destination: input/evidence/impc
195+
destination: input/impc
197196
url: http://www.ebi.ac.uk/mi/impc/solr/phenodigm/select
198197
queries:
199198
- data_type: ontology
@@ -210,7 +209,7 @@ steps:
210209
do:
211210
- name: copy ${each}
212211
source: http://www.informatics.jax.org/downloads/reports/${each}
213-
destination: input/evidence/impc/${each}
212+
destination: input/impc/${each}
214213

215214
- name: copy gene name
216215
source: https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt
@@ -313,7 +312,7 @@ steps:
313312
validate_version: 1
314313

315314
- name: copy crispr_brain mappings
316-
source: https://raw.githubusercontent.com/opentargets/curation/refs/heads/master/mappings/disease/brain_crispr_studies.tsv
315+
source: https://raw.githubusercontent.com/opentargets/curation/refs/tags/${ot_curation}/mappings/disease/brain_crispr_studies.tsv
317316
destination: input/ot_curation/mappings/disease/brain_crispr_studies.tsv
318317
##################################################################################################
319318

@@ -379,7 +378,7 @@ steps:
379378
- gene_burden/curated_evidence.tsv
380379
do:
381380
- name: copy ${each}
382-
source: https://raw.githubusercontent.com/opentargets/curation/refs/heads/${ot_curation}/${each}
381+
source: https://raw.githubusercontent.com/opentargets/curation/refs/tags/${ot_curation}/${each}
383382
destination: input/ot_curation/${each}
384383
##################################################################################################
385384

@@ -546,7 +545,7 @@ steps:
546545
destination: input/expression/map_with_efos.json
547546

548547
- name: copy normal tissues
549-
source: https://www.proteinatlas.org/download/tsv/normal_tissue.tsv.zip
548+
source: https://www.proteinatlas.org/download/tsv/normal_ihc_data.tsv.zip
550549
destination: input/expression/normal_tissue.tsv.zip
551550

552551
- name: find_latest baseline expression binned
@@ -612,11 +611,18 @@ steps:
612611
destination: input/interaction/string-interactions.txt.gz
613612
##################################################################################################
614613

614+
#: ENHANCER TO GENE STEP
615+
enhancer_to_gene:
616+
- name: explode_glob rE2G interval
617+
glob: gs://interval_data/datasets/ENCODE-rE2G/processed_E2G_2509_filtered.parquet/*.parquet
618+
do:
619+
- name: copy rE2G interval ${match_stem}
620+
source: ${match_prefix}/${match_path}${match_stem}.${match_ext}
621+
destination: output/enhancer_to_gene/${uuid}.parquet
622+
##################################################################################################
623+
615624
#: L2G STEP :#####################################################################################
616625
l2g:
617-
- name: copy l2g model
618-
source: gs://ot_orchestration/benchmarks/l2g/fm0/v5.1_best_cv/locus_to_gene_model/classifier.skops
619-
destination: etc/model/l2g_prediction/classifier.skops
620626
- name: explode_glob l2g gold_standard
621627
glob: gs://genetics-portal-dev-analysis/yt4/2506_release/training_set/20250625_gentropy_paper_v1.json/*.json
622628
do:
@@ -630,6 +636,14 @@ steps:
630636
- name: copy literature
631637
source: https://ftp.ebi.ac.uk/pub/databases/pmc/DOI/PMID_PMCID_DOI.csv.gz
632638
destination: input/literature/PMID_PMCID_DOI.csv.gz
639+
640+
- name: explode_glob literature dating lut
641+
glob: gs://otar000-evidence_input/literature_export/*.json.gz
642+
do:
643+
- name: copy literature dating lut ${match_stem}
644+
source: ${match_prefix}/${match_path}${match_stem}.${match_ext}
645+
destination: input/literature/literature_export/${uuid}.json.gz
646+
633647
##################################################################################################
634648

635649
#: ONTOMA STEP :##################################################################################
@@ -752,16 +766,20 @@ steps:
752766
destination: input/target/gene-essentiality/depmap/${match_stem}.${match_ext}
753767

754768
- name: copy ot_curation depmap_tissue_mapping
755-
source: https://raw.githubusercontent.com/opentargets/otar/refs/tags/${ot_curation}/gene-essentiality/depmap_uberon_mapping.csv
769+
source: https://raw.githubusercontent.com/opentargets/curation/refs/tags/${ot_curation}/mappings/biosystem/depmap_uberon_mapping.csv
756770
destination: input/ot_curation/mappings/biosystem/depmap_uberon_mapping.csv
757771

758-
- name: copy gene name
772+
- name: copy gene name tsv
759773
source: https://storage.googleapis.com/public-download-files/hgnc/tsv/tsv/hgnc_complete_set.txt
760774
destination: input/target/genenames/hgnc_complete_set.tsv
761775

776+
- name: copy gene name json
777+
source: https://storage.googleapis.com/public-download-files/hgnc/json/json/hgnc_complete_set.json
778+
destination: input/target/genenames/hgnc_complete_set.json
779+
762780
- name: copy gnomad
763-
source: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz
764-
destination: input/target/gnomad/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz
781+
source: https://storage.googleapis.com/gcp-public-data--gnomad/release/${gnomad_version}/constraint/gnomad.v${gnomad_version}.constraint_metrics.tsv
782+
destination: input/target/gnomad/gnomad_constraint_metrics.tsv
765783

766784
- name: copy go annotation human eco gpa
767785
source: https://ftp.ebi.ac.uk/pub/databases/GO/goa/HUMAN/goa_human.gpa.gz
@@ -810,10 +828,10 @@ steps:
810828
destination: input/target/homologue/gene_dictionary/${each}.json
811829
- name: copy ${each} protein homology
812830
source: https://ftp.ensembl.org/pub/release-${ensembl_version}/tsv/ensembl-compara/homologies/${each}/Compara.${ensembl_version}.protein_default.homologies.tsv.gz
813-
destination: input/target/homologue/homologies/protein-${each}.tsv
831+
destination: input/target/homologue/homologies/protein-${each}.tsv.gz
814832
- name: copy ${each} ncrna homology
815833
source: https://ftp.ensembl.org/pub/release-${ensembl_version}/tsv/ensembl-compara/homologies/${each}/Compara.${ensembl_version}.ncrna_default.homologies.tsv.gz
816-
destination: input/target/homologue/homologies/ncrna-${each}.tsv
834+
destination: input/target/homologue/homologies/ncrna-${each}.tsv.gz
817835

818836
- name: copy homology ensembl vertebrates
819837
source: https://ftp.ensembl.org/pub/release-${ensembl_version}/species_EnsemblVertebrates.txt

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "PIS"
3-
version = "25.2.3-dev.27"
3+
version = "25.12.0"
44
description = "Open Targets Pipeline Input Stage"
55
readme = "README.md"
66
requires-python = ">=3.11,<3.14"

0 commit comments

Comments
 (0)