Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ work_path: ./work
log_level: INFO
pool_size: 16

release_uri: gs://open-targets-pre-data-releases/uo_test_otter
# release_uri: gs://ot_orchestration/pis/encode
scratchpad:
chembl_version: '36'
efo_version: '3.83.0'
Expand Down Expand Up @@ -935,3 +935,40 @@ steps:
source: https://raw.githubusercontent.com/opentargets/target_engine/main/src/data_flow/phenotypeScores/20230825_mousePheScores.csv
destination: input/target_engine/mouse_pheno_scores.csv
##################################################################################################


# Step encode_rE2G: copies the ENCODE rE2G file report to a local manifest, then
# feeds that manifest to a crawl_encode task that carries a COPY task template.
encode_rE2G:
# Copy the ENCODE file report (report.tsv query) to the local manifest path.
- name: copy encode rE2G manifest
# NOTE: the two accessions excluded by the `accession!=` filters in the query below
# (ENCFF094BVF, ENCFF269DKY) have divergent schemas that do not conform to the expected rE2G format
source: https://www.encodeproject.org/report.tsv?type=File&searchTerm=encode-re2g+thresholded&output_type=thresholded+element+gene+links&status=released&limit=all&accession!=ENCFF094BVF&accession!=ENCFF269DKY
destination: input/encode-rE2G/e2g_manifest.tsv

# Declares a dependency (via `requires`) on the manifest copy task above and
# reads the file that task writes.
- name: crawl_encode rE2G manifest
requires:
- copy encode rE2G manifest
manifest: input/encode-rE2G/e2g_manifest.tsv
# Names on the left are the ${...} variables used in `expand`; the quoted values
# are presumably the manifest column headers they are read from — TODO confirm.
columns:
accession: 'Accession'
url_stem: 'Download URL'
# NOTE(review): presumably instantiated once per manifest row — confirm against
# the crawl_encode task implementation.
expand: # COPY task template
name: copy encode rE2G ${accession}
# ${url_stem} is appended directly to the host, so it is presumably a
# site-relative path starting with "/" — TODO confirm.
source: https://www.encodeproject.org${url_stem}
destination: input/encode-rE2G/${accession}.bed.gz
##################################################################################################

# Step epiraction: copies the ENCODE epiraction file report to a local manifest,
# then feeds that manifest to a crawl_encode task that carries a COPY task template.
epiraction:
# Copy the ENCODE file report (report.tsv query) to the local manifest path.
- name: copy encode epiraction manifest
source: https://www.encodeproject.org/report.tsv?type=File&searchTerm=epiraction+thresholded&output_type=thresholded+element+gene+links&status=released&limit=all
destination: input/epiraction/epiraction_manifest.tsv

# Declares a dependency (via `requires`) on the manifest copy task above and
# reads the file that task writes.
- name: crawl_encode epiraction manifest
requires:
- copy encode epiraction manifest
manifest: input/epiraction/epiraction_manifest.tsv
# Names on the left are the ${...} variables used in `expand`; the quoted values
# are presumably the manifest column headers they are read from — TODO confirm.
columns:
accession: 'Accession'
url_stem: 'Download URL'
# NOTE(review): presumably instantiated once per manifest row — confirm against
# the crawl_encode task implementation.
expand: # COPY task template
name: copy encode epiraction ${accession}
# ${url_stem} is appended directly to the host, so it is presumably a
# site-relative path starting with "/" — TODO confirm.
source: https://www.encodeproject.org${url_stem}
destination: input/epiraction/${accession}.bed.gz
11 changes: 8 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
# Package metadata and runtime dependencies for PIS.
[project]
name = "PIS"
# NOTE(review): `version` appears twice in a row, which looks like the old and new
# sides of a rendered diff. TOML rejects a key defined twice in the same table, so
# confirm the real file keeps only one of these lines.
version = "25.12.0"
version = "26.0.0-dev+3954.1"
description = "Open Targets Pipeline Input Stage"
readme = "README.md"
requires-python = ">=3.11,<3.14"
authors = [{ name = "Open Targets Core Team", email = "devs@opentargets.net" }]
license = { text = "Apache-2.0" }
dependencies = [
# NOTE(review): the elasticsearch==7.17.12 pin is listed twice in this array (the
# second entry, further down, has no compatibility note) — presumably diff residue;
# confirm only one entry remains and that the note is kept.
"elasticsearch==7.17.12", # must be ^7.0.0 to be compatible with chembl es server
"aiofiles>=25.1.0",
"aiohttp>=3.13.3",
# NOTE(review): asyncio is part of the standard library on every Python version
# allowed by requires-python above — confirm this PyPI dependency is intended.
"asyncio>=4.0.0",
"elasticsearch==7.17.12",
"gcloud-aio-storage>=9.6.1",
"jq==1.8.0",
"loguru==0.7.3",
"opentargets-otter==25.0.16",
"requests>=2.32.4",
"tqdm>=4.67.1",
"urllib3>=2.5.0",
]

Expand Down Expand Up @@ -48,7 +53,7 @@ include-package-data = false
pis = "pis.core:main"

# pytest configuration: test discovery path and warning filters.
[tool.pytest.ini_options]
# NOTE(review): `testpaths` is assigned twice, presumably the old and new sides of
# a rendered diff. TOML rejects a key defined twice in the same table — confirm
# which path is current.
testpaths = ["src/test"]
testpaths = ["test"]
filterwarnings = "ignore:.*google._upb*" # warning from google protobuf library

[tool.coverage.report]
Expand Down
Loading
Loading