Skip to content

Commit 0867bfc

Browse files
authored
Merge pull request #980 from AlexsLemonade/dev
Production Deploy
2 parents d3ea5fb + 59327f9 commit 0867bfc

39 files changed

+806
-188
lines changed

.github/workflows/deploy_prod_backend.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: Checkout
14-
uses: actions/checkout@v2
14+
uses: actions/checkout@v4
1515

1616
- name: Load 1Password Secrets
1717
id: op-load-secrets
@@ -31,9 +31,9 @@ jobs:
3131
SENTRY_DSN: "${{ secrets.OP_SENTRY_DSN }}"
3232

3333
- name: Setup Terraform
34-
uses: hashicorp/setup-terraform@v1
34+
uses: hashicorp/setup-terraform@v3
3535
with:
36-
terraform_version: 0.12.26
36+
terraform_version: 0.13.0
3737

3838
- name: Deploy
3939
run: cd infrastructure && python3 deploy.py -e prod -u deployer -d ccdl -v $(git rev-parse HEAD)

.github/workflows/deploy_staging_backend.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: Checkout
14-
uses: actions/checkout@v2
14+
uses: actions/checkout@v4
1515

1616
- name: Load 1Password Secrets
1717
id: op-load-secrets
@@ -31,9 +31,9 @@ jobs:
3131
SENTRY_DSN: "${{ secrets.OP_SENTRY_DSN }}"
3232

3333
- name: Setup Terraform
34-
uses: hashicorp/setup-terraform@v1
34+
uses: hashicorp/setup-terraform@v3
3535
with:
36-
terraform_version: 0.12.26
36+
terraform_version: 0.13.0
3737

3838
- name: Deploy
3939
run: cd infrastructure && python3 deploy.py -e staging -u deployer -d ccdlstaging -v $(git rev-parse HEAD)

.gitignore

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,13 @@ infrastructure/.terraform.lock.hcl
128128
.vscode
129129
*.code-workspace
130130

131+
# SSH keys
132+
*.pem
133+
*.pub
134+
135+
# 1Password integration
136+
.op/
137+
131138
#
132139
# Client
133140
#
@@ -147,9 +154,6 @@ client/out/
147154
# production
148155
client/build
149156

150-
# misc
151-
*.pem
152-
153157
# debug
154158
client/npm-debug.log*
155159
client/yarn-debug.log*

api/scpca_portal/config/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,6 @@ class Common(Configuration):
184184
CORS_ALLOW_HEADERS = default_headers + (API_KEY_HEADER,)
185185

186186
TERMS_AND_CONDITIONS = "PLACEHOLDER"
187+
188+
# AWS
189+
AWS_REGION = os.getenv("AWS_REGION", "us-east-1")

api/scpca_portal/config/production.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@ class Production(Common):
1515

1616
UPDATE_S3_DATA = True
1717

18-
# AWS
19-
AWS_REGION = os.getenv("AWS_REGION")
20-
2118
# AWS S3
2219
AWS_S3_INPUT_BUCKET_NAME = "scpca-portal-inputs"
2320
AWS_S3_OUTPUT_BUCKET_NAME = os.getenv("AWS_S3_BUCKET_NAME")
2421

22+
# AWS Batch
23+
AWS_BATCH_JOB_QUEUE_NAME = os.environ.get("AWS_BATCH_JOB_QUEUE_NAME")
24+
AWS_BATCH_JOB_DEFINITION_NAME = os.environ.get("AWS_BATCH_JOB_DEFINITION_NAME")
25+
2526
# https://developers.google.com/web/fundamentals/performance/optimizing-content-efficiency/http-caching#cache-control
2627
# Response can be cached by browser and any intermediary caches
2728
# (i.e. it is "public") for up to 1 day

api/scpca_portal/loader.py

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import shutil
22
from concurrent.futures import ThreadPoolExecutor
33
from functools import partial
4-
from threading import Lock
54
from typing import Any, Dict, List, Set
65

76
from django.conf import settings
@@ -10,7 +9,14 @@
109

1110
from scpca_portal import common, metadata_file, s3
1211
from scpca_portal.config.logging import get_and_configure_logger
13-
from scpca_portal.models import ComputedFile, Contact, ExternalAccession, Project, Publication
12+
from scpca_portal.models import (
13+
ComputedFile,
14+
Contact,
15+
ExternalAccession,
16+
Project,
17+
Publication,
18+
Sample,
19+
)
1420

1521
logger = get_and_configure_logger(__name__)
1622

@@ -136,25 +142,55 @@ def create_project(
136142
return project
137143

138144

139-
def _create_computed_file(future, *, update_s3: bool, clean_up_output_data: bool) -> None:
145+
def _create_computed_file(
146+
computed_file: ComputedFile, update_s3: bool, clean_up_output_data: bool
147+
) -> None:
140148
"""
141149
Save computed file returned from future to the db.
142150
Upload file to s3 and clean up output data depending on passed options.
143151
"""
144-
if computed_file := future.result():
145-
146-
# Only upload and clean up projects and the last sample if multiplexed
147-
if computed_file.project or computed_file.sample.is_last_multiplexed_sample:
148-
if update_s3:
149-
s3.upload_output_file(computed_file.s3_key, computed_file.s3_bucket)
150-
if clean_up_output_data:
151-
computed_file.clean_up_local_computed_file()
152+
if update_s3:
153+
s3.upload_output_file(computed_file.s3_key, computed_file.s3_bucket)
154+
if clean_up_output_data:
155+
computed_file.clean_up_local_computed_file()
156+
157+
if computed_file.sample and computed_file.has_multiplexed_data:
158+
computed_files = computed_file.get_multiplexed_computed_files()
159+
ComputedFile.objects.bulk_create(computed_files)
160+
else:
152161
computed_file.save()
153162

163+
164+
def _create_computed_file_callback(future, *, update_s3: bool, clean_up_output_data: bool) -> None:
165+
"""
166+
Wrap computed file saving and uploading to s3 in a way that accommodates multiprocessing.
167+
"""
168+
if computed_file := future.result():
169+
_create_computed_file(computed_file, update_s3, clean_up_output_data)
170+
154171
# Close DB connection for each thread.
155172
connection.close()
156173

157174

175+
def generate_computed_file(
176+
*,
177+
download_config: Dict,
178+
project: Project | None = None,
179+
sample: Sample | None = None,
180+
update_s3: bool = True,
181+
) -> None:
182+
183+
# Purge old computed file
184+
if old_computed_file := (project or sample).get_computed_file(download_config):
185+
old_computed_file.purge(update_s3)
186+
187+
if project and (computed_file := ComputedFile.get_project_file(project, download_config)):
188+
_create_computed_file(computed_file, update_s3, clean_up_output_data=False)
189+
if sample and (computed_file := ComputedFile.get_sample_file(sample, download_config)):
190+
_create_computed_file(computed_file, update_s3, clean_up_output_data=False)
191+
sample.project.update_downloadable_sample_count()
192+
193+
158194
def generate_computed_files(
159195
project: Project,
160196
max_workers: int,
@@ -170,33 +206,27 @@ def generate_computed_files(
170206

171207
# Prep callback function
172208
on_get_file = partial(
173-
_create_computed_file,
209+
_create_computed_file_callback,
174210
update_s3=update_s3,
175211
clean_up_output_data=clean_up_output_data,
176212
)
177-
# Prepare a threading.Lock for each sample, with the chief purpose being to protect
178-
# multiplexed samples that share a zip file.
179-
locks = {}
213+
180214
with ThreadPoolExecutor(max_workers=max_workers) as tasks:
181215
# Generated project computed files
182216
for config in common.GENERATED_PROJECT_DOWNLOAD_CONFIGS:
183217
tasks.submit(
184218
ComputedFile.get_project_file,
185219
project,
186220
config,
187-
project.get_output_file_name(config),
188221
).add_done_callback(on_get_file)
189222

190223
# Generated sample computed files
191-
for sample in project.samples.all():
224+
for sample in project.samples_to_generate:
192225
for config in common.GENERATED_SAMPLE_DOWNLOAD_CONFIGS:
193-
sample_lock = locks.setdefault(sample.get_config_identifier(config), Lock())
194226
tasks.submit(
195227
ComputedFile.get_sample_file,
196228
sample,
197229
config,
198-
sample.get_output_file_name(config),
199-
sample_lock,
200230
).add_done_callback(on_get_file)
201231

202232
project.update_downloadable_sample_count()
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import logging
2+
3+
from django.conf import settings
4+
from django.core.management.base import BaseCommand
5+
6+
import boto3
7+
8+
from scpca_portal import common
9+
from scpca_portal.models import Project
10+
11+
batch = boto3.client(
12+
"batch",
13+
region_name=settings.AWS_REGION,
14+
)
15+
logger = logging.getLogger()
16+
logger.setLevel(logging.INFO)
17+
logger.addHandler(logging.StreamHandler())
18+
19+
20+
class Command(BaseCommand):
    help = """
    Submits all computed file combinations to the specified AWS Batch job queue
    for projects whose computed files have yet to be generated.
    If a project-id is passed, then computed files are only submitted for that specific project.
    """

    def add_arguments(self, parser):
        parser.add_argument("--project-id", type=str)

    def handle(self, *args, **kwargs):
        self.dispatch_to_batch(**kwargs)

    def submit_job(
        self,
        *,
        download_config_name: str,
        project_id: str = "",
        sample_id: str = "",
    ) -> None:
        """
        Submit a job to AWS Batch for the given resource id and download config combination.

        Exactly one of project_id / sample_id is expected; project_id wins if both are set.
        The submitted container runs the `generate_computed_file` management command for
        that resource.
        """
        resource_flag = "--project-id" if project_id else "--sample-id"
        resource_id = project_id if project_id else sample_id
        # Job names are the resource id plus config name so they are traceable in the console.
        job_name = f"{resource_id}-{download_config_name}"

        response = batch.submit_job(
            jobName=job_name,
            jobQueue=settings.AWS_BATCH_JOB_QUEUE_NAME,
            jobDefinition=settings.AWS_BATCH_JOB_DEFINITION_NAME,
            containerOverrides={
                "command": [
                    "python",
                    "manage.py",
                    "generate_computed_file",
                    resource_flag,
                    resource_id,
                    "--download-config-name",
                    download_config_name,
                ],
            },
        )

        logger.info(f'{job_name} submitted to Batch with jobId {response["jobId"]}')

    def dispatch_to_batch(self, project_id: str = "", **kwargs):
        """
        Iterate over all projects that don't have computed files and submit each
        resource_id and download_config combination to the Batch queue.
        If a project id is passed, then computed files are created for all combinations
        within that project.
        """
        # BUG FIX: the Django lookup suffix is "__isnull", not "__is_null";
        # the original spelling raises FieldError at query time.
        projects = (
            Project.objects.filter(project_computed_files__isnull=True)
            if not project_id
            else Project.objects.filter(scpca_id=project_id)
        )

        for project in projects:
            # One Batch job per project-level download config...
            for download_config_name in common.PROJECT_DOWNLOAD_CONFIGS.keys():
                self.submit_job(
                    project_id=project.scpca_id,
                    download_config_name=download_config_name,
                )

            # ...and one per sample-level download config for each eligible sample.
            for sample in project.samples_to_generate:
                for download_config_name in common.SAMPLE_DOWNLOAD_CONFIGS.keys():
                    self.submit_job(
                        sample_id=sample.scpca_id,
                        download_config_name=download_config_name,
                    )
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import logging
2+
3+
from django.core.management.base import BaseCommand
4+
5+
from scpca_portal import common, loader
6+
from scpca_portal.models import Project, Sample
7+
8+
logger = logging.getLogger()
9+
logger.setLevel(logging.INFO)
10+
logger.addHandler(logging.StreamHandler())
11+
12+
13+
class Command(BaseCommand):
    help = """
    This command is meant to be called as an entrypoint to AWS Batch Fargate job instance.
    Individual files are computed according to:
    - The project or sample id
    - An appropriate corresponding download config

    When computation is completed, files are uploaded to S3, and the job is marked as completed.

    At which point the instance which generated this computed file will receive a new job
    from the job queue and begin computing the next file.
    """

    def add_arguments(self, parser):
        parser.add_argument("--project-id", type=str)
        parser.add_argument("--sample-id", type=str)
        parser.add_argument("--download-config-name", type=str)

    def handle(self, *args, **kwargs):
        self.generate_computed_file(**kwargs)

    def generate_computed_file(
        self,
        project_id: str,
        sample_id: str,
        download_config_name: str,
        **kwargs,
    ) -> None:
        """Generates a project's computed files according to predetermined download configurations"""
        loader.prep_data_dirs()

        # Exactly one of project_id / sample_id must be supplied.
        ids_not_mutually_exclusive = project_id and sample_id or (not project_id and not sample_id)
        if ids_not_mutually_exclusive:
            logger.error(
                "Invalid id combination. Passed ids must be mutually exclusive. "
                "Either a project_id or a sample_id must be passed, but not both or neither."
            )
            # BUG FIX: stop here; the original fell through and kept executing
            # with invalid arguments.
            return

        if project_id:
            project = Project.objects.filter(scpca_id=project_id).first()
            if not project:
                # BUG FIX: log the requested id ("project" would render as None here)
                # and stop instead of continuing with a None project.
                logger.error(f"{project_id} does not exist.")
                return
            if download_config_name not in common.PROJECT_DOWNLOAD_CONFIGS:
                logger.error(f"{download_config_name} is not a valid project download config name.")
                logger.info(
                    f"Here are valid download_config_name values for projects: "
                    f"{common.PROJECT_DOWNLOAD_CONFIGS.keys()}"
                )
                # BUG FIX: return before the dict lookup below raises KeyError.
                return
            download_config = common.PROJECT_DOWNLOAD_CONFIGS[download_config_name]
            loader.generate_computed_file(project=project, download_config=download_config)

        if sample_id:
            sample = Sample.objects.filter(scpca_id=sample_id).first()
            if not sample:
                # BUG FIX: log the requested id and stop (same rationale as above).
                logger.error(f"{sample_id} does not exist.")
                return
            if download_config_name not in common.SAMPLE_DOWNLOAD_CONFIGS:
                logger.error(f"{download_config_name} is not a valid sample download config name.")
                logger.info(
                    f"Here are valid download_config_name values for samples: "
                    f"{common.SAMPLE_DOWNLOAD_CONFIGS.keys()}"
                )
                # BUG FIX: return before the dict lookup below raises KeyError.
                return
            download_config = common.SAMPLE_DOWNLOAD_CONFIGS[download_config_name]
            loader.generate_computed_file(sample=sample, download_config=download_config)

0 commit comments

Comments (0)