Skip to content

Commit 33a7b76

Browse files
committed
file storage
1 parent f2ba214 commit 33a7b76

File tree

8 files changed

+164
-27
lines changed

8 files changed

+164
-27
lines changed

dev/environment

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ CAMO_KEY=insecurecamokey
2323
DOCS_URL="https://pythonhosted.org/{project}/"
2424

2525
FILES_BACKEND=warehouse.packaging.services.LocalFileStorage path=/var/opt/warehouse/packages/ url=http://localhost:9001/packages/{path}
26+
SIMPLE_BACKEND=warehouse.packaging.services.LocalSimpleStorage path=/var/opt/warehouse/simple/ url=http://localhost:9001/simple/{path}
2627
DOCS_BACKEND=warehouse.packaging.services.LocalDocsStorage path=/var/opt/warehouse/docs/
2728

2829
MAIL_BACKEND=warehouse.email.services.SMTPEmailSender host=smtp port=2525 ssl=false sender=noreply@pypi.org

docker-compose.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
version: '3'
22

33
volumes:
4+
simple:
45
packages:
56
vault:
67

@@ -85,6 +86,7 @@ services:
8586
- ./htmlcov:/opt/warehouse/src/htmlcov:z
8687
- .coveragerc:/opt/warehouse/src/.coveragerc:z
8788
- packages:/var/opt/warehouse/packages
89+
- simple:/var/opt/warehouse/simple
8890
- ./bin:/opt/warehouse/src/bin:z
8991
ports:
9092
- "80:8000"
@@ -96,6 +98,7 @@ services:
9698
command: python -m http.server 9001
9799
volumes:
98100
- packages:/var/opt/warehouse/packages
101+
- simple:/var/opt/warehouse/simple
99102
ports:
100103
- "9001:9001"
101104

@@ -111,6 +114,7 @@ services:
111114
environment:
112115
C_FORCE_ROOT: "1"
113116
FILES_BACKEND: "warehouse.packaging.services.LocalFileStorage path=/var/opt/warehouse/packages/ url=http://files:9001/packages/{path}"
117+
SIMPLE_BACKEND: "warehouse.packaging.services.LocalSimpleStorage path=/var/opt/warehouse/simple/ url=http://files:9001/simple/{path}"
114118

115119
static:
116120
build:

warehouse/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ def configure(settings=None):
216216
default=21600, # 6 hours
217217
)
218218
maybe_set_compound(settings, "files", "backend", "FILES_BACKEND")
219+
maybe_set_compound(settings, "simple", "backend", "SIMPLE_BACKEND")
219220
maybe_set_compound(settings, "docs", "backend", "DOCS_BACKEND")
220221
maybe_set_compound(settings, "origin_cache", "backend", "ORIGIN_CACHE")
221222
maybe_set_compound(settings, "mail", "backend", "MAIL_BACKEND")

warehouse/legacy/api/simple.py

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,14 @@
1111
# limitations under the License.
1212

1313

14-
from packaging.version import parse
1514
from pyramid.httpexceptions import HTTPMovedPermanently
1615
from pyramid.view import view_config
1716
from sqlalchemy import func
18-
from sqlalchemy.orm import joinedload
1917

2018
from warehouse.cache.http import cache_control
2119
from warehouse.cache.origin import origin_cache
22-
from warehouse.packaging.models import File, JournalEntry, Project, Release
20+
from warehouse.packaging.models import JournalEntry, Project
21+
from warehouse.packaging.utils import _simple_detail, render_simple_detail
2322

2423

2524
@view_config(
@@ -49,20 +48,6 @@ def simple_index(request):
4948
return {"projects": projects}
5049

5150

52-
def _simple_detail(project, request):
53-
# Get all of the files for this project.
54-
files = sorted(
55-
request.db.query(File)
56-
.options(joinedload(File.release))
57-
.join(Release)
58-
.filter(Release.project == project)
59-
.all(),
60-
key=lambda f: (parse(f.release.version), f.filename),
61-
)
62-
63-
return {"project": project, "files": files}
64-
65-
6651
@view_config(
6752
route_name="legacy.api.simple.detail",
6853
context=Project,
@@ -88,4 +73,6 @@ def simple_detail(project, request):
8873
# Get the latest serial number for this project.
8974
request.response.headers["X-PyPI-Last-Serial"] = str(project.last_serial)
9075

76+
render_simple_detail(project, request, store=True)
77+
9178
return _simple_detail(project, request)

warehouse/packaging/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from warehouse.accounts.models import Email, User
1818
from warehouse.cache.origin import key_factory, receive_set
1919
from warehouse.manage.tasks import update_role_invitation_status
20-
from warehouse.packaging.interfaces import IDocsStorage, IFileStorage
20+
from warehouse.packaging.interfaces import IDocsStorage, IFileStorage, ISimpleStorage
2121
from warehouse.packaging.models import File, Project, Release, Role
2222
from warehouse.packaging.tasks import (
2323
compute_trending,
@@ -44,6 +44,9 @@ def includeme(config):
4444
files_storage_class = config.maybe_dotted(config.registry.settings["files.backend"])
4545
config.register_service_factory(files_storage_class.create_service, IFileStorage)
4646

47+
simple_storage_class = config.maybe_dotted(config.registry.settings["simple.backend"])
48+
config.register_service_factory(simple_storage_class.create_service, ISimpleStorage)
49+
4750
docs_storage_class = config.maybe_dotted(config.registry.settings["docs.backend"])
4851
config.register_service_factory(docs_storage_class.create_service, IDocsStorage)
4952

warehouse/packaging/interfaces.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,27 @@ def store(path, file_path, *, meta=None):
3434
"""
3535

3636

37+
class ISimpleStorage(Interface):
    """
    Contract for a storage service with the same three operations as the
    other storage interfaces in this module: create, get and store.
    """

    def create_service(context, request):
        """
        Build and return the storage service for the given context and
        request.
        """

    def get(path):
        """
        Return a readable file-like object for the file stored at the given
        path.
        """

    def store(path, file_path, *, meta=None):
        """
        Copy the file found at file_path into the storage under path. The
        optional meta keyword argument carries extra information that an
        implementation is free to persist or to ignore.
        """
56+
57+
3758
class IDocsStorage(Interface):
3859
def create_service(context, request):
3960
"""

warehouse/packaging/services.py

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from zope.interface import implementer
2222

23-
from warehouse.packaging.interfaces import IDocsStorage, IFileStorage
23+
from warehouse.packaging.interfaces import IDocsStorage, IFileStorage, ISimpleStorage
2424

2525

2626
class InsecureStorageWarning(UserWarning):
@@ -58,6 +58,37 @@ def store(self, path, file_path, *, meta=None):
5858
dest_fp.write(src_fp.read())
5959

6060

61+
@implementer(ISimpleStorage)
class LocalSimpleStorage:
    """
    ISimpleStorage implementation that keeps files below a directory on the
    local disk. Intended for local development only (see the warning raised
    in ``__init__``).
    """

    def __init__(self, base):
        """
        Remember ``base`` as the directory all stored paths are relative to.

        Emits an ``InsecureStorageWarning`` on every instantiation.
        """
        # This class should not be used in production, it's trivial for it to
        # be used to read arbitrary files from the disk. It is intended ONLY
        # for local development with trusted users. To make this clear, we'll
        # raise a warning.
        warnings.warn(
            "LocalSimpleStorage is intended only for use in development, you "
            "should not use it in production due to the lack of safe guards "
            "for safely locating files on disk.",
            InsecureStorageWarning,
        )

        self.base = base

    @classmethod
    def create_service(cls, context, request):
        """Create the service rooted at the ``simple.path`` setting."""
        return cls(request.registry.settings["simple.path"])

    def get(self, path):
        """
        Open ``path`` (relative to the base directory) for binary reading.

        The caller owns the returned file object and must close it.
        """
        return open(os.path.join(self.base, path), "rb")

    def store(self, path, file_path, *, meta=None):
        """
        Copy the file at ``file_path`` to ``path`` below the base directory,
        creating intermediate directories as needed. ``meta`` is accepted for
        interface compatibility and is not stored.
        """
        destination = os.path.join(self.base, path)
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        # Open the source first so that a missing source file does not leave
        # a freshly truncated (empty) destination behind.
        with open(file_path, "rb") as src_fp, open(destination, "wb") as dest_fp:
            dest_fp.write(src_fp.read())
91+
6192
@implementer(IDocsStorage)
6293
class LocalDocsStorage:
6394
def __init__(self, base):
@@ -137,6 +168,37 @@ def store(self, path, file_path, *, meta=None):
137168
self.bucket.upload_file(file_path, path, ExtraArgs=extra_args)
138169

139170

171+
@implementer(ISimpleStorage)
class S3SimpleStorage(GenericFileStorage):
    """
    ISimpleStorage implementation backed by an S3 bucket.

    NOTE(review): ``self.bucket`` and ``self._get_path`` are presumably
    provided by ``GenericFileStorage`` -- confirm against the base class.
    """

    @classmethod
    def create_service(cls, context, request):
        """
        Create the service from the ``aws.session`` service and the
        ``simple.bucket`` / ``simple.prefix`` settings.
        """
        session = request.find_service(name="aws.session")
        s3 = session.resource("s3")
        # Read the "simple.*" settings (populated from SIMPLE_BACKEND) rather
        # than "files.*", so simple pages do not land in the package files
        # bucket.
        bucket = s3.Bucket(request.registry.settings["simple.bucket"])
        prefix = request.registry.settings.get("simple.prefix")
        return cls(bucket, prefix=prefix)

    def get(self, path):
        """
        Return the body of the object stored at ``path``.

        Raises ``FileNotFoundError`` when S3 reports ``NoSuchKey``; any other
        ``ClientError`` is re-raised unchanged.
        """
        # Note: this is not actually used in production, instead our CDN is
        # configured to connect directly to our storage bucket. See:
        # https://github.com/python/pypi-infra/blob/master/terraform/file-hosting/vcl/main.vcl
        try:
            return self.bucket.Object(self._get_path(path)).get()["Body"]
        except botocore.exceptions.ClientError as exc:
            if exc.response["Error"]["Code"] != "NoSuchKey":
                raise
            raise FileNotFoundError("No such key: {!r}".format(path)) from None

    def store(self, path, file_path, *, meta=None):
        """
        Upload the file at ``file_path`` to the bucket under ``path``,
        attaching ``meta`` as object metadata when it is given.
        """
        extra_args = {}
        if meta is not None:
            extra_args["Metadata"] = meta

        path = self._get_path(path)

        self.bucket.upload_file(file_path, path, ExtraArgs=extra_args)
200+
201+
140202
@implementer(IDocsStorage)
141203
class S3DocsStorage:
142204
def __init__(self, s3_client, bucket_name, *, prefix=None):
@@ -203,3 +265,38 @@ def store(self, path, file_path, *, meta=None):
203265
if meta is not None:
204266
blob.metadata = meta
205267
blob.upload_from_filename(file_path)
268+
269+
270+
@implementer(ISimpleStorage)
class GCSSimpleStorage(GenericFileStorage):
    """
    ISimpleStorage implementation backed by a Google Cloud Storage bucket.

    NOTE(review): ``self.bucket`` and ``self._get_path`` are presumably
    provided by ``GenericFileStorage`` -- confirm against the base class.
    """

    @classmethod
    @google.api_core.retry.Retry(
        predicate=google.api_core.retry.if_exception_type(
            google.api_core.exceptions.ServiceUnavailable
        )
    )
    def create_service(cls, context, request):
        """
        Create the service from the ``gcloud.gcs`` service and the
        ``simple.bucket`` / ``simple.prefix`` settings. The call is retried
        when ``ServiceUnavailable`` is raised.
        """
        storage_client = request.find_service(name="gcloud.gcs")
        # Read the "simple.*" settings (populated from SIMPLE_BACKEND) rather
        # than "files.*", so simple pages do not land in the package files
        # bucket.
        bucket_name = request.registry.settings["simple.bucket"]
        bucket = storage_client.get_bucket(bucket_name)
        prefix = request.registry.settings.get("simple.prefix")

        return cls(bucket, prefix=prefix)

    def get(self, path):
        """Not implemented for this backend; always raises."""
        # Note: this is not actually used in production, instead our CDN is
        # configured to connect directly to our storage bucket. See:
        # https://github.com/python/pypi-infra/blob/master/terraform/file-hosting/vcl/main.vcl
        raise NotImplementedError

    @google.api_core.retry.Retry(
        predicate=google.api_core.retry.if_exception_type(
            google.api_core.exceptions.ServiceUnavailable
        )
    )
    def store(self, path, file_path, *, meta=None):
        """
        Upload the file at ``file_path`` to the bucket under ``path``,
        setting ``meta`` as blob metadata when it is given. The call is
        retried when ``ServiceUnavailable`` is raised.
        """
        path = self._get_path(path)
        blob = self.bucket.blob(path)
        if meta is not None:
            blob.metadata = meta
        blob.upload_from_filename(file_path)

warehouse/packaging/utils.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,51 @@
11
import hashlib
2-
import os.path
2+
import tempfile
33

4+
from packaging.version import parse
45
from pyramid_jinja2 import IJinja2Environment
6+
from sqlalchemy.orm import joinedload
57

6-
import warehouse
8+
from warehouse.packaging.interfaces import ISimpleStorage
9+
from warehouse.packaging.models import File, Release
710

8-
from warehouse.legacy.api.simple import _simple_detail
11+
12+
def _simple_detail(project, request):
    """
    Build the template context for a project's simple detail page.

    Returns a dict with the ``project`` itself and its ``files``, ordered by
    parsed release version and then by filename.
    """

    def version_then_name(file_):
        return (parse(file_.release.version), file_.filename)

    # Every file that belongs to any release of this project, with the
    # release eagerly loaded since the sort key reads it.
    project_files = (
        request.db.query(File)
        .options(joinedload(File.release))
        .join(Release)
        .filter(Release.project == project)
        .all()
    )
    ordered_files = sorted(project_files, key=version_then_name)

    return {"project": project, "files": ordered_files}
924

1025

1126
def render_simple_detail(project, request, store=False):
    """
    Render the simple detail page for ``project`` and compute its content
    hash.

    The page is rendered with the ``.jinja2`` environment registered on the
    request's registry and hashed with a 256-bit BLAKE2b digest. When
    ``store`` is true the rendered page is also written to the
    ``ISimpleStorage`` service under the returned path.

    Returns a ``(content_hash, simple_detail_path)`` tuple, where the path is
    ``"<normalized project name>/<content hash>.html"``.
    """
    context = _simple_detail(project, request)

    env = request.registry.queryUtility(IJinja2Environment, name=".jinja2")
    template = env.get_template("templates/legacy/api/simple/detail.html")
    content = template.render(**context, request=request)
    # Encode once; the same bytes are hashed and (optionally) stored.
    content_bytes = content.encode("utf-8")

    content_hasher = hashlib.blake2b(digest_size=256 // 8)
    content_hasher.update(content_bytes)
    content_hash = content_hasher.hexdigest().lower()
    simple_detail_path = f"{project.normalized_name}/{content_hash}.html"

    if store:
        storage = request.find_service(ISimpleStorage)
        with tempfile.NamedTemporaryFile() as f:
            f.write(content_bytes)
            # The storage backend re-opens the file by name, so the buffered
            # bytes must reach the disk first; without the flush it can read
            # an empty or truncated file.
            f.flush()
            storage.store(
                simple_detail_path,
                f.name,
                meta={
                    "project": project.normalized_name,
                    "hash": content_hash,
                },
            )

    return (content_hash, simple_detail_path)

0 commit comments

Comments
 (0)