Skip to content

Commit 4ca1b05

Browse files
committed
update generate_pep_pages to consume results of python/peps#898
1 parent dff4b6a commit 4ca1b05

File tree

4 files changed

+105
-83
lines changed

4 files changed

+105
-83
lines changed

peps/converters.py

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import functools
2+
import datetime
23
import re
34
import os
45

@@ -7,47 +8,41 @@
78
from django.conf import settings
89
from django.core.exceptions import ImproperlyConfigured
910
from django.core.files import File
11+
from django.db.models import Max
1012

1113
from pages.models import Page, Image
1214

1315
PEP_TEMPLATE = 'pages/pep-page.html'
1416
pep_url = lambda num: 'dev/peps/pep-{}/'.format(num)
1517

1618

17-
def check_paths(func):
18-
"""Ensure that our PEP_REPO_PATH is setup correctly."""
19-
@functools.wraps(func)
20-
def wrapped(*args, **kwargs):
21-
if not hasattr(settings, 'PEP_REPO_PATH'):
22-
raise ImproperlyConfigured('No PEP_REPO_PATH in settings')
23-
if not os.path.exists(settings.PEP_REPO_PATH):
24-
raise ImproperlyConfigured('Path set as PEP_REPO_PATH does not exist')
25-
return func(*args, **kwargs)
26-
return wrapped
19+
def get_peps_last_updated():
20+
return Page.objects.filter(
21+
path__startswith='dev/peps',
22+
).aggregate(Max('updated'))['updated__max']
2723

2824

29-
@check_paths
30-
def convert_pep0():
25+
def convert_pep0(artifact_path):
3126
"""
3227
Take existing generated pep-0000.html and convert to something suitable
3328
for a Python.org Page; returns only the core body HTML.
3429
"""
35-
pep0_path = os.path.join(settings.PEP_REPO_PATH, 'pep-0000.html')
30+
pep0_path = os.path.join(artifact_path, 'pep-0000.html')
3631
pep0_content = open(pep0_path).read()
3732
data = convert_pep_page(0, pep0_content)
3833
if data is None:
3934
return
4035
return data['content']
4136

4237

43-
def get_pep0_page(commit=True):
38+
def get_pep0_page(artifact_path, commit=True):
4439
"""
4540
Using convert_pep0 above, create a CMS ready pep0 page and return it
4641
4742
pep0 is used as the directory index, but it's also an actual pep, so we
4843
return both Page objects.
4944
"""
50-
pep0_content = convert_pep0()
45+
pep0_content = convert_pep0(artifact_path)
5146
if pep0_content is None:
5247
return None, None
5348
pep0_page, _ = Page.objects.get_or_create(path='dev/peps/')
@@ -88,7 +83,6 @@ def fix_headers(soup, data):
8883
return soup, data
8984

9085

91-
@check_paths
9286
def convert_pep_page(pep_number, content):
9387
"""
9488
Handle different formats that pep2html.py outputs
@@ -163,12 +157,12 @@ def convert_pep_page(pep_number, content):
163157
return data
164158

165159

166-
def get_pep_page(pep_number, commit=True):
160+
def get_pep_page(artifact_path, pep_number, commit=True):
167161
"""
168162
Given a pep_number retrieve original PEP source text, rst, or html.
169163
Get or create the associated Page and return it
170164
"""
171-
pep_path = os.path.join(settings.PEP_REPO_PATH, 'pep-{}.html'.format(pep_number))
165+
pep_path = os.path.join(artifact_path, 'pep-{}.html'.format(pep_number))
172166
if not os.path.exists(pep_path):
173167
print("PEP Path '{}' does not exist, skipping".format(pep_path))
174168
return
@@ -177,7 +171,7 @@ def get_pep_page(pep_number, commit=True):
177171
if pep_content is None:
178172
return None
179173
pep_rst_source = os.path.join(
180-
settings.PEP_REPO_PATH, 'pep-{}.rst'.format(pep_number),
174+
artifact_path, 'pep-{}.rst'.format(pep_number),
181175
)
182176
pep_ext = '.rst' if os.path.exists(pep_rst_source) else '.txt'
183177
source_link = 'https://github.com/python/peps/blob/master/pep-{}{}'.format(
@@ -198,8 +192,8 @@ def get_pep_page(pep_number, commit=True):
198192
return pep_page
199193

200194

201-
def add_pep_image(pep_number, path):
202-
image_path = os.path.join(settings.PEP_REPO_PATH, path)
195+
def add_pep_image(artifact_path, pep_number, path):
196+
image_path = os.path.join(artifact_path, path)
203197
if not os.path.exists(image_path):
204198
print("Image Path '{}' does not exist, skipping".format(image_path))
205199
return
@@ -251,9 +245,8 @@ def add_pep_image(pep_number, path):
251245
return image
252246

253247

254-
@check_paths
255-
def get_peps_rss():
256-
rss_feed = os.path.join(settings.PEP_REPO_PATH, 'peps.rss')
248+
def get_peps_rss(artifact_path):
249+
rss_feed = os.path.join(artifact_path, 'peps.rss')
257250
if not os.path.exists(rss_feed):
258251
return
259252

Lines changed: 85 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,19 @@
11
import re
22
import os
33

4+
from contextlib import ExitStack
5+
from tarfile import TarFile
6+
from tempfile import TemporaryDirectory, TemporaryFile
7+
8+
import requests
9+
410
from django.core.management import BaseCommand
511
from django.conf import settings
612

13+
from dateutil.parser import parse as parsedate
14+
715
from peps.converters import (
8-
get_pep0_page, get_pep_page, add_pep_image, get_peps_rss
16+
get_pep0_page, get_pep_page, add_pep_image, get_peps_rss, get_peps_last_updated
917
)
1018

1119
pep_number_re = re.compile(r'pep-(\d+)')
@@ -42,60 +50,82 @@ def verbose(msg):
4250

4351
verbose("== Starting PEP page generation")
4452

45-
verbose("Generating RSS Feed")
46-
peps_rss = get_peps_rss()
47-
if not peps_rss:
48-
verbose("Could not find generated RSS feed. Skipping.")
49-
50-
verbose("Generating PEP0 index page")
51-
pep0_page, _ = get_pep0_page()
52-
if pep0_page is None:
53-
verbose("HTML version of PEP 0 cannot be generated.")
54-
return
55-
56-
image_paths = set()
57-
58-
# Find pep pages
59-
for f in os.listdir(settings.PEP_REPO_PATH):
60-
61-
if self.is_image(f):
62-
verbose("- Deferring import of image '{}'".format(f))
63-
image_paths.add(f)
64-
continue
65-
66-
# Skip files we aren't looking for
67-
if not self.is_pep_page(f):
68-
verbose("- Skipping non-PEP file '{}'".format(f))
69-
continue
70-
71-
if 'pep-0000.html' in f:
72-
verbose("- Skipping duplicate PEP0 index")
73-
continue
74-
75-
verbose("Generating PEP Page from '{}'".format(f))
76-
pep_match = pep_number_re.match(f)
77-
if pep_match:
78-
pep_number = pep_match.groups(1)[0]
79-
p = get_pep_page(pep_number)
80-
if p is None:
81-
verbose(
82-
"- HTML version PEP {!r} cannot be generated.".format(
83-
pep_number
53+
with ExitStack() as stack:
54+
verbose(f"== Fetching PEP artifact from {settings.PEP_ARTIFACT_URL}")
55+
peps_last_updated = get_peps_last_updated()
56+
with requests.get(settings.PEP_ARTIFACT_URL, stream=True) as r:
57+
artifact_last_modified = parsedate(r.headers['last-modified'])
58+
if peps_last_updated > artifact_last_modified:
59+
verbose(f"== No update to artifacts, we're done here!")
60+
return
61+
62+
temp_file = stack.enter_context(TemporaryFile())
63+
for chunk in r.iter_content(chunk_size=8192):
64+
if chunk:
65+
temp_file.write(chunk)
66+
67+
temp_file.seek(0)
68+
69+
temp_dir = stack.enter_context(TemporaryDirectory())
70+
tar_ball = stack.enter_context(TarFile.open(fileobj=temp_file, mode='r:gz'))
71+
tar_ball.extractall(path=temp_dir, numeric_owner=False)
72+
73+
artifacts_path = os.path.join(temp_dir, 'peps')
74+
75+
verbose("Generating RSS Feed")
76+
peps_rss = get_peps_rss(artifacts_path)
77+
if not peps_rss:
78+
verbose("Could not find generated RSS feed. Skipping.")
79+
80+
verbose("Generating PEP0 index page")
81+
pep0_page, _ = get_pep0_page(artifacts_path)
82+
if pep0_page is None:
83+
verbose("HTML version of PEP 0 cannot be generated.")
84+
return
85+
86+
image_paths = set()
87+
88+
# Find pep pages
89+
for f in os.listdir(artifacts_path):
90+
91+
if self.is_image(f):
92+
verbose("- Deferring import of image '{}'".format(f))
93+
image_paths.add(f)
94+
continue
95+
96+
# Skip files we aren't looking for
97+
if not self.is_pep_page(f):
98+
verbose("- Skipping non-PEP file '{}'".format(f))
99+
continue
100+
101+
if 'pep-0000.html' in f:
102+
verbose("- Skipping duplicate PEP0 index")
103+
continue
104+
105+
verbose("Generating PEP Page from '{}'".format(f))
106+
pep_match = pep_number_re.match(f)
107+
if pep_match:
108+
pep_number = pep_match.groups(1)[0]
109+
p = get_pep_page(artifacts_path, pep_number)
110+
if p is None:
111+
verbose(
112+
"- HTML version PEP {!r} cannot be generated.".format(
113+
pep_number
114+
)
84115
)
85-
)
86-
verbose("====== Title: '{}'".format(p.title))
87-
else:
88-
verbose("- Skipping invalid '{}'".format(f))
89-
90-
# Find pep images. This needs to happen afterwards, because we need
91-
for img in image_paths:
92-
pep_match = pep_number_re.match(img)
93-
if pep_match:
94-
pep_number = pep_match.groups(1)[0]
95-
verbose("Generating image for PEP {} at '{}'".format(
96-
pep_number, img))
97-
add_pep_image(pep_number, img)
98-
else:
99-
verbose("- Skipping non-PEP related image '{}'".format(img))
116+
verbose("====== Title: '{}'".format(p.title))
117+
else:
118+
verbose("- Skipping invalid '{}'".format(f))
119+
120+
# Find pep images. This needs to happen afterwards, because we need the PEP pages to exist first (presumably so add_pep_image can attach to them).
121+
for img in image_paths:
122+
pep_match = pep_number_re.match(img)
123+
if pep_match:
124+
pep_number = pep_match.groups(1)[0]
125+
verbose("Generating image for PEP {} at '{}'".format(
126+
pep_number, img))
127+
add_pep_image(artifacts_path, pep_number, img)
128+
else:
129+
verbose("- Skipping non-PEP related image '{}'".format(img))
100130

101131
verbose("== Finished")

pydotorg/settings/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@
227227
MAILING_LIST_PSF_MEMBERS = "[email protected]"
228228

229229
### PEP Artifact Location
230-
PEP_REPO_PATH = ''
230+
PEP_ARTIFACT_URL = 'https://pythondotorg-assets-staging.s3.amazonaws.com/peps.tar.gz'
231231

232232
### Fastly ###
233233
FASTLY_API_KEY = False # Set to Fastly API key in production to allow pages to

pydotorg/settings/local.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@
2424

2525
EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend'
2626

27-
# Set the path to where the PEP repo's HTML source files are located
28-
# For example, PEP_REPO_PATH = '/Users/frank/work/src/pythondotorg/tmp/peps'
29-
PEP_REPO_PATH = ''
27+
# Set the URL to where to fetch PEP artifacts from
28+
PEP_ARTIFACT_URL = 'https://pythondotorg-assets-staging.s3.amazonaws.com/peps.tar.gz'
3029

3130
# Use Dummy SASS compiler to avoid performance issues and remove the need to
3231
# have a sass compiler installed at all during local development if you aren't

0 commit comments

Comments
 (0)