@@ -1,11 +1,19 @@
 import re
 import os
 
+from contextlib import ExitStack
+from tarfile import TarFile
+from tempfile import TemporaryDirectory, TemporaryFile
+
+import requests
+
 from django.core.management import BaseCommand
 from django.conf import settings
 
+from dateutil.parser import parse as parsedate
+
 from peps.converters import (
-    get_pep0_page, get_pep_page, add_pep_image, get_peps_rss
+    get_pep0_page, get_pep_page, add_pep_image, get_peps_rss, get_peps_last_updated
 )
 
 pep_number_re = re.compile(r'pep-(\d+)')
@@ -42,60 +50,82 @@ def verbose(msg):
 
         verbose("== Starting PEP page generation")
 
-        verbose("Generating RSS Feed")
-        peps_rss = get_peps_rss()
-        if not peps_rss:
-            verbose("Could not find generated RSS feed. Skipping.")
-
-        verbose("Generating PEP0 index page")
-        pep0_page, _ = get_pep0_page()
-        if pep0_page is None:
-            verbose("HTML version of PEP 0 cannot be generated.")
-            return
-
-        image_paths = set()
-
-        # Find pep pages
-        for f in os.listdir(settings.PEP_REPO_PATH):
-
-            if self.is_image(f):
-                verbose("- Deferring import of image '{}'".format(f))
-                image_paths.add(f)
-                continue
-
-            # Skip files we aren't looking for
-            if not self.is_pep_page(f):
-                verbose("- Skipping non-PEP file '{}'".format(f))
-                continue
-
-            if 'pep-0000.html' in f:
-                verbose("- Skipping duplicate PEP0 index")
-                continue
-
-            verbose("Generating PEP Page from '{}'".format(f))
-            pep_match = pep_number_re.match(f)
-            if pep_match:
-                pep_number = pep_match.groups(1)[0]
-                p = get_pep_page(pep_number)
-                if p is None:
-                    verbose(
-                        "- HTML version PEP {!r} cannot be generated.".format(
-                            pep_number
+        with ExitStack() as stack:
+            verbose(f"== Fetching PEP artifact from {settings.PEP_ARTIFACT_URL}")
+            peps_last_updated = get_peps_last_updated()
+            with requests.get(settings.PEP_ARTIFACT_URL, stream=True) as r:
+                artifact_last_modified = parsedate(r.headers['last-modified'])
+                if peps_last_updated > artifact_last_modified:
+                    verbose(f"== No update to artifacts, we're done here!")
+                    return
+
+                temp_file = stack.enter_context(TemporaryFile())
+                for chunk in r.iter_content(chunk_size=8192):
+                    if chunk:
+                        temp_file.write(chunk)
+
+                temp_file.seek(0)
+
+            temp_dir = stack.enter_context(TemporaryDirectory())
+            tar_ball = stack.enter_context(TarFile.open(fileobj=temp_file, mode='r:gz'))
+            tar_ball.extractall(path=temp_dir, numeric_owner=False)
+
+            artifacts_path = os.path.join(temp_dir, 'peps')
+
+            verbose("Generating RSS Feed")
+            peps_rss = get_peps_rss(artifacts_path)
+            if not peps_rss:
+                verbose("Could not find generated RSS feed. Skipping.")
+
+            verbose("Generating PEP0 index page")
+            pep0_page, _ = get_pep0_page(artifacts_path)
+            if pep0_page is None:
+                verbose("HTML version of PEP 0 cannot be generated.")
+                return
+
+            image_paths = set()
+
+            # Find pep pages
+            for f in os.listdir(artifacts_path):
+
+                if self.is_image(f):
+                    verbose("- Deferring import of image '{}'".format(f))
+                    image_paths.add(f)
+                    continue
+
+                # Skip files we aren't looking for
+                if not self.is_pep_page(f):
+                    verbose("- Skipping non-PEP file '{}'".format(f))
+                    continue
+
+                if 'pep-0000.html' in f:
+                    verbose("- Skipping duplicate PEP0 index")
+                    continue
+
+                verbose("Generating PEP Page from '{}'".format(f))
+                pep_match = pep_number_re.match(f)
+                if pep_match:
+                    pep_number = pep_match.groups(1)[0]
+                    p = get_pep_page(artifacts_path, pep_number)
+                    if p is None:
+                        verbose(
+                            "- HTML version PEP {!r} cannot be generated.".format(
+                                pep_number
+                            )
                         )
-                    )
-                verbose("====== Title: '{}'".format(p.title))
-            else:
-                verbose("- Skipping invalid '{}'".format(f))
-
-        # Find pep images. This needs to happen afterwards, because we need
-        for img in image_paths:
-            pep_match = pep_number_re.match(img)
-            if pep_match:
-                pep_number = pep_match.groups(1)[0]
-                verbose("Generating image for PEP {} at '{}'".format(
-                    pep_number, img))
-                add_pep_image(pep_number, img)
-            else:
-                verbose("- Skipping non-PEP related image '{}'".format(img))
+                    verbose("====== Title: '{}'".format(p.title))
+                else:
+                    verbose("- Skipping invalid '{}'".format(f))
+
+            # Find pep images. This needs to happen afterwards, because we need
+            for img in image_paths:
+                pep_match = pep_number_re.match(img)
+                if pep_match:
+                    pep_number = pep_match.groups(1)[0]
+                    verbose("Generating image for PEP {} at '{}'".format(
+                        pep_number, img))
+                    add_pep_image(artifacts_path, pep_number, img)
+                else:
+                    verbose("- Skipping non-PEP related image '{}'".format(img))
 
         verbose("== Finished")
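
For context on what the new block does: the command now checks whether the PEP artifact tarball has changed before doing any work, by comparing the tarball's Last-Modified header against the timestamp of the most recently imported PEP, and only then streams the archive to a temporary file and unpacks it. The snippet below is a minimal standalone sketch of that pattern, not the command itself: fetch_if_newer, ARTIFACT_URL, and the dest_dir argument are hypothetical stand-ins for settings.PEP_ARTIFACT_URL, get_peps_last_updated(), and the ExitStack-managed temporary directory used above.

from tarfile import TarFile
from tempfile import TemporaryFile

import requests
from dateutil.parser import parse as parsedate

# Hypothetical placeholder; the real command reads settings.PEP_ARTIFACT_URL.
ARTIFACT_URL = 'https://example.invalid/peps.tar.gz'


def fetch_if_newer(url, last_updated, dest_dir):
    """Download and unpack the tarball at `url` into `dest_dir`, unless
    `last_updated` (a timezone-aware datetime, or None) already post-dates
    the server's Last-Modified header. Returns True if anything was unpacked."""
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        # Last-Modified is an HTTP date string, e.g. 'Wed, 21 Oct 2015 07:28:00 GMT'.
        artifact_last_modified = parsedate(r.headers['Last-Modified'])
        if last_updated is not None and last_updated > artifact_last_modified:
            return False  # local copy is already current; skip the download

        # Stream the body into a temporary file so the archive never has to
        # sit in memory, then rewind and extract it as a gzipped tarball.
        with TemporaryFile() as temp_file:
            for chunk in r.iter_content(chunk_size=8192):
                temp_file.write(chunk)
            temp_file.seek(0)
            with TarFile.open(fileobj=temp_file, mode='r:gz') as tar_ball:
                tar_ball.extractall(path=dest_dir, numeric_owner=False)
    return True


# Example call (hypothetical values):
#   from datetime import datetime, timezone
#   fetch_if_newer(ARTIFACT_URL, datetime(2019, 1, 1, tzinfo=timezone.utc), '/tmp/peps')

Streaming via iter_content keeps memory use flat regardless of archive size, and the early return on the Last-Modified comparison is what lets the command exit cheaply when nothing has changed on the server.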