-
-
Notifications
You must be signed in to change notification settings - Fork 88
/
Copy pathinsert_contributor_content.py
312 lines (271 loc) · 12.2 KB
/
insert_contributor_content.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
"""
This script generates markdown from the user-contributed YAML file,
enriched with data from the GitHub API, and inserts it into the readme.
Python 3.6+ is required
Environment Variables (all optional)
- LOG_LEVEL: The log level to use: info | warn | error (default: INFO).
- GH_ACCESS_TOKEN: The GitHub API token used for authentication.
- REPO_OWNER: The username / org where the repository is located.
- REPO_NAME: The name of the repository.
"""
# Imports
import os
import re
import yaml
import json
import html
import time
import logging
import requests
from typing import Dict, List, Union, Optional
from requests.exceptions import RequestException
# Constants
""" The username / org where the repository is located """
REPO_OWNER = os.environ.get("REPO_OWNER", "lissy93")
""" The name of the repository """
REPO_NAME = os.environ.get("REPO_NAME", "git-into-open-source")
""" A GitHub access token, required for higher rate-limit when fetching data """
GH_ACCESS_TOKEN = os.environ.get("GH_ACCESS_TOKEN", None)
""" Used for users who don't have a GitHub profile picture """
PLACEHOLDER_PROFILE_PICTURE = "https://i.ibb.co/X231Rq8/octo-no-one.png"
""" The directory where this script is located """
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
""" The relative path to the markdown file to update"""
README_PATH = os.path.join(SCRIPT_DIR, "..", ".github/README.md")
""" The relative path to the YAML file containing the user-contributed content """
CONTRIBUTORS_FILE_PATH = os.path.join(SCRIPT_DIR, "..", "git-in-here.yml")
""" Need to use a cache file to avoid hitting the rate limit """
CACHE_FILE = os.path.join(SCRIPT_DIR, "cache.json")
def read_cache() -> dict:
"""
Read cached GitHub data from the local file.
"""
if os.path.exists(CACHE_FILE):
with open(CACHE_FILE, 'r') as f:
return json.load(f)
return {}
def write_cache(cache: dict) -> None:
"""
Write GitHub data to cache file.
"""
with open(CACHE_FILE, 'w') as f:
json.dump(cache, f)
def handle_rate_limit(response) -> None:
if response.status_code == 429 or response.status_code == 403:
try:
reset_time = int(response.headers.get('X-RateLimit-Reset'))
except ValueError:
logger.error("Rate limit likely exceeded, but no reset time provided.")
return
sleep_duration = reset_time - int(time.time()) + 1
if sleep_duration > 30:
logger.warning(f"Rate limit exceeded - quota will reset in {sleep_duration} seconds. "
"Skipping, as this is too long to wait.")
else:
logger.info(f"Sleeping for {sleep_duration} seconds")
time.sleep(sleep_duration)
# Configure Logging
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=LOG_LEVEL)
logger = logging.getLogger(__name__)
def read_file(file_path: str, mode: str = "r") -> str:
"""
Opens, reads and returns the contents of a given file.
:param file_path: The path to the file.
:param mode: The mode to open the file in.
:return: The contents of the file.
"""
try:
with open(file_path, mode) as f:
logger.info(f"Reading file: {file_path}")
return f.read()
except FileNotFoundError:
logger.error(f"Error: File {file_path} not found.")
exit(1)
def write_file(file_path: str, content: str, mode: str = "w") -> None:
"""
Opens a given file and writes the given content to it.
:param file_path: The path to the file.
:param content: The content to write to the file.
:param mode: The mode to open the file in.
"""
with open(file_path, mode) as f:
logging.info(f"Writing to file: {file_path}")
f.write(content)
def get_profile_picture(username: str) -> str:
"""
Returns either the URL to a users profile, or a placeholder image.
:param username: The username of the GitHub user.
:return: The URL of the profile picture of the GitHub user.
"""
logger.info(f"Using backup method to fetch profile picture for user: {username}")
url_to_check = f"https://github.com/{username}.png"
response = requests.get(url_to_check)
if response.status_code == 200:
return url_to_check
return PLACEHOLDER_PROFILE_PICTURE
def fetch_github_info(username: str) -> Dict[str, Union[str, None]]:
"""
Fetches the name and avatar URL of a GitHub user.
:param username: The username of the GitHub user.
:return: A dictionary containing the name and avatar URL of the GitHub user.
"""
logging.info(f"Fetching GitHub info for user: {username}")
headers = {"Accept": "application/vnd.github.v3+json"}
if GH_ACCESS_TOKEN:
headers["Authorization"] = f"token {GH_ACCESS_TOKEN}"
url = f"https://api.github.com/users/{username}"
response = requests.get(url, headers=headers)
handle_rate_limit(response)
if response.status_code != 200:
logger.error(f"GitHub API returned {response.status_code} for @{username}.")
return { "name": username, "avatar_url": get_profile_picture(username) }
logger.info(f"Successfully fetched GitHub info for user: {username}")
return response.json()
def map_question_to_heading(question: str) -> str:
"""
Maps the question to a heading that will be displayed in the README.
:param question: The question to map.
:return: The mapped question.
"""
question_mappings = {
"Why do you want to get into open source?": "I want to get into open source because:",
"What's the coolest open source project you've ever used or come across?": "The coolest open source project I've ever come across is:",
"What advice would you give to someone new to open source?": "The advice I would give to someone new to open source is:",
"Have you ever built or contributed to a project that you'd like to share with us here?": "I've built or contributed to a project that I'd like to share here, which is:",
"Which tech (tools, languages, libraries, etc) do you most use or love?": "The tech (tools, languages, libraries, etc) I most use and love is:",
"What are your go-to resources, for learning new things in open source?": "My go-to resources for learning new things in open source are:",
"What's the most rewarding thing you've experienced in your open-source journey?": "The most rewarding thing I've experienced in my open-source journey is:",
"How do you balance open source work, alongside your day job?": "I balance open source work alongside my day job by:",
"Where do you see open source going in the future?": "I see open source going in the direction of:"
}
return question_mappings.get(question, question)
def fetch_all_stargazers(
repo_owner: str, repo_name: str, access_token: Optional[str] = None
) -> List[str]:
"""
Fetches all stargazers of a given GitHub repository.
:param repo_owner: The owner of the repository.
:param repo_name: The name of the repository.
:param access_token: An optional access token to authenticate with the GitHub API.
:return: A list of all stargazers of the repository.
"""
logging.info("Fetching all stargazers of the repository")
stargazers: List[str] = []
url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/stargazers"
headers = {"Accept": "application/vnd.github.v3+json"}
def get_next_url(headers: dict) -> Optional[str]:
links = headers.get("Link", "").split(",")
for link in links:
if 'rel="next"' in link:
return link.split(";")[0].replace("<", "").replace(">", "").strip()
return None
if GH_ACCESS_TOKEN:
headers["Authorization"] = f"token {GH_ACCESS_TOKEN}"
while url:
try:
res = requests.get(url, headers=headers, params={"per_page": 100})
if res.status_code == 200:
stargazers += [user["login"] for user in res.json()]
url = get_next_url(res.headers)
else:
break
except RequestException:
break
return stargazers
def format_url(url: str) -> str:
"""
Makes URLs to users blogs and twitter profiles look nicer.
Removes www., http://, https:// and trailing slashes from a URL.
Also limits to 25 characters and replaces with ellipse if longer
"""
url = url.replace("www.", "").replace("http://", "").replace("https://", "")
url = url.rstrip("/")
return url[:25] + (url[25:] and "...")
def format_bio_text(bio: str) -> str:
"""
Returns only a-z, A-Z, 0-9, spaces, and basic punctuation.
"""
return html.escape(re.sub(r"[^a-zA-Z0-9 ]", "", bio).strip())
def build_markdown_content(
contributors: List[Dict[str, str]], stargazers: List[str]
) -> str:
"""
Use the content returned from the YAML + complimentary data (like stargazers),
to build the markdown content that will be inserted into the README.
:param contributors: The list of contributors from the YAML file.
:param stargazers: The list of stargazers of the repository.
:return: The markdown content to be inserted into the README.
"""
if not contributors:
logger.info(f"No contributors found yet, cancelling markdown generation")
return ""
md_content = "User | Contribution\n---|---\n"
for contributor in reversed(contributors):
username = contributor["username"]
question = contributor["question"]
response = contributor["response"]
question_heading = map_question_to_heading(question)
info = fetch_github_info(username)
name = info["name"] if info["name"] else username
picture = info.get("avatar_url", PLACEHOLDER_PROFILE_PICTURE)
is_stargazer = " ⭐" if username.lower() in [sg.lower() for sg in stargazers] else ""
blog = f"<br /><sup>🌐 [{format_url(info['blog'])}]({info['blog']})</sup>" if info.get("blog") else ""
bio = format_bio_text(info["bio"]) if info.get("bio") else ""
if info.get("public_repos") and info.get("followers") and info.get("following"):
stats = (
f"<br /><kbd title='Followers, following and repo count for {username}'>"
f"🫂 {info['followers']} ┃ "
f"👣 {info['following']} ┃ "
f"🗃 {info['public_repos']}</kbd>"
)
else:
stats = ""
if info.get("twitter_username") and not blog:
twitter = (
f"<br /><sup>🐦 [@{format_url(info['twitter_username'])}]"
f"(https://x.com/{info['twitter_username']})</sup>"
)
else:
twitter = ""
# Put it all together!
md_content += (
f"<a href='https://github.com/{username}' title='{bio}'>{name}{is_stargazer}<br />"
f"<img src='{picture}' width='80' /></a> {stats} {blog} {twitter} | "
f"**{question_heading}**<br />{response}\n"
)
time.sleep(0.5) # Reduce rate-limiting from GitHub API
return md_content
""" The main entrypoint of the script """
if __name__ == "__main__":
# Fetch list of stargazers (to insert special emoji next to name)
all_stargazers = fetch_all_stargazers(REPO_OWNER, REPO_NAME, GH_ACCESS_TOKEN)
# Read YAML
yaml_content = yaml.safe_load(read_file(CONTRIBUTORS_FILE_PATH))
# Read current README
readme_content = read_file(README_PATH)
# Generate content to be inserted
new_md_content = build_markdown_content(
yaml_content["contributors"], all_stargazers
)
# Locate and replace content in README
start_marker = "<!-- git-in-start -->"
end_marker = "<!-- git-in-end -->"
start_index = readme_content.find(start_marker) + len(start_marker)
end_index = readme_content.find(end_marker)
# Create new readme (from old readme + new content)
new_readme_content = (
readme_content[:start_index]
+ "\n"
+ new_md_content
+ readme_content[end_index:]
)
# Write readme content back to file
write_file(README_PATH, new_readme_content)
logging.info("All Done!")
"""
Okay, you've got this far...
As you can tell this is a pretty quickly-put-together and hacky script
And there's definitely plenty of room for improvement!
So if you're up for it, feel free to submit a PR :)
"""