1
1
# In[1]:
2
-
3
- # imports and set configuration
4
2
import pandas as pd
5
- from retrieve_prs_data import run
6
-
7
- exclude_prototype = True
8
- data_filename = "10.0_to_11.0-rc2.json"
9
- previous_release = "v10.0"
10
- current_release = "v11.0-rc2"
11
3
12
4
# In[2]:
13
-
14
-
5
+ data_filename = "data.json"
15
6
df = pd .read_json (data_filename ).T
16
7
df .tail ()
17
8
18
-
19
9
# In[3]:
20
-
21
-
22
10
all_labels = {lbl for labels in df ["labels" ] for lbl in labels }
23
11
all_labels
24
12
25
-
26
13
# In[4]:
27
-
28
-
29
14
# Add one column per label
30
15
for label in all_labels :
31
16
df [label ] = df ["labels" ].apply (lambda labels_list : label in labels_list )
32
17
df .head ()
33
18
34
-
35
19
# In[5]:
36
-
37
-
38
20
# Add a clean "module" column. It contains tuples since PRs can have more than one module.
39
21
# Maybe we should include "topics" in that column as well?
40
22
51
33
df ["module" ] = df .module .apply (tuple )
52
34
df .head ()
53
35
54
-
55
36
# In[6]:
56
-
57
-
58
37
mod_df = df .set_index ("module" ).sort_index ()
59
38
mod_df .tail ()
60
39
61
-
62
40
# In[7]:
63
-
64
-
65
41
# All improvement PRs
66
42
mod_df [mod_df ["enhancement" ]].head ()
67
43
68
-
69
44
# In[8]:
70
-
71
-
72
45
# improvement for module
73
46
# note: don't filter module name on the index as the index contains tuples with non-exclusive values
74
47
# Use the boolean column instead
75
48
mod_df [mod_df ["enhancement" ] & mod_df ["module: transforms" ]]
76
49
77
50
78
51
# In[9]:
79
-
80
-
81
- def format_prs (mod_df ):
52
+ def format_prs (mod_df , exclude_prototype = True ):
82
53
out = []
83
54
for idx , row in mod_df .iterrows ():
84
- if exclude_prototype and row ["prototype" ]:
55
+ if exclude_prototype and "prototype" in row and row ["prototype" ]:
85
56
continue
86
57
modules = idx
87
58
# Put "documentation" and "tests" first for sorting to be decent
@@ -98,8 +69,6 @@ def format_prs(mod_df):
98
69
99
70
100
71
# In[10]:
101
-
102
-
103
72
included_prs = pd .DataFrame ()
104
73
105
74
# If labels are accurate, this should generate most of the release notes already
@@ -112,27 +81,40 @@ def format_prs(mod_df):
112
81
("Bug Fixes" , "bug" ),
113
82
("Code Quality" , "code quality" ),
114
83
):
115
- print (f"## { section_title } " )
116
- print ()
117
- tmp_df = mod_df [mod_df [module_idx ]]
118
- included_prs = pd .concat ([included_prs , tmp_df ])
119
- print (format_prs (tmp_df ))
120
- print ()
84
+ if module_idx in mod_df :
85
+ print (f"## { section_title } " )
86
+ print ()
87
+ tmp_df = mod_df [mod_df [module_idx ]]
88
+ included_prs = pd .concat ([included_prs , tmp_df ])
89
+ print (format_prs (tmp_df ))
90
+ print ()
121
91
122
92
123
93
# In[11]:
124
-
125
-
126
94
# Missing PRs are these ones... classify them manually
127
95
missing_prs = pd .concat ([mod_df , included_prs ]).drop_duplicates (subset = "pr_number" , keep = False )
128
96
print (format_prs (missing_prs ))
129
97
130
98
# In[12]:
131
-
132
99
# Generate list of contributors
133
100
print ()
134
101
print ("## Contributors" )
135
102
136
- command_to_run = f"{{ git shortlog -s { previous_release } ..{ current_release } | cut -f2- & git log -s { previous_release } ..{ current_release } | grep Co-authored | cut -f2- -d: | cut -f1 -d\\ < | sed 's/^ *//;s/ *$//' ; }} | sort --ignore-case | uniq | tr '\\ n' ';' | sed 's/;/, /g;s/, $//' | fold -s"
137
- rc , output , err = run (command_to_run )
138
- print (output )
103
+ previous_release = "c35d3855ccbfa6a36e6ae6337a1f2c721c1f1e78"
104
+ current_release = "5181a854d8b127cf465cd22a67c1b5aaf6ccae05"
105
+ print (
106
+ f"{{ git shortlog -s { previous_release } ..{ current_release } | cut -f2- & git log -s { previous_release } ..{ current_release } | grep Co-authored | cut -f2- -d: | cut -f1 -d\\ < | sed 's/^ *//;s/ *//' ; }} | sort --ignore-case | uniq | tr '\\ n' ';' | sed 's/;/, /g;s/,//' | fold -s"
107
+ )
108
+
109
+ # In[13]:
110
+ # Utility to extract PR numbers only from multiple lines, useful to bundle all
111
+ # the docs changes for example:
112
+ import re
113
+
114
+ s = """
115
+
116
+ [] Remove unnecessary dependency from macOS/Conda binaries (#8077)
117
+ [rocm] [ROCm] remove HCC references (#8070)
118
+ """
119
+
120
+ print (", " .join (re .findall ("(#\\ d+)" , s )))
0 commit comments