Skip to content

Commit 6647435

Browse files
authored
Merge pull request #11 from scipy-conference/feat/make-runnable
feat: Make notebooks runnable on previous data
2 parents 77cee6a + 03ff4ca commit 6647435

File tree

7 files changed

+74
-30
lines changed

7 files changed

+74
-30
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ venv/
1010
*.ipynb
1111
# pixi environments
1212
.pixi
13+
14+
data/

README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,37 @@ The checks and tests are all run using Github actions on every pull request and
1313

1414
This repository is set up for Python 3.11. To customize that, change the `VARIANT` argument in `.devcontainer/devcontainer.json`, change the config options in `.precommit-config.yaml` and change the version number in `.github/workflows/python.yaml`.
1515

16+
## Assign Reviewers
17+
18+
First download the following files from Pretalx into the `data/` directory:
19+
20+
* `scipy_reviewers.csv` # people who signed up as reviewers
21+
* `sessions.csv` # all proposals exported from pretalx
22+
* `speakers.csv` # all speakers exported from pretalx
23+
* `pretalx_reviewers.csv` # all reviewers copy-pasted from pretalx
24+
* `scipy_coi_export.csv` # all responses to the coi form
25+
* `coi_authors.csv` # copy-pasted values of author names from the COI form
26+
* `tracks.csv` # manually entered track IDs
27+
28+
Then run the notebooks as Python files in the following order with `pixi`:
29+
30+
```
31+
$ pixi run pre-processing
32+
$ pixi run assignments
33+
```
34+
35+
or run the notebooks manually as Jupyter notebooks either by asking for a JupyterLab instance
36+
37+
```
38+
$ pixi run jupyter lab
39+
```
40+
41+
or just getting a shell
42+
43+
```
44+
$ pixi shell
45+
```
46+
1647
## Development instructions
1748

1849
## With devcontainer

assign_reviews.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
####################
55
# Imports
66
import json
7+
from pathlib import Path
78

89
import numpy as np
910
from scipy.optimize import Bounds, LinearConstraint, milp
@@ -120,7 +121,7 @@ def solve_milp(
120121
############################
121122
## FORMAT AND OUTPUT DATA ##
122123
############################
123-
def format_and_output_result(df_reviewers, df_submissions, solution, post_fix=""):
124+
def format_and_output_result(df_reviewers, df_submissions, solution, post_fix="", output_dir=Path.cwd() / "output"):
124125
reviewers = df_reviewers.to_dict("records")
125126
submissions = df_submissions.to_dict("records")
126127

@@ -140,20 +141,20 @@ def format_and_output_result(df_reviewers, df_submissions, solution, post_fix=""
140141
if DEBUG:
141142
result = {reviewer["reviewer_id"]: sorted(reviewer["is_tutorial"]) for reviewer in reviewers}
142143

143-
with open(f"output/review-assignments-debug{post_fix}.json", "w") as fp:
144+
with open(output_dir / f"review-assignments-debug{post_fix}.json", "w") as fp:
144145
fp.write(json.dumps(result, indent=4))
145146

146147
result = {reviewer["reviewer_id"]: reviewer["assigned_submission_ids"] for reviewer in reviewers}
147148

148-
with open(f"output/review-assignments{post_fix}.json", "w") as fp:
149+
with open(output_dir / f"review-assignments{post_fix}.json", "w") as fp:
149150
fp.write(json.dumps(result, indent=4))
150151

151152
for submission, assignments in zip(submissions, solution.T):
152153
submission["assigned_reviewer_ids"] = df_reviewers.reviewer_id[assignments].values.tolist()
153154

154155
result = {submission["submission_id"]: submission["assigned_reviewer_ids"] for submission in submissions}
155156

156-
with open(f"output/submission-assignments{post_fix}.json", "w") as fp:
157+
with open(output_dir / f"submission-assignments{post_fix}.json", "w") as fp:
157158
fp.write(json.dumps(result, indent=4))
158159

159160
return reviewers, submissions

data/.gitkeep

Whitespace-only changes.

notebooks/pre-processing.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,29 +19,34 @@
1919
# name: python
2020
# nbconvert_exporter: python
2121
# pygments_lexer: ipython3
22-
# version: 3.12.1
22+
# version: 3.12.2
2323
# ---
2424

2525
# %%
26+
from pathlib import Path
27+
2628
import duckdb
2729
from IPython import display
2830

31+
# %%
32+
data_dir = Path.cwd() / ".." / "data"
33+
2934
# Raw data to import
3035
raw_files = dict(
31-
scipy_reviewers="../data/scipy_reviewers.csv", # people who signed up as reviewers
32-
pretalx_sessions="../data/sessions.csv", # all proposal exported from pretalx
33-
pretalx_speakers="../data/speakers.csv", # all speakers exported from pretalx
34-
pretalx_reviewers="../data/pretalx_reviewers.csv", # all reviewers copy-pasted from pretalx
35-
coi_reviewers="../data/scipy_coi_export.csv", # all responses to the coi form
36-
coi_authors="../data/coi_authors.csv", # copy pasted values of author names from coi form
37-
tracks="../data/tracks.csv", # manually entered track IDs
36+
scipy_reviewers=data_dir / "scipy_reviewers.csv", # people who signed up as reviewers
37+
pretalx_sessions=data_dir / "sessions.csv", # all proposal exported from pretalx
38+
pretalx_speakers=data_dir / "speakers.csv", # all speakers exported from pretalx
39+
pretalx_reviewers=data_dir / "pretalx_reviewers.csv", # all reviewers copy-pasted from pretalx
40+
coi_reviewers=data_dir / "scipy_coi_export.csv", # all responses to the coi form
41+
coi_authors=data_dir / "coi_authors.csv", # copy pasted values of author names from coi form
42+
tracks=data_dir / "tracks.csv", # manually entered track IDs
3843
)
3944

4045
# Output
41-
database_file = "../data/assign_reviews.db"
46+
database_file = data_dir / "assign_reviews.db"
4247

4348
# %%
44-
con = duckdb.connect(database_file)
49+
con = duckdb.connect(str(database_file))
4550

4651

4752
# %%
@@ -141,7 +146,7 @@ def create_and_show_table(file_name, table_name, show=True):
141146
# Reviewers who signed up for pretalx but did not fill in COI
142147

143148
# %%
144-
con = duckdb.connect(database_file)
149+
con = duckdb.connect(str(database_file))
145150

146151
# %%
147152
df = con.sql(
@@ -359,7 +364,7 @@ def create_and_show_table(file_name, table_name, show=True):
359364
con.sql("table reviewers_with_tracks").df()
360365

361366
# %%
362-
con.sql("select email as reviewer_id, list(track_id) as tracks from reviewers_with_tracks group by email")
367+
con.sql("select email as reviewer_id, list(track_ids) as tracks from reviewers_with_tracks group by email")
363368

364369
# %% [markdown]
365370
# # Final tables for script
@@ -411,5 +416,3 @@ def create_and_show_table(file_name, table_name, show=True):
411416

412417
# %%
413418
con.close()
414-
415-
# %%

notebooks/run-assignments.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,16 @@
1919
# name: python
2020
# nbconvert_exporter: python
2121
# pygments_lexer: ipython3
22-
# version: 3.12.1
22+
# version: 3.12.2
2323
# ---
2424

2525
# %%
2626
####################
2727
## ASSIGN REVIEWS ##
2828
####################
29-
# Imports
3029
import json
3130
import sys
31+
from pathlib import Path
3232

3333
import duckdb
3434
import pandas as pd
@@ -40,16 +40,18 @@
4040
# # Start script
4141

4242
# %%
43-
# mkdir output
43+
data_dir = Path().cwd() / ".." / "data"
44+
output_dir = Path().cwd() / ".." / "output"
45+
output_dir.mkdir(exist_ok=True)
4446

4547
# %%
4648
ASSIGN_TUTORIALS_TO_ANYONE = False
4749
TUTORIAL_COEFF = 0.8
4850

4951
DEBUG = True
5052

51-
database_file = "../data/assign_reviews.db"
52-
con = duckdb.connect(database_file)
53+
database_file = data_dir / "assign_reviews.db"
54+
con = duckdb.connect(str(database_file))
5355
df_submissions = con.sql("table submissions_to_assign").df()
5456
df_reviewers = con.sql("table reviewers_to_assign").df()
5557

@@ -82,7 +84,9 @@
8284
TUTORIAL_COEFF,
8385
ASSIGN_TUTORIALS_TO_ANYONE,
8486
)
85-
reviewers, submissions = format_and_output_result(df_reviewers, df_submissions_tutorials, solution, post_fix="00")
87+
reviewers, submissions = format_and_output_result(
88+
df_reviewers, df_submissions_tutorials, solution, post_fix="00", output_dir=output_dir
89+
)
8690

8791
# %%
8892
df = pd.DataFrame(reviewers)
@@ -141,7 +145,7 @@
141145
)
142146
if solution is not None:
143147
reviewers, submissions = format_and_output_result(
144-
df_reviewers_no_submissions, df_submissions_no_tutorials, solution, post_fix="01"
148+
df_reviewers_no_submissions, df_submissions_no_tutorials, solution, post_fix="01", output_dir=output_dir
145149
)
146150

147151
# %%
@@ -217,7 +221,7 @@
217221

218222
if solution is not None:
219223
reviewers, submissions = format_and_output_result(
220-
df_reviewers_only_tut, df_submissions_few_reviewers, solution, post_fix="02"
224+
df_reviewers_only_tut, df_submissions_few_reviewers, solution, post_fix="02", output_dir=output_dir
221225
)
222226

223227
# %%
@@ -314,17 +318,18 @@
314318
# ## Final export
315319

316320
# %%
317-
database_file = "../data/assign_reviews.db"
318-
con = duckdb.connect(database_file)
321+
database_file = data_dir / "assign_reviews.db"
322+
con = duckdb.connect(str(database_file))
319323

320324
# %%
321325
reviewer_assignments_final = {
322-
item["reviewer_id"]: item["assigned_submission_ids"]
326+
item["reviewer_id"]: item["assigned_submission_ids"].tolist()
323327
for item in con.sql("table reviewer_assignments_02")
324328
.df()[["reviewer_id", "assigned_submission_ids"]]
325329
.to_dict("records")
326330
}
327-
with open("output/reviewer-assignments.json", "w") as fp:
331+
332+
with open(output_dir / "reviewer-assignments.json", "w") as fp:
328333
fp.write(json.dumps(reviewer_assignments_final, indent=4))
329334

330335
# %%

pixi.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ channels = ["conda-forge"]
1010
platforms = ["linux-64", "osx-64", "osx-arm64"]
1111

1212
[tasks]
13+
pre-processing = "cd notebooks && python pre-processing.py"
14+
assignments = "cd notebooks && python run-assignments.py"
1315

1416
[dependencies]
1517
python = "3.12.*"

0 commit comments

Comments
 (0)