-
Notifications
You must be signed in to change notification settings - Fork 8
dcrt0(1/4): API2: add comments and docstring of the functions #220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
583e684
2a769b9
b8e70b0
9cbcd93
337df18
cea2b2e
b67c0de
8aa6707
b06a946
5545804
3d53a64
6071d18
a7b6af5
1b4e3c5
eb33ac4
a3e83da
a680fbf
e9c5a2b
9fa0dca
92d052e
3729fd5
768034d
3153e68
456b47b
db70f26
95bca86
a813248
594d281
70d6f5d
0248642
c0ac779
ee49aca
e984d42
820baaf
ddfad20
c5ae1ab
2c92f61
8e88847
e606327
72e2600
bd5af17
638fa8d
426c154
cdea6a3
8970a68
a203221
64e672b
892f8ef
cb92ff0
1b06f70
cd0147c
24f0c9f
2751f9f
b0f847c
7377108
60a21af
0b7c76f
5307497
919457b
1f666fc
9a5b08e
e0a9f60
38640ac
cc28798
b4aeed7
d462e25
d9e42c0
71c5d37
ab53ca2
473d1f2
e30fbfe
c6021fc
9a6cc83
a10668f
b00954a
7d78089
3e09b78
b2718ed
70446ac
8846ec5
b4b53b2
3291da3
ccc51d2
7898115
a94312a
3868c38
99f2d7e
a9b4b0f
1a1ac5f
a88c078
7aa8def
36a4d57
b435399
8d9a946
bf23937
91b4eb3
181ed7e
05ee677
34e2728
85f3033
82b7ef7
853b53e
309d982
0284cbb
2b9cdbf
7a24bda
447d2a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -14,7 +14,8 @@ | |||||
|
||||||
import matplotlib.pyplot as plt | ||||||
import numpy as np | ||||||
from hidimstat.dcrt import dcrt_zero, dcrt_pvalue | ||||||
|
||||||
from hidimstat.dcrt import D0CRT | ||||||
from hidimstat._utils.scenario import multivariate_1D_simulation | ||||||
|
||||||
plt.rcParams.update({"font.size": 21}) | ||||||
|
@@ -51,20 +52,16 @@ | |||||
y = np.maximum(0.0, y) | ||||||
|
||||||
## dcrt Lasso ## | ||||||
selection_features, X_res, sigma2, y_res = dcrt_zero(X, y, screening=False) | ||||||
variables_important_lasso, pvals_lasso, ts_lasso = dcrt_pvalue( | ||||||
selection_features, X_res, sigma2, y_res | ||||||
) | ||||||
d0crt_lasso = D0CRT(screening=False, statistic="residual") | ||||||
d0crt_lasso.fit(X, y) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
variables_important_lasso, pvals_lasso = d0crt_lasso.importance() | ||||||
typeI_error["Lasso"].append(sum(pvals_lasso[n_signal:] < alpha) / (p - n_signal)) | ||||||
power["Lasso"].append(sum(pvals_lasso[:n_signal] < alpha) / (n_signal)) | ||||||
|
||||||
## dcrt Random Forest ## | ||||||
selection_features, X_res, sigma2, y_res = dcrt_zero( | ||||||
X, y, screening=False, statistic="random_forest" | ||||||
) | ||||||
rvariables_important_forest, pvals_forest, ts_forest = dcrt_pvalue( | ||||||
selection_features, X_res, sigma2, y_res | ||||||
) | ||||||
d0crt_random_forest = D0CRT(screening=False, statistic="random_forest") | ||||||
d0crt_random_forest.fit(X, y) | ||||||
variables_important_forest, pvals_forest = d0crt_random_forest.importance() | ||||||
typeI_error["Forest"].append(sum(pvals_forest[n_signal:] < alpha) / (p - n_signal)) | ||||||
power["Forest"].append(sum(pvals_forest[:n_signal] < alpha) / (n_signal)) | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -35,7 +35,7 @@ | |||||
from sklearn.model_selection import KFold | ||||||
from sklearn.svm import SVC | ||||||
|
||||||
from hidimstat import LOCO, dcrt_pvalue, dcrt_zero | ||||||
from hidimstat import LOCO, D0CRT | ||||||
|
||||||
############################################################################# | ||||||
# Generate data where classes are not linearly separable | ||||||
|
@@ -65,17 +65,9 @@ | |||||
# test (:math:`H_0: X_j \perp\!\!\!\perp y | X_{-j}`) for each variable. However, | ||||||
# this test is based on a linear model (LogisticRegression) and fails to reject the null | ||||||
# in the presence of non-linear relationships. | ||||||
selection_features, X_residual, sigma2, y_res = dcrt_zero( | ||||||
X, y, problem_type="classification", screening=False | ||||||
) | ||||||
_, pval_dcrt, _ = dcrt_pvalue( | ||||||
selection_features=selection_features, | ||||||
X_res=X_residual, | ||||||
y_res=y_res, | ||||||
sigma2=sigma2, | ||||||
fdr=0.05, | ||||||
) | ||||||
|
||||||
d0crt = D0CRT(problem_type="classification", screening=False) | ||||||
d0crt.fit(X, y) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
_, pval_dcrt = d0crt.importance(fpr=0.05) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is already mentioned in another PR, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, this was mentioned in issue #217. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. all importance methods should have an X=None, Y=None, as arguments, that would default to the already provided training data. In some cases, these arguments would not even be taken into account. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fact that all the importance methods will have an X and y is not yet defined. As a user, it's more readable to add additional optional parameters to a function than to have parameters which don't have any effect. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The point is about API uniformity: you want to be able to loop over methods with maximally similar arguments without triggering an error. The mechanism is fundamental in sklearn and related libraries. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you indicate to me one function in sklearn where some parameters are not used by the function due to the homogenisation of the API? |
||||||
|
||||||
################################################################################ | ||||||
# Compute p-values using LOCO | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
from copy import deepcopy | ||
|
||
|
||
def _detection_section(lines): | ||
""" | ||
Detect sections in a numpy-style docstring by identifying section headers and their underlines. | ||
|
||
Parameters | ||
---------- | ||
lines : list of str | ||
Lines of the docstring to parse. | ||
|
||
Returns | ||
------- | ||
list of list of str | ||
List of sections, where each section is a list of lines belonging to that section. | ||
The first section is the summary, followed by other sections like Parameters, Returns, etc. | ||
""" | ||
sections = [] | ||
index_line = 1 | ||
begin_section = index_line | ||
while len(lines) > index_line: | ||
if "-------" in lines[index_line]: | ||
sections.append(lines[begin_section : index_line - 2]) | ||
begin_section = index_line - 1 | ||
index_line += 1 | ||
sections.append(lines[begin_section : len(lines)]) | ||
return sections | ||
|
||
|
||
def _parse_docstring(docstring):
    """
    Break a numpy-style docstring into a mapping of its sections.

    Parameters
    ----------
    docstring : str
        The docstring to parse, following numpy docstring format.

    Returns
    -------
    dict
        Mapping from section name to the list of lines of that section.
        The summary is stored under the key 'short'; every other key is the
        section header with all whitespace removed ('Parameters', 'Returns',
        ...), and its lines include the header and its underline.
    """
    chunks = _detection_section(docstring.split("\n"))
    parsed = {"short": chunks[0]}
    for chunk in chunks:
        # A headed section has its dashed underline on the second line.
        has_header = len(chunk) > 1 and "---" in chunk[1]
        if has_header:
            key = "".join(chunk[0].split())
        else:
            key = "short"
        parsed[key] = chunk
    return parsed
|
||
|
||
def _reindent(string): | ||
""" | ||
Reindent a string by stripping whitespace and normalizing line breaks. | ||
|
||
Parameters | ||
---------- | ||
string : list of str | ||
The string content to reindent. | ||
|
||
Returns | ||
------- | ||
str | ||
Reindented string with normalized line breaks and indentation. | ||
""" | ||
new_string = deepcopy(string) | ||
for i in range(len(new_string)): | ||
new_string[i] = "\n" + new_string[i] | ||
new_string = "".join(new_string) | ||
return "\n".join(l.strip() for l in new_string.strip().split("\n")) | ||
|
||
|
||
def _aggregate_docstring(list_docstring):
    """
    Combine multiple docstrings into a single docstring.

    This function takes a list of docstrings, parses each one, and combines them into
    a single docstring. It keeps the summary from the first docstring,
    combines all parameter sections, and uses the return section from the last docstring.

    Parameters
    ----------
    list_docstring : list
        List of docstrings to be combined. Each docstring should follow
        numpy docstring format. Entries equal to None are ignored.

    Returns
    -------
    docstring : str
        A combined docstring containing:
        - Summary from first docstring
        - Combined parameters from all docstrings
        - Returns section from last docstring
        The returned docstring is passed through ``_reindent``. Sections that
        are missing from a docstring are skipped, and an empty string is
        returned when no docstring is available.
    """
    parsed = [
        _parse_docstring(docstring=docstring)
        for docstring in list_docstring
        if docstring is not None
    ]
    # Every entry can be None (e.g. docstrings stripped by ``python -OO``).
    if not parsed:
        return ""

    # add summary (copied so the parsed section is not modified in place)
    final_docstring = list(parsed[0]["short"])
    # add parameters, keeping the section title of the first block only
    title_written = False
    for sections in parsed:
        parameters = sections.get("Parameters")
        if not parameters:
            continue
        final_docstring += parameters if not title_written else parameters[2:]
        title_written = True
    # the last return
    final_docstring += parsed[-1].get("Returns", [])
    return _reindent(final_docstring)
Uh oh!
There was an error while loading. Please reload this page.