mind-inria · lionelkusch · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025
diff --git a/doc_conf/api.rst b/doc_conf/api.rst
@@ -19,8 +19,6 @@ Functions
    quantile_aggregation
    clustered_inference
    clustered_inference_pvalue
-   dcrt_zero
-   dcrt_pvalue
    desparsified_lasso
    desparsified_lasso_pvalue
    desparsified_group_lasso_pvalue
@@ -40,3 +38,4 @@ Classes
    LOCO
    CPI
    PFI
+   D0CRT
diff --git a/examples/plot_dcrt_example.py b/examples/plot_dcrt_example.py
@@ -14,7 +14,8 @@
 
 import matplotlib.pyplot as plt
 import numpy as np
-from hidimstat.dcrt import dcrt_zero, dcrt_pvalue
+
+from hidimstat.dcrt import D0CRT
 from hidimstat._utils.scenario import multivariate_1D_simulation
 
 plt.rcParams.update({"font.size": 21})
@@ -51,20 +52,16 @@
     y = np.maximum(0.0, y)
 
     ## dcrt Lasso ##
-    selection_features, X_res, sigma2, y_res = dcrt_zero(X, y, screening=False)
-    variables_important_lasso, pvals_lasso, ts_lasso = dcrt_pvalue(
-        selection_features, X_res, sigma2, y_res
-    )
+    d0crt_lasso = D0CRT(screening=False, statistic="residual")
+    d0crt_lasso.fit(X, y)
-    d0crt_lasso.fit(X, y)
+    d0crt_lasso = D0CRT(screening=False, statistic="residual").fit(X, y)
-    d0crt_lasso.fit(X, y)
+    d0crt_lasso = D0CRT(screening=False, statistic="residual").fit(X, y)
+    variables_important_lasso, pvals_lasso = d0crt_lasso.importance()
     typeI_error["Lasso"].append(sum(pvals_lasso[n_signal:] < alpha) / (p - n_signal))
     power["Lasso"].append(sum(pvals_lasso[:n_signal] < alpha) / (n_signal))
 
     ## dcrt Random Forest ##
-    selection_features, X_res, sigma2, y_res = dcrt_zero(
-        X, y, screening=False, statistic="random_forest"
-    )
-    rvariables_important_forest, pvals_forest, ts_forest = dcrt_pvalue(
-        selection_features, X_res, sigma2, y_res
-    )
+    d0crt_random_forest = D0CRT(screening=False, statistic="random_forest")
+    d0crt_random_forest.fit(X, y)
+    variables_important_forest, pvals_forest = d0crt_random_forest.importance()
     typeI_error["Forest"].append(sum(pvals_forest[n_signal:] < alpha) / (p - n_signal))
     power["Forest"].append(sum(pvals_forest[:n_signal] < alpha) / (n_signal))
 

diff --git a/examples/plot_model_agnostic_importance.py b/examples/plot_model_agnostic_importance.py
@@ -35,7 +35,7 @@
 from sklearn.model_selection import KFold
 from sklearn.svm import SVC
 
-from hidimstat import LOCO, dcrt_pvalue, dcrt_zero
+from hidimstat import LOCO, D0CRT
 
 #############################################################################
 # Generate data where classes are not linearly separable
@@ -65,17 +65,9 @@
 # test (:math:`H_0: X_j \perp\!\!\!\perp y | X_{-j}`) for each variable. However,
 # this test is based on a linear model (LogisticRegression) and fails to reject the null
 # in the presence of non-linear relationships.
-selection_features, X_residual, sigma2, y_res = dcrt_zero(
-    X, y, problem_type="classification", screening=False
-)
-_, pval_dcrt, _ = dcrt_pvalue(
-    selection_features=selection_features,
-    X_res=X_residual,
-    y_res=y_res,
-    sigma2=sigma2,
-    fdr=0.05,
-)
-
+d0crt = D0CRT(problem_type="classification", screening=False)
+d0crt.fit(X, y)
-d0crt.fit(X, y)
+d0crt = D0CRT(problem_type="classification", screening=False).fit(X, y)
-d0crt.fit(X, y)
+d0crt = D0CRT(problem_type="classification", screening=False).fit(X, y)
+_, pval_dcrt = d0crt.importance(fpr=0.05)
 
 ################################################################################
 # Compute p-values using LOCO

diff --git a/src/hidimstat/__init__.py b/src/hidimstat/__init__.py
@@ -12,6 +12,7 @@
     desparsified_lasso_pvalue,
     desparsified_group_lasso_pvalue,
 )
+from .dcrt import d0crt, D0CRT
 from .conditional_permutation_importance import CPI
 from .knockoffs import (
     model_x_knockoff,
@@ -22,8 +23,8 @@
 from .leave_one_covariate_out import LOCO
 from .noise_std import reid
 from .permutation_feature_importance import PFI
+
 from .statistical_tools.aggregation import quantile_aggregation
-from .dcrt import dcrt_zero, dcrt_pvalue
 
 try:
     from ._version import __version__
@@ -36,8 +37,8 @@
     "clustered_inference_pvalue",
     "ensemble_clustered_inference",
     "ensemble_clustered_inference_pvalue",
-    "dcrt_zero",
-    "dcrt_pvalue",
+    "d0crt",
+    "D0CRT",
     "desparsified_lasso",
     "desparsified_lasso_pvalue",
     "desparsified_group_lasso_pvalue",

diff --git a/src/hidimstat/_utils/docstring.py b/src/hidimstat/_utils/docstring.py
@@ -0,0 +1,115 @@
+from copy import deepcopy
+
+
+def _detection_section(lines):
+    """
+    Split the lines of a numpy-style docstring into its sections.
+
+    Parameters
+    ----------
+    lines : list of str
+        Lines of the docstring to parse.
+
+    Returns
+    -------
+    list of list of str
+        The summary first, then one list per section starting at its header line.
+        A line containing seven consecutive dashes marks the line above as a header.
+    """
+    # lines[0] is the text right after the opening quotes: keep it unless it is blank
+    begin_section = 0 if lines and lines[0].strip() else 1
+    sections = []
+    for index_line in range(1, len(lines)):
+        if "-------" in lines[index_line]:
+            # skip the (normally blank) line above the header; never a negative end
+            sections.append(lines[begin_section : max(index_line - 2, 0)])
+            begin_section = index_line - 1
+    sections.append(lines[begin_section:])
+    return sections
+
+
+def _parse_docstring(docstring):
+    """
+    Break a numpy-style docstring into a mapping from section name to lines.
+
+    Parameters
+    ----------
+    docstring : str
+        The docstring to parse, following numpy docstring format.
+
+    Returns
+    -------
+    dict
+        Maps 'short' to the lines of the summary, and each section title with its
+        whitespace removed ('Parameters', 'Returns', ...) to that section's lines.
+    """
+    chunks = _detection_section(docstring.split("\n"))
+    parsed = {"short": chunks[0]}
+    for chunk in chunks:
+        # a titled section has an underline of dashes on its second line
+        has_title = len(chunk) > 1 and "---" in chunk[1]
+        # anything without a title is stored as the summary
+        name = "".join(chunk[0].split()) if has_title else "short"
+        parsed[name] = chunk
+    return parsed
+
+
+def _reindent(string):
+    """
+    Join docstring lines into one string with all indentation removed.
+
+    Parameters
+    ----------
+    string : iterable of str
+        Lines to join, typically the list built from parsed docstring sections.
+
+    Returns
+    -------
+    str
+        The lines joined with newlines; leading and trailing whitespace is stripped
+        from every line and from the text as a whole.
+    """
+    # a newline in front of every element, so items holding several lines still split
+    text = "".join("\n" + line for line in string)
+    # strip the whole text first: leading and trailing blank lines disappear
+    return "\n".join(line.strip() for line in text.strip().split("\n"))
+
+
+def _aggregate_docstring(list_docstring):
+    """
+    Combine multiple docstrings into a single docstring.
+
+    Each docstring that is not None is parsed into sections. The result keeps the
+    summary of the first one, the "Parameters" sections of all of them in order,
+    and the "Returns" section of the last one.
+
+    Parameters
+    ----------
+    list_docstring : list
+        Docstrings following numpy docstring format; None entries are skipped.
+
+    Returns
+    -------
+    docstring : str
+        The combined docstring, reindented; an empty string when no entry is left
+        after skipping the None ones.
+
+    Raises
+    ------
+    KeyError
+        If a docstring lacks a "Parameters" section or the last one lacks "Returns".
+    """
+    parsed = [
+        _parse_docstring(docstring=doc) for doc in list_docstring if doc is not None
+    ]
+    if not parsed:
+        # nothing to merge, e.g. every docstring is None; avoid indexing an empty list
+        return ""
+    # summary, then the whole "Parameters" section (title included) of the first one
+    merged = parsed[0]["short"] + parsed[0]["Parameters"]
+    for sections in parsed[1:]:
+        # skip the section title and its underline, keep only the parameter entries
+        merged += sections["Parameters"][2:]
+    # the "Returns" section comes from the last docstring only
+    merged += parsed[-1]["Returns"]
+    return _reindent(merged)