Update loader docstrings to NumPy style; nest Search_Engine under uns["RawInfo"]

mffrank · mffrank · commit 8c59b5dc2c52 · 2025-07-09T11:54:50.000-07:00
diff --git a/protdata/io/diann_loader.py b/protdata/io/diann_loader.py
@@ -13,17 +13,22 @@ def read_diann(
     """
     Load DIA-NN protein group matrix (report.pg_matrix.tsv) into an AnnData object.
 
-    Args:
-        file: Path to DIA-NN report.pg_matrix.tsv or a pandas DataFrame.
-        intensity_suffix: Suffix for intensity columns (default: '_Intensity').
-        index_column: Column name for protein group IDs (default: 'Protein.Group').
-        sep: File separator (default: tab).
+    Parameters
+    ----------
+    file
+        Path to DIA-NN report.pg_matrix.tsv file or a pandas DataFrame containing the data.
+    index_column
+        Column name for protein group IDs.
+    sep
+        File separator.
 
-    Returns:
+    Returns
+    -------
+    anndata.AnnData
         AnnData object with:
-            - X: intensity matrix (proteins x samples)
-            - var: protein metadata
-            - obs: sample metadata
+            - X: intensity matrix (samples x proteins)
+            - var: protein metadata (indexed by protein group IDs)
+            - obs: sample metadata (indexed by sample names)
     """
     if isinstance(file, pd.DataFrame):
         df = file.copy()
@@ -72,7 +77,11 @@ def read_diann(
     obs = pd.DataFrame(index=intensity_cols)
 
     # Build uns
-    uns = {"Search_Engine": "DIANN"}
+    uns = {
+        "RawInfo": {
+            "Search_Engine": "DIANN",
+        },
+    }
 
     # Create AnnData
     adata = ad.AnnData(X=X, obs=obs, var=var)
diff --git a/protdata/io/fragpipe_loader.py b/protdata/io/fragpipe_loader.py
@@ -16,21 +16,28 @@ def read_fragpipe(
     sep: str = "\t",
 ) -> ad.AnnData:
     """
-    Load FragPipe-Philosopher protein group matrix into an AnnData object.
+    Load a FragPipe protein group matrix into an AnnData object.
 
-    Args:
-        file: Path to combined_protein.tsv or a pandas DataFrame.
-        intensity_column_prefix: Prefix for intensity columns (default: '[sample] MaxLFQ Intensity ').
-        index_column: Column name for protein IDs (default: 'Protein').
-        gene_names_column: Column name for gene names (default: 'Gene Names').
-        confidence_column: Column name for protein probability (default: 'Protein Probability').
-        sep: File separator (default: tab).
+    Parameters
+    ----------
+    file
+        Path to the FragPipe combined_protein.tsv file or a pandas DataFrame containing the data.
+    intensity_column_suffixes
+        Suffix(es) for intensity columns to extract.
+        The first suffix is used for the main matrix (X), others are stored as layers if present.
+    index_column
+        Column name to use as protein index.
+    sep
+        File separator if reading from file.
 
-    Returns:
+    Returns
+    -------
+    anndata.AnnData
         AnnData object with:
-            - X: intensity matrix (proteins x samples)
-            - var: protein metadata
-            - obs: sample metadata
+            - X: intensity matrix (samples x proteins)
+            - var: protein metadata (indexed by protein IDs)
+            - obs: sample metadata (indexed by sample names)
+            - layers: additional intensity matrices if multiple intensity column suffixes are provided
     """
     if isinstance(intensity_column_suffixes, str):
         intensity_column_suffixes = [intensity_column_suffixes]
@@ -79,7 +86,11 @@ def read_fragpipe(
     obs = pd.DataFrame(index=sample_names)
 
     # Build uns
-    uns = {"Search_Engine": "FragPipe_Philosopher"}
+    uns = {
+        "RawInfo": {
+            "Search_Engine": "FragPipe",
+        },
+    }
 
     # Create AnnData
     adata = ad.AnnData(X=X, obs=obs, var=var, layers=layers)
diff --git a/protdata/io/maxquant_loader.py b/protdata/io/maxquant_loader.py
@@ -20,18 +20,28 @@ def read_maxquant(
     """
     Load MaxQuant proteinGroups.txt into an AnnData object.
 
-    Args:
-        file: Path to proteinGroups.txt or a pandas DataFrame.
-        intensity_column_prefix: Prefix for intensity columns (default: 'LFQ intensity ').
-        index_column: Column name for protein IDs (default: 'Protein IDs').
-        gene_names_column: Column name for gene names (default: 'Gene names').
-        sep: File separator (default: tab).
+    Parameters
+    ----------
+    file
+        Path to the MaxQuant proteinGroups.txt file or a pandas DataFrame containing the data.
+    intensity_column_prefixes
+        Prefix(es) for intensity columns to extract.
+        The first prefix is used for the main matrix (X), others are stored as layers if present.
+    index_column
+        Column name to use as protein index.
+    filter_columns
+        Columns to use for filtering out contaminants or unwanted entries.
+    sep
+        File separator if reading from file.
 
-    Returns:
+    Returns
+    -------
+    anndata.AnnData
         AnnData object with:
-            - X: intensity matrix (proteins x samples)
-            - var: protein metadata
-            - obs: sample metadata
+            - X: intensity matrix (samples x proteins)
+            - var: protein metadata (indexed by protein IDs)
+            - obs: sample metadata (indexed by sample names)
+            - layers: additional intensity matrices if multiple intensity column prefixes are provided
     """
     if isinstance(intensity_column_prefixes, str):
         intensity_column_prefixes = [intensity_column_prefixes]
@@ -83,7 +93,12 @@ def read_maxquant(
     obs = pd.DataFrame(index=sample_names)
 
     # Build uns
-    uns = {"Search_Engine": "MaxQuant"}
+    uns = {
+        "RawInfo": {
+            "Search_Engine": "MaxQuant",
+            "filter_columns": filter_columns,
+        },
+    }
 
     # Create AnnData
     adata = ad.AnnData(X=X, obs=obs, var=var, layers=layers, uns=uns)
diff --git a/protdata/io/mztab_loader.py b/protdata/io/mztab_loader.py
@@ -12,16 +12,20 @@ def read_mztab(
     """
     Load mzTab protein table into an AnnData object.
 
-    Args:
-        file: Path to mzTab file or a pandas DataFrame (protein table).
-        intensity_column_prefix: Prefix for intensity columns (default: 'protein_abundance_').
-        index_column: Column indicating the protein groups (default: 'accession').
+    Parameters
+    ----------
+    file
+        Path to mzTab file or a pandas DataFrame containing the protein table.
+    index_column
+        Column indicating the protein groups.
 
-    Returns:
+    Returns
+    -------
+    anndata.AnnData
         AnnData object with:
-            - X: intensity matrix (proteins x samples)
+            - X: intensity matrix (samples x proteins)
-            - var: protein metadata
-            - obs: sample metadata
+            - var: protein metadata (indexed by protein accession)
+            - obs: sample metadata (indexed by sample names)
     """
     if isinstance(file, pd.DataFrame):
         df = file.copy()
@@ -54,7 +58,7 @@ def read_mztab(
     obs.index = obs.index.astype(str)
 
     # Build uns
-    uns = {"Search_Engine": df.search_engine.iloc[0]}
+    uns = {"RawInfo": {"Search_Engine": df.search_engine.iloc[0]}}
 
     # Create AnnData
     adata = ad.AnnData(X=X, obs=obs, var=var, uns=uns)