Skip to content

Commit 8c59b5d

Browse files
committed
update loader docstrings
1 parent c0a3b3f commit 8c59b5d

File tree

4 files changed

+81
-42
lines changed

4 files changed

+81
-42
lines changed

protdata/io/diann_loader.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,22 @@ def read_diann(
1313
"""
1414
Load DIA-NN protein group matrix (report.pg_matrix.tsv) into an AnnData object.
1515
16-
Args:
17-
file: Path to DIA-NN report.pg_matrix.tsv or a pandas DataFrame.
18-
intensity_suffix: Suffix for intensity columns (default: '_Intensity').
19-
index_column: Column name for protein group IDs (default: 'Protein.Group').
20-
sep: File separator (default: tab).
16+
Parameters
17+
----------
18+
file
19+
Path to DIA-NN report.pg_matrix.tsv file or a pandas DataFrame containing the data.
20+
index_column
21+
Column name for protein group IDs.
22+
sep
23+
File separator.
2124
22-
Returns:
25+
Returns
26+
-------
27+
anndata.AnnData
2328
AnnData object with:
24-
- X: intensity matrix (proteins x samples)
25-
- var: protein metadata
26-
- obs: sample metadata
29+
- X: intensity matrix (samples x proteins)
30+
- var: protein metadata (indexed by protein group IDs)
31+
- obs: sample metadata (indexed by sample names)
2732
"""
2833
if isinstance(file, pd.DataFrame):
2934
df = file.copy()
@@ -72,7 +77,11 @@ def read_diann(
7277
obs = pd.DataFrame(index=intensity_cols)
7378

7479
# Build uns
75-
uns = {"Search_Engine": "DIANN"}
80+
uns = {
81+
"RawInfo": {
82+
"Search_Engine": "DIANN",
83+
},
84+
}
7685

7786
# Create AnnData
7887
adata = ad.AnnData(X=X, obs=obs, var=var)

protdata/io/fragpipe_loader.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,28 @@ def read_fragpipe(
1616
sep: str = "\t",
1717
) -> ad.AnnData:
1818
"""
19-
Load FragPipe-Philosopher protein group matrix into an AnnData object.
19+
Load a FragPipe protein group matrix into an AnnData object.
2020
21-
Args:
22-
file: Path to combined_protein.tsv or a pandas DataFrame.
23-
intensity_column_prefix: Prefix for intensity columns (default: '[sample] MaxLFQ Intensity ').
24-
index_column: Column name for protein IDs (default: 'Protein').
25-
gene_names_column: Column name for gene names (default: 'Gene Names').
26-
confidence_column: Column name for protein probability (default: 'Protein Probability').
27-
sep: File separator (default: tab).
21+
Parameters
22+
----------
23+
file
24+
Path to the FragPipe combined_protein.tsv file or a pandas DataFrame containing the data.
25+
intensity_column_suffixes
26+
Suffix(es) for intensity columns to extract.
27+
The first suffix is used for the main matrix (X), others are stored as layers if present.
28+
index_column
29+
Column name to use as protein index.
30+
sep
31+
File separator if reading from file.
2832
29-
Returns:
33+
Returns
34+
-------
35+
anndata.AnnData
3036
AnnData object with:
31-
- X: intensity matrix (proteins x samples)
32-
- var: protein metadata
33-
- obs: sample metadata
37+
- X: intensity matrix (samples x proteins)
38+
- var: protein metadata (indexed by protein IDs)
39+
- obs: sample metadata (indexed by sample names)
40+
- layers: additional intensity matrices if multiple intensity column suffixes are provided
3441
"""
3542
if isinstance(intensity_column_suffixes, str):
3643
intensity_column_suffixes = [intensity_column_suffixes]
@@ -79,7 +86,11 @@ def read_fragpipe(
7986
obs = pd.DataFrame(index=sample_names)
8087

8188
# Build uns
82-
uns = {"Search_Engine": "FragPipe_Philosopher"}
89+
uns = {
90+
"RawInfo": {
91+
"Search_Engine": "FragPipe",
92+
},
93+
}
8394

8495
# Create AnnData
8596
adata = ad.AnnData(X=X, obs=obs, var=var, layers=layers)

protdata/io/maxquant_loader.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,28 @@ def read_maxquant(
2020
"""
2121
Load MaxQuant proteinGroups.txt into an AnnData object.
2222
23-
Args:
24-
file: Path to proteinGroups.txt or a pandas DataFrame.
25-
intensity_column_prefix: Prefix for intensity columns (default: 'LFQ intensity ').
26-
index_column: Column name for protein IDs (default: 'Protein IDs').
27-
gene_names_column: Column name for gene names (default: 'Gene names').
28-
sep: File separator (default: tab).
23+
Parameters
24+
----------
25+
file
26+
Path to the MaxQuant proteinGroups.txt file or a pandas DataFrame containing the data.
27+
intensity_column_prefixes
28+
Prefix(es) for intensity columns to extract.
29+
The first prefix is used for the main matrix (X), others are stored as layers if present.
30+
index_column
31+
Column name to use as protein index.
32+
filter_columns
33+
Columns to use for filtering out contaminants or unwanted entries.
34+
sep
35+
File separator if reading from file.
2936
30-
Returns:
37+
Returns
38+
-------
39+
anndata.AnnData
3140
AnnData object with:
32-
- X: intensity matrix (proteins x samples)
33-
- var: protein metadata
34-
- obs: sample metadata
41+
- X: intensity matrix (samples x proteins)
42+
- var: protein metadata (indexed by protein IDs)
43+
- obs: sample metadata (indexed by sample names)
44+
- layers: additional intensity matrices if multiple intensity column prefixes are provided
3545
"""
3646
if isinstance(intensity_column_prefixes, str):
3747
intensity_column_prefixes = [intensity_column_prefixes]
@@ -83,7 +93,12 @@ def read_maxquant(
8393
obs = pd.DataFrame(index=sample_names)
8494

8595
# Build uns
86-
uns = {"Search_Engine": "MaxQuant"}
96+
uns = {
97+
"RawInfo": {
98+
"Search_Engine": "MaxQuant",
99+
"filter_columns": filter_columns,
100+
},
101+
}
87102

88103
# Create AnnData
89104
adata = ad.AnnData(X=X, obs=obs, var=var, layers=layers, uns=uns)

protdata/io/mztab_loader.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,20 @@ def read_mztab(
1212
"""
1313
Load mzTab protein table into an AnnData object.
1414
15-
Args:
16-
file: Path to mzTab file or a pandas DataFrame (protein table).
17-
intensity_column_prefix: Prefix for intensity columns (default: 'protein_abundance_').
18-
index_column: Column indicating the protein groups (default: 'accession').
15+
Parameters
16+
----------
17+
file
18+
Path to mzTab file or a pandas DataFrame containing the protein table.
19+
index_column
20+
Column indicating the protein groups.
1921
20-
Returns:
22+
Returns
23+
-------
24+
anndata.AnnData
2125
AnnData object with:
2226
- X: intensity matrix (samples x proteins)
23-
- var: protein metadata
24-
- obs: sample metadata
27+
- var: protein metadata (indexed by protein accession)
28+
- obs: sample metadata (indexed by sample names)
2529
"""
2630
if isinstance(file, pd.DataFrame):
2731
df = file.copy()
@@ -54,7 +58,7 @@ def read_mztab(
5458
obs.index = obs.index.astype(str)
5559

5660
# Build uns
57-
uns = {"Search_Engine": df.search_engine.iloc[0]}
61+
uns = {"RawInfo": {"Search_Engine": df.search_engine.iloc[0]}}
5862

5963
# Create AnnData
6064
adata = ad.AnnData(X=X, obs=obs, var=var, uns=uns)

0 commit comments

Comments
 (0)