-
Notifications
You must be signed in to change notification settings - Fork 32
Nanodrop eight support #230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
nathan-stender
merged 13 commits into
Benchling-Open-Source:main
from
bwolfe-benchling:nanodrop-eight-support
Jan 19, 2024
Merged
Changes from all commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
29a94f9
initial commit
bwolfe-benchling b945603
fix for concentration
bwolfe-benchling 32887c0
fix for missing sample ID
bwolfe-benchling 93d9291
update changelog
bwolfe-benchling ec04e46
Merge branch 'main' into nanodrop-eight-support
bwolfe-benchling 86d2057
use JsonFloat and InvalidJsonFloat
bwolfe-benchling 948275e
Merge branch 'main' into nanodrop-eight-support
bwolfe-benchling 03d16aa
refactor get_str
bwolfe-benchling cb456a5
use UNITLESS const
bwolfe-benchling ea6e416
refactor typing for _get_concentration
bwolfe-benchling a30045b
Merge branch 'main' into nanodrop-eight-support
bwolfe-benchling c6d5db1
remove str cast
bwolfe-benchling 0aa8200
Merge branch 'main' into nanodrop-eight-support
nathan-stender File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
302 changes: 302 additions & 0 deletions
302
src/allotropy/parsers/thermo_fisher_nanodrop_eight/nanodrop_eight_parser.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,302 @@ | ||
from collections.abc import Mapping | ||
from typing import Optional, Union | ||
import uuid | ||
|
||
import pandas as pd | ||
|
||
from allotropy.allotrope.models.shared.definitions.custom import ( | ||
TQuantityValueMicrogramPerMicroliter, | ||
TQuantityValueMicrogramPerMilliliter, | ||
TQuantityValueMilliAbsorbanceUnit, | ||
TQuantityValueMilligramPerMilliliter, | ||
TQuantityValueNanogramPerMicroliter, | ||
TQuantityValueNanogramPerMilliliter, | ||
TQuantityValueNanometer, | ||
TQuantityValuePicogramPerMilliliter, | ||
) | ||
from allotropy.allotrope.models.shared.definitions.definitions import ( | ||
InvalidJsonFloat, | ||
JsonFloat, | ||
TQuantityValue, | ||
) | ||
from allotropy.allotrope.models.shared.definitions.units import UNITLESS | ||
from allotropy.allotrope.models.spectrophotometry_benchling_2023_12_spectrophotometry import ( | ||
CalculatedDataAggregateDocument, | ||
CalculatedDataDocumentItem, | ||
DataSourceAggregateDocument1, | ||
DataSourceDocumentItem, | ||
DataSystemDocument, | ||
DeviceSystemDocument, | ||
MeasurementAggregateDocument, | ||
Model, | ||
SampleDocument, | ||
SpectrophotometryAggregateDocument, | ||
SpectrophotometryDocumentItem, | ||
UltravioletAbsorbancePointDetectionDeviceControlAggregateDocument, | ||
UltravioletAbsorbancePointDetectionDeviceControlDocumentItem, | ||
UltravioletAbsorbancePointDetectionMeasurementDocumentItems, | ||
) | ||
from allotropy.constants import ASM_CONVERTER_NAME, ASM_CONVERTER_VERSION | ||
from allotropy.named_file_contents import NamedFileContents | ||
from allotropy.parsers.thermo_fisher_nanodrop_eight.nanodrop_eight_reader import ( | ||
NanoDropEightReader, | ||
) | ||
from allotropy.parsers.utils.values import assert_not_none | ||
from allotropy.parsers.vendor_parser import VendorParser | ||
|
||
# Union of every mass-concentration quantity model this parser can emit.
ConcentrationType = Union[
    TQuantityValueMicrogramPerMicroliter,
    TQuantityValueMicrogramPerMilliliter,
    TQuantityValueMilligramPerMilliliter,
    TQuantityValueNanogramPerMicroliter,
    TQuantityValueNanogramPerMilliliter,
    TQuantityValuePicogramPerMilliliter,
]
# The same set of models, typed as classes (not instances) so they can be
# stored as values in the lookup table below and instantiated later.
ConcentrationClassType = Union[
    type[TQuantityValueMicrogramPerMicroliter],
    type[TQuantityValueMicrogramPerMilliliter],
    type[TQuantityValueMilligramPerMilliliter],
    type[TQuantityValueNanogramPerMicroliter],
    type[TQuantityValueNanogramPerMilliliter],
    type[TQuantityValuePicogramPerMilliliter],
]

# Maps the unit string (as compared by _get_concentration) to the quantity
# class used to wrap the concentration value. Keys are matched exactly, so the
# lookup is case-sensitive.
CONCENTRATION_UNIT_TO_TQUANTITY: Mapping[str, ConcentrationClassType] = {
    "ug/ul": TQuantityValueMicrogramPerMicroliter,
    "ug/ml": TQuantityValueMicrogramPerMilliliter,
    "mg/ml": TQuantityValueMilligramPerMilliliter,
    "ng/ul": TQuantityValueNanogramPerMicroliter,
    "ng/ml": TQuantityValueNanogramPerMilliliter,
    # NOTE(review): the key is per-microliter ("pg/ul") but the class name is
    # per-milliliter -- looks like a unit mismatch; confirm whether the key
    # should be "pg/ml" or a per-microliter class should be used instead.
    "pg/ul": TQuantityValuePicogramPerMilliliter,
}
|
||
|
||
def _get_str_or_none(data_frame: pd.DataFrame, row: int, column: str) -> Optional[str]: | ||
if column not in data_frame.columns: | ||
return None | ||
|
||
val = data_frame.iloc[row][column] | ||
if pd.isna(val): | ||
return None | ||
|
||
return str(val) | ||
|
||
|
||
def _get_str(data_frame: pd.DataFrame, row: int, column: str) -> str:
    """Return one cell of ``data_frame`` as a string, requiring it to be present.

    Args:
        data_frame: Table to read from.
        row: Positional (``iloc``) index of the row.
        column: Label of the column.

    Returns:
        The cell rendered with ``str``.

    Note:
        The looked-up value is passed to ``assert_not_none`` before being
        returned; presumably that helper raises for a missing column or NA
        cell -- confirm against its definition.
    """
    maybe_text = _get_str_or_none(data_frame, row, column)
    assert_not_none(maybe_text)
    return str(maybe_text)
|
||
|
||
def _get_float(data_frame: pd.DataFrame, row: int, column: str) -> JsonFloat:
    """Return one cell of ``data_frame`` converted with ``float``.

    Args:
        data_frame: Table to read from.
        row: Positional (``iloc``) index of the row.
        column: Label of the column.

    Returns:
        The converted float, or ``InvalidJsonFloat.NaN`` when ``float`` raises
        ``ValueError`` or ``TypeError`` for the cell. Other lookup errors
        (for example a column that does not exist) are not handled here.
    """
    try:
        number = float(data_frame.iloc[row][column])
    except (ValueError, TypeError):
        return InvalidJsonFloat.NaN
    return number
|
||
|
||
def _get_concentration(
    conc: JsonFloat, unit: Optional[str]
) -> Optional[ConcentrationType]:
    """Wrap a concentration value in the quantity class registered for ``unit``.

    Args:
        conc: Concentration value; only ``float`` instances are wrapped.
        unit: Unit string looked up in ``CONCENTRATION_UNIT_TO_TQUANTITY``.

    Returns:
        An instance of the mapped class built with ``value=conc``, or ``None``
        when ``unit`` is empty/``None``, is not a key of the mapping, or
        ``conc`` is not a ``float``.
    """
    if not unit:
        return None
    if unit not in CONCENTRATION_UNIT_TO_TQUANTITY:
        return None
    if not isinstance(conc, float):
        return None

    quantity_cls = CONCENTRATION_UNIT_TO_TQUANTITY[unit]
    return quantity_cls(value=conc)
|
||
|
||
class NanodropEightParser(VendorParser):
    """Build the spectrophotometry ASM ``Model`` from a NanoDrop Eight export.

    The export is loaded as a ``DataFrame`` by ``NanoDropEightReader``; every
    row becomes one spectrophotometry document holding up to two measurements
    (260 nm and 280 nm), plus optional calculated absorbance ratios.
    """

    def to_allotrope(self, named_file_contents: NamedFileContents) -> Model:
        """Parse ``named_file_contents`` and return the populated ``Model``."""
        contents, filename = named_file_contents
        data = NanoDropEightReader.read(contents)
        data = self._add_measurement_uuids(data)
        return self._get_model(data, filename)

    def _get_model(self, data: pd.DataFrame, filename: str) -> Model:
        """Assemble the top-level ``Model`` from the prepared table."""
        return Model(
            field_asm_manifest="http://purl.allotrope.org/manifests/spectrophotometry/BENCHLING/2023/12/spectrophotometry.manifest",
            spectrophotometry_aggregate_document=SpectrophotometryAggregateDocument(
                spectrophotometry_document=self._get_spectrophotometry_document(data),
                calculated_data_aggregate_document=CalculatedDataAggregateDocument(
                    calculated_data_document=self._get_calculated_data_document(data),
                ),
                data_system_document=DataSystemDocument(
                    file_name=filename,
                    ASM_converter_name=ASM_CONVERTER_NAME,
                    ASM_converter_version=ASM_CONVERTER_VERSION,
                ),
                device_system_document=DeviceSystemDocument(
                    model_number="Nanodrop Eight",
                    device_identifier="Nanodrop",
                ),
            ),
        )

    def _add_measurement_uuids(self, data: pd.DataFrame) -> pd.DataFrame:
        """Add "A260 uuid" and "A280 uuid" columns with one fresh UUID per row.

        The frame is modified in place and also returned. The identifiers are
        stored on the table so that measurement documents and the calculated
        data documents that reference them use the same values.
        """
        row_count = len(data.index)
        data["A260 uuid"] = [str(uuid.uuid4()) for _ in range(row_count)]
        data["A280 uuid"] = [str(uuid.uuid4()) for _ in range(row_count)]
        return data

    def _get_spectrophotometry_document(
        self, data: pd.DataFrame
    ) -> list[SpectrophotometryDocumentItem]:
        """Return one spectrophotometry document item per table row."""
        return [
            self._get_spectrophotometry_document_item(data, row)
            for row in range(len(data.index))
        ]

    def _get_calculated_data_document(
        self, data: pd.DataFrame
    ) -> list[CalculatedDataDocumentItem]:
        """Return the ratio documents for every row that reports them.

        For each row, an A260/280 item is emitted when the "260/280" cell is
        present and non-empty, followed by an A260/230 item when the "260/230"
        cell is present and non-empty.
        """
        calculated_data_documents: list[CalculatedDataDocumentItem] = []
        for row in range(len(data.index)):
            if _get_str_or_none(data, row, "260/280"):
                calculated_data_documents.append(self._get_260_280(data, row))
            if _get_str_or_none(data, row, "260/230"):
                calculated_data_documents.append(self._get_260_230(data, row))
        return calculated_data_documents

    def _get_260_280(self, data: pd.DataFrame, row: int) -> CalculatedDataDocumentItem:
        """Build the A260/280 ratio item, sourced from both measurements of ``row``."""
        return CalculatedDataDocumentItem(
            calculated_data_name="A260/280",
            calculated_result=TQuantityValue(
                value=_get_float(data, row, "260/280"), unit=UNITLESS
            ),
            calculated_data_identifier=str(uuid.uuid4()),
            data_source_aggregate_document=DataSourceAggregateDocument1(
                [
                    DataSourceDocumentItem(
                        data_source_feature="absorbance",
                        data_source_identifier=_get_str(data, row, "A260 uuid"),
                    ),
                    DataSourceDocumentItem(
                        data_source_feature="absorbance",
                        data_source_identifier=_get_str(data, row, "A280 uuid"),
                    ),
                ]
            ),
        )

    def _get_260_230(self, data: pd.DataFrame, row: int) -> CalculatedDataDocumentItem:
        """Build the A260/230 ratio item, sourced from the A260 measurement of ``row``."""
        return CalculatedDataDocumentItem(
            calculated_data_name="A260/230",
            calculated_result=TQuantityValue(
                value=_get_float(data, row, "260/230"), unit=UNITLESS
            ),
            calculated_data_identifier=str(uuid.uuid4()),
            data_source_aggregate_document=DataSourceAggregateDocument1(
                [
                    DataSourceDocumentItem(
                        data_source_feature="absorbance",
                        data_source_identifier=_get_str(data, row, "A260 uuid"),
                    )
                ]
            ),
        )

    def _get_spectrophotometry_document_item(
        self, data: pd.DataFrame, row: int
    ) -> SpectrophotometryDocumentItem:
        """Build the document item for ``row``.

        "Date" and "Time" are required cells; they are joined with a single
        space and handed to ``self._get_date_time`` (not defined in this
        class; presumably provided by ``VendorParser``).
        """
        timestamp = f"{_get_str(data, row, 'Date')} {_get_str(data, row, 'Time')}"
        return SpectrophotometryDocumentItem(
            analyst=_get_str_or_none(data, row, "User ID"),
            measurement_aggregate_document=MeasurementAggregateDocument(
                measurement_time=self._get_date_time(timestamp),
                experiment_type=_get_str_or_none(data, row, "NA Type"),
                measurement_document=self._get_measurement_document(data=data, row=row),
            ),
        )

    def _get_measurement_document(
        self, data: pd.DataFrame, row: int
    ) -> list[UltravioletAbsorbancePointDetectionMeasurementDocumentItems]:
        """Return the 260 nm and/or 280 nm measurement items available for ``row``.

        A measurement is emitted only when its absorbance cell is present and
        non-empty. The 280 nm reading comes from "A280", falling back to
        "A280 10mm" when the table has no "A280" column.
        """
        measurement_docs: list[
            UltravioletAbsorbancePointDetectionMeasurementDocumentItems
        ] = []

        if _get_str_or_none(data, row, "A260"):
            measurement_docs.append(
                self._get_measurement_item(
                    data,
                    row,
                    uuid_col="A260 uuid",
                    absorbance_col="A260",
                    wavelength=260,
                    nucleic_acid=True,
                )
            )

        a280_col = "A280"
        if a280_col not in data.columns and "A280 10mm" in data.columns:
            a280_col = "A280 10mm"
        if _get_str_or_none(data, row, a280_col):
            measurement_docs.append(
                self._get_measurement_item(
                    data,
                    row,
                    uuid_col="A280 uuid",
                    absorbance_col=a280_col,
                    wavelength=280,
                    nucleic_acid=False,
                )
            )

        return measurement_docs

    def _get_measurement_item(
        self,
        data: pd.DataFrame,
        row: int,
        *,
        uuid_col: str,
        absorbance_col: str,
        wavelength: int,
        nucleic_acid: bool,
    ) -> UltravioletAbsorbancePointDetectionMeasurementDocumentItems:
        """Build a single absorbance measurement item for ``row``.

        Args:
            data: Table with the measurement UUID columns already added.
            row: Positional index of the row.
            uuid_col: Column holding this measurement's identifier.
            absorbance_col: Column holding the absorbance reading.
            wavelength: Detector wavelength setting, passed to
                ``TQuantityValueNanometer``.
            nucleic_acid: Which kind of experiment attaches its concentration
                to this measurement (see ``_get_mass_concentration``).
        """
        return UltravioletAbsorbancePointDetectionMeasurementDocumentItems(
            measurement_identifier=_get_str(data, row, uuid_col),
            sample_document=SampleDocument(
                # A missing or empty sample ID is reported as the literal "NA".
                sample_identifier=_get_str_or_none(data, row, "Sample ID") or "NA",
                well_plate_identifier=_get_str_or_none(data, row, "Plate ID"),
                location_identifier=_get_str_or_none(data, row, "Well"),
            ),
            mass_concentration=self._get_mass_concentration(
                data, row, nucleic_acid=nucleic_acid
            ),
            device_control_aggregate_document=UltravioletAbsorbancePointDetectionDeviceControlAggregateDocument(
                device_control_document=[
                    UltravioletAbsorbancePointDetectionDeviceControlDocumentItem(
                        device_type="absorbance detector",
                        detector_wavelength_setting=TQuantityValueNanometer(
                            value=wavelength
                        ),
                    )
                ]
            ),
            absorbance=TQuantityValueMilliAbsorbanceUnit(
                _get_float(data, row, absorbance_col)
            ),
        )

    def _get_mass_concentration(
        self, data: pd.DataFrame, row: int, *, nucleic_acid: bool
    ) -> Optional[ConcentrationType]:
        """Return the concentration to attach to one measurement of ``row``.

        Concentration is captured on the A260 measurement when the experiment
        type ("NA Type") contains "NA" (DNA or RNA), and on the A280
        measurement for protein and other experiment types.

        Args:
            data: Table to read from.
            row: Positional index of the row.
            nucleic_acid: ``True`` when called for the A260 measurement,
                ``False`` for the A280 measurement.

        Returns:
            The wrapped concentration, or ``None`` when the row has no
            experiment type, the experiment type belongs to the other
            measurement, the table has no concentration column, or
            ``_get_concentration`` cannot wrap the value/unit pair.
        """
        experiment_type = _get_str_or_none(data, row, "NA Type")
        if not experiment_type or ("NA" in experiment_type) != nucleic_acid:
            return None

        # Resolve the column once and only read it when it actually exists.
        concentration_col = self._get_concentration_col(data)
        if not concentration_col:
            return None

        return _get_concentration(
            _get_float(data, row, concentration_col),
            _get_str_or_none(data, row, "Units"),
        )

    def _get_concentration_col(self, data: pd.DataFrame) -> Optional[str]:
        """Return the first column named like a concentration column, if any.

        Column names are compared case-insensitively against "conc.", "conc"
        and "concentration".
        """
        accepted_names = ("conc.", "conc", "concentration")
        return next(
            (col for col in data.columns if col.lower() in accepted_names), None
        )
25 changes: 25 additions & 0 deletions
25
src/allotropy/parsers/thermo_fisher_nanodrop_eight/nanodrop_eight_reader.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from io import StringIO | ||
|
||
import pandas as pd | ||
|
||
from allotropy.parsers import lines_reader | ||
from allotropy.parsers.lines_reader import CsvReader | ||
from allotropy.types import IOType | ||
|
||
|
||
class NanoDropEightReader:
    """Load a NanoDrop Eight export into a ``DataFrame``."""

    @classmethod
    def read(cls, contents: IOType) -> pd.DataFrame:
        """Parse ``contents`` as a tab-separated table.

        The lines of ``contents`` are read with ``lines_reader.read_to_lines``
        and a block of them is taken via ``CsvReader.pop_csv_block_as_lines``
        (NOTE(review): exact block boundaries depend on that helper -- confirm).
        "Plate ID" and "Sample ID" are forced to ``str`` so pandas does not
        coerce them to numbers.

        Returns:
            The parsed table with surrounding whitespace stripped from every
            column name.
        """

        def _strip(name):
            return name.strip()

        csv_reader = CsvReader(lines_reader.read_to_lines(contents))
        tab_separated_text = "\n".join(csv_reader.pop_csv_block_as_lines())

        table = pd.read_csv(
            StringIO(tab_separated_text),
            sep="\t",
            dtype={"Plate ID": str, "Sample ID": str},
            # Prevent pandas from rounding decimal values, at the cost of some speed.
            float_precision="round_trip",
        )
        return table.rename(columns=_strip)
Empty file.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.