From 98ebd9cd863bb18639210d37ca290e79f085e8ad Mon Sep 17 00:00:00 2001 From: egrace479 Date: Wed, 23 Aug 2023 12:52:24 -0400 Subject: [PATCH 1/4] Add fix for lat or lon out of ranges and error message if type can't be converted to float. --- README.md | 7 ++++--- components/divs.py | 24 ++++++++++++++++-------- dashboard.py | 13 +++++++++++++ 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4066de4..16d601b 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,14 @@ For full dashboard functionality, upload a CSV or XLS file with the following co - `View`: View of the sample (eg., 'ventral' or 'dorsal' for butterflies). - `Sex`: Sex of each sample. - `hybrid_stat`: Hybrid status of each sample (eg., 'valid_subspecies', 'subspecies_synonym', or 'unknown'). -- `lat`*: Latitude at which image was taken or specimen was collected. -- `lon`*: Longitude at which image was taken or specimen was collected. +- `lat`*: Latitude at which image was taken or specimen was collected: number in [-90,90]. +- `lon`*: Longitude at which image was taken or specimen was collected: number in [-180,180]. - `file_url`*: URL to access file. ***Note:** -- `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. +- `lat` and `lon` columns are not required to utilize the dashboard, but there will be no map view if they are not included. Blank (or null) entries are recorded as `unknown`, and thus excluded from map view. - `Image_filename` and `file_url` are not required, but there will be no sample images option if either one is not included. +- `locality` may be provided; otherwise, it will take on the value `lat|lon`, or `unknown` if these are not provided.
## Running Dashboard diff --git a/components/divs.py b/components/divs.py index 80df309..5f89139 100644 --- a/components/divs.py +++ b/components/divs.py @@ -22,6 +22,10 @@ {'label': 'Locality', 'value': 'locality'} ] DOCS_URL = "https://github.com/Imageomics/dashboard-prototype#how-it-works" +DOCS_LINK = html.A("documentation", + href=DOCS_URL, + target='_blank', + style = ERROR_STYLE) def get_hist_div(mapping): ''' @@ -303,20 +307,24 @@ def get_error_div(error_dict): html.H3("Source data does not have '" + feature + "' column. ", style = ERROR_STYLE), html.H4(["Please see the ", - html.A("documentation", - href=DOCS_URL, - target='_blank', - style = ERROR_STYLE), + DOCS_LINK, " for list of required columns."], style = ERROR_STYLE) ]) + elif 'mapping' in error_dict.keys(): + error_msg = error_dict['mapping'] + error_div = html.Div([ + html.H4("Latitude or longitude columns have non-numeric values: " + error_msg + ".", + style = ERROR_STYLE), + html.H4(["Please see the ", + DOCS_LINK, + "."], + style = ERROR_STYLE) + ]) elif 'type' in error_dict.keys(): error_div = html.Div([ html.H4(["The source file is not a valid CSV format, please see the ", - html.A("documentation", - href=DOCS_URL, - target='_blank', - style = ERROR_STYLE), + DOCS_LINK, "."], style = ERROR_STYLE) ]) diff --git a/dashboard.py b/dashboard.py index 933a745..b5c057d 100644 --- a/dashboard.py +++ b/dashboard.py @@ -1,4 +1,5 @@ import pandas as pd +import numpy as np import base64 import io import json @@ -97,6 +98,18 @@ def parse_contents(contents, filename): else: included_features.append(feature) + # Check for lat/lon bounds & type if columns exist + if mapping: + try: + # Check lat and lon within appropriate ranges (lat: [-90, 90], lon: [-180, 180]) + if len(df.loc[np.abs(df['lat'].astype(float)) > 90]) > 0: + df.loc[np.abs(df['lat'].astype(float)) > 90, 'lat'] = 'unknown' + if len(df.loc[np.abs(df['lon'].astype(float)) > 180]) > 0: + df.loc[np.abs(df['lon'].astype(float)) > 180, 'lon'] = 
'unknown' + except ValueError as e: + print(e) + return json.dumps({'error': {'mapping': str(e)}}) + # get dataset-determined static data: # the dataframe and categorical features - processed for map view if mapping is True # all possible species, subspecies From e8e1487249eee17fd2e2667389726501073d1f23 Mon Sep 17 00:00:00 2001 From: egrace479 Date: Wed, 23 Aug 2023 12:59:57 -0400 Subject: [PATCH 2/4] Add test data for updates. --- test_data/HCGSD_test_latLonOOB.csv | 11 +++++++++++ test_data/HCGSD_test_nonnumeric.csv | 11 +++++++++++ 2 files changed, 22 insertions(+) create mode 100644 test_data/HCGSD_test_latLonOOB.csv create mode 100644 test_data/HCGSD_test_nonnumeric.csv diff --git a/test_data/HCGSD_test_latLonOOB.csv b/test_data/HCGSD_test_latLonOOB.csv new file mode 100644 index 0000000..9cb0a65 --- /dev/null +++ b/test_data/HCGSD_test_latLonOOB.csv @@ -0,0 +1,11 @@ +NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,lon,speciesdesig,file_url +10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,-96.98333333,e. petiverana, +10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,92,-84.68333333,e. petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,-1.4,-740,m. 
plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,9.883333333,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica, +10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,730,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ \ No newline at end of file diff --git a/test_data/HCGSD_test_nonnumeric.csv b/test_data/HCGSD_test_nonnumeric.csv new file mode 100644 index 0000000..2cc8cc2 --- /dev/null +++ b/test_data/HCGSD_test_nonnumeric.csv @@ -0,0 +1,11 @@ +NHM_Specimen,Image_filename,View,Species,Subspecies,Sex,addit_taxa_info,type_stat,hybrid_stat,in_reduced,locality,lat,lon,speciesdesig,file_url +10429021,10429021_V_lowres.png,,erato,notabilis,,f._notabilis,,subspecies synonym,1,,-1.583333333,-77.75,e. notabilis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428972,10428972_V_lowres.png,ventral,erato,petiverana,male,petiverana,,valid subspecies,1,Songolica (= Zongolica) MEX VC,18.66666667,,e. petiverana, +10429172,,ventral,,petiverana,male,petiverana,,valid subspecies,1,San Ramon NIC ZE,92,-84.68333333,e. 
petiverana,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428595,10428595_D_lowres.png,dorsal,erato,phyllis,male,f._phyllis,,subspecies synonym,1,Resistencia ARG CH,-27.45,-58.98333333,e. phyllis,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428140,10428140_V_lowres.png,ventral,,plesseni,male,plesseni,,valid subspecies,1,Banos ECD TU,,Non numeric,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428250,10428250_V_lowres.png,ventral,melpomene,,male,ab._rubra,,subspecies synonym,1,Caradoc (Hda) PER CU,-13.36666667,-70.95,m. schunkei,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10427979,,dorsal,melpomene,rosina_S,male,rosina_S,,valid subspecies,1,Turrialba CRI CA,Non numeric,-83.63333333,m. rosina,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ +10428803,10428803_D_lowres.png,dorsal,erato,guarica,female,guarica,,valid subspecies,1,Fusagasuga COL CN,4.35,-74.36666667,e. guarica, +10428169,10428169_V_lowres.png,ventral,melpomene,plesseni,male,f._pura,ST,subspecies synonym,1,Canelos ECD PA,-1.583333333,,m. plesseni,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/ventral_images/ +10428321,10428321_D_lowres.png,,melpomene,nanna,male,nanna,ST,valid subspecies,1,Espirito Santo BRA ES,-20.33333333,-40.28333333,m. 
nanna,https://github.com/Imageomics/dashboard-prototype/raw/main/test_data/images/dorsal_images/ \ No newline at end of file From a122d98cf56f3f7bb1fcf5518c25e1959b68fb41 Mon Sep 17 00:00:00 2001 From: Elizabeth Campolongo <38985481+egrace479@users.noreply.github.com> Date: Wed, 23 Aug 2023 18:14:55 -0400 Subject: [PATCH 3/4] Update range check with Series.between Co-authored-by: John Bradley --- dashboard.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dashboard.py b/dashboard.py index b5c057d..e17098b 100644 --- a/dashboard.py +++ b/dashboard.py @@ -102,10 +102,10 @@ def parse_contents(contents, filename): if mapping: try: # Check lat and lon within appropriate ranges (lat: [-90, 90], lon: [-180, 180]) - if len(df.loc[np.abs(df['lat'].astype(float)) > 90]) > 0: - df.loc[np.abs(df['lat'].astype(float)) > 90, 'lat'] = 'unknown' - if len(df.loc[np.abs(df['lon'].astype(float)) > 180]) > 0: - df.loc[np.abs(df['lon'].astype(float)) > 180, 'lon'] = 'unknown' + valid_lat = df['lat'].astype(float).between(-90, 90) + df.loc[~valid_lat, 'lat'] = 'unknown' + valid_lon = df['lon'].astype(float).between(-180, 180) + df.loc[~valid_lon, 'lon'] = 'unknown' except ValueError as e: print(e) return json.dumps({'error': {'mapping': str(e)}}) From 12305d79a43fa0d6a5609f0ec1c6d73d026cc598 Mon Sep 17 00:00:00 2001 From: egrace479 Date: Thu, 24 Aug 2023 09:45:10 -0400 Subject: [PATCH 4/4] Added test for bug-fix. 
--- tests/test_filters.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_filters.py b/tests/test_filters.py index 772fb1f..dbe3268 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -57,6 +57,15 @@ def generate_mock_upload(filepath): "expected_mapping": False, "expected_images": True }, + { # Check with full columns expected, but lat/lon out of bounds (1 lat and 2 lon) + "filepath": "test_data/HCGSD_test_latLonOOB.csv", + "filename": "HCGSD_test_latLonOOB.csv", + "expected_columns": ['Species', 'Subspecies', 'View', 'Sex', 'hybrid_stat', 'lat', 'lon', + 'file_url', 'Image_filename', 'locality', 'lat-lon', + 'Samples_at_locality', 'Species_at_locality', 'Subspecies_at_locality'], + "expected_mapping": True, + "expected_images": True + }, ] def test_parse_contents(): @@ -70,3 +79,7 @@ def test_parse_contents(): assert list(dff.columns) == case['expected_columns'] assert output['mapping'] == case['expected_mapping'] assert output['images'] == case['expected_images'] + + if case['filename'] == "HCGSD_test_latLonOOB.csv": + assert len(dff.loc[dff.lat == 'unknown']) == 1 + assert len(dff.loc[dff.lon == 'unknown']) == 2