From 837872ddab1fb3cf11773062956554f2d935b079 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 5 Jul 2021 17:51:30 -0400 Subject: [PATCH 01/32] Initial commit --- pvlib/iotools/bsrn.py | 143 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 138 insertions(+), 5 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 0d3b31eb2c..509a193cf7 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -1,9 +1,11 @@ -"""Functions to read data from the Baseline Surface Radiation Network (BSRN). +dat"""Functions to read data from the Baseline Surface Radiation Network (BSRN). .. codeauthor:: Adam R. Jensen """ import pandas as pd import gzip +import ftplib +import io COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), (32, 39), (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)] @@ -17,6 +19,128 @@ 'temp_air', 'relative_humidity', 'pressure'] +def get_bsrn(start, end, station, username, password, + path=None, ftp_url='ftp.bsrn.awi.de'): + """ + Retrieve ground measured irradiance data from the BSRN FTP server. + + The BSRN (Baseline Surface Radiation Network) is a world wide network + of high-quality solar radiation monitoring stations as described in [1]_. + Data is retrieved from the BSRN FTP server [2]_. + + Currently only the basic measurements (LR0100) are parsed, which include + global, diffuse, direct and downwelling long-wave radiation [3]_. Future + updates may include parsing of additional data and metadata. + + Parameters + ---------- + start: datetime-like + First day of the requested period + end: datetime-like + Last day of the requested period + station: str + 3-letter BSRN station abbreviation + username: str + username for accessing the BSRN ftp server + password: str + password for accessing the BSRN ftp server + path: str or path-like, default None + if specified, path (local or abs.) of where to save files + ftp_url: str, default 'ftp.bsrn.awi.de' + URL of the BSRN ftp server. + + Returns + ------- + data: DataFrame + timeseries data from the BSRN archive, see + :func:`pvlib.iotools.read_bsrn` for fields + metadata: dict + metadata for the last available monthly file + + Notes + ----- + Required username and password can be obtained for free as described in the + BSRN's Data Release Guidelines [4]_. + + Raises + ------ + ValueError + if the specified station does not exist on the FTP server or if no + files match the specified station and timeframe. + UserWarning + if a requested file is missing a UserWarning is returned with the + filename. + + Examples + -------- + >>> # Retrieve two months irradiance data from the Cabauw BSRN station + >>> data, metadata = pvlib.iotools.get_bsrn( # doctest: +SKIP + >>> start=pd.Timestamp(2020,1,1), end=pd.Timestamp(2020,12,1), # doctest: +SKIP + >>> station='cab', username='yourusername', password='yourpassword') # doctest: +SKIP + + See also + -------- + pvlib.iotools.read_bsrn + + References + ---------- + .. [1] `World Radiation Monitoring Center - Baseline Surface Radiation + Network (BSRN) + `_ + .. [2] `BSRN Data Retrieval via FTP + `_ + .. [3] `Update of the Technical Plan for BSRN Data Management, 2013, + Global Climate Observing System (GCOS) GCOS-172. + `_ + .. 
[4] `BSRN Data Release Guidelines + `_ + """ # noqa: E501 + # The ftp server uses lowercase station abbreviations + station = station.lower() + + # Generate list files to download based on start/end (SSSMMYY.dat.gz) + filenames = pd.date_range(start, end + pd.DateOffset(months=1), freq='1M')\ + .strftime(f"{station}%m%y.dat.gz").tolist() + + # Create FTP connection + with ftplib.FTP(ftp_url, username, password) as ftp: + # Change to station sub-directory. + # Serves as a check that the station exists. + try: + ftp.cwd(f'/{station}') + except ftplib.error_perm: + raise ValueError('Station sub-directory does not exist. Specified ' + 'station is probably not a proper three letter ' + 'station abbreviation.') + dfs = [] # Initialize list for monthly dataframes + for filename in filenames: + try: + bio = io.BytesIO() # Initialize BytesIO object + # Retrieve binary file from server and write to BytesIO object + res = ftp.retrbinary(f'RETR {filename}', bio.write) + # Decompress/unzip and decode the binary file + text = gzip.decompress(bio.getvalue()).decode('utf-8') + dfi, metadata = read_bsrn(text) # parse file + dfs.append(dfi) + # Save local file + if path is not None: + # Create local file + with open(os.path.join(path, filename), 'wb') as f: + f.write(bio.getbuffer()) # Write local file + # FTP client raises an error if the file does not exist on server + except ftplib.error_perm: + UserWarning(f'{filename} does not exist') + ftp.close() # Close FTP connection + + # Concatenate monthly dataframes to one dataframe + if len(dfs) > 0: + data = pd.concat(dfs, axis='rows') + else: + raise ValueError('No files for the specified station and timeframe') + # Return dataframe and the metadata for the last available file + return data, metadata + + def read_bsrn(filename): """ Read a BSRN station-to-archive file into a DataFrame. @@ -28,12 +152,10 @@ def read_bsrn(filename): updates may include parsing of additional data and meta-data. BSRN files are freely available and can be accessed via FTP [3]_. Required - username and password are easily obtainable as described in the BSRN's Data Release Guidelines [4]_. - Parameters ---------- filename: str @@ -75,6 +197,10 @@ def read_bsrn(filename): pressure float Atmospheric pressure [hPa] ======================= ====== ========================================== + See also + -------- + pvlib.iotools.get_bsrn + References ---------- .. 
[1] `World Radiation Monitoring Center - Baseline Surface Radiation @@ -91,7 +217,9 @@ def read_bsrn(filename): # Read file and store the starting line number for each logical record (LR) line_no_dict = {} - if str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file + if isinstance(filename, str): + open_func, mode = io.StringIO, None + elif str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file open_func, mode = gzip.open, 'rt' else: open_func, mode = open, 'r' @@ -115,6 +243,10 @@ def read_bsrn(filename): end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 nrows = end_row-start_row+1 + # Necessary to pass text to pd.read_fwf + if isinstance(filename, str): + filename = io.StringIO(filename) + # Read file as a fixed width file (fwf) data = pd.read_fwf(filename, skiprows=start_row, nrows=nrows, header=None, colspecs=COL_SPECS, na_values=[-999.0, -99.9], @@ -139,4 +271,5 @@ def read_bsrn(filename): + pd.to_timedelta(data['day']-1, unit='d') + pd.to_timedelta(data['minute'], unit='T')) - return data + metadata = {} + return data, metadata From f546c777379ec6af752d23fa87263158d1d57e7f Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 5 Jul 2021 17:55:49 -0400 Subject: [PATCH 02/32] Fix typo in doc string --- pvlib/iotools/bsrn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 509a193cf7..34ea580a78 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -1,4 +1,4 @@ -dat"""Functions to read data from the Baseline Surface Radiation Network (BSRN). +"""Functions to read data from the Baseline Surface Radiation Network (BSRN). .. codeauthor:: Adam R. Jensen """ From 324644f623617fd8b4d2adca31be8eb42d784f21 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 5 Jul 2021 18:12:19 -0400 Subject: [PATCH 03/32] Fix stickler --- pvlib/iotools/bsrn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 34ea580a78..2bca3481e1 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -6,6 +6,7 @@ import gzip import ftplib import io +import os COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), (32, 39), (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)] @@ -27,7 +28,7 @@ def get_bsrn(start, end, station, username, password, The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. Data is retrieved from the BSRN FTP server [2]_. - + Currently only the basic measurements (LR0100) are parsed, which include global, diffuse, direct and downwelling long-wave radiation [3]_. Future updates may include parsing of additional data and metadata. 
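For reference, the monthly files requested by get_bsrn follow the 'SSSMMYY.dat.gz' naming scheme generated with a pandas date range in the first patch above. A small stand-alone sketch of that pattern (the station code 'cab' and the dates are placeholders, not part of the patch):

import pandas as pd

# Build the list of monthly BSRN archive filenames (SSSMMYY.dat.gz) the same
# way get_bsrn does; 'cab' is a hypothetical three-letter station code.
station = 'cab'
start, end = pd.Timestamp(2020, 1, 1), pd.Timestamp(2020, 3, 1)
filenames = pd.date_range(start, end + pd.DateOffset(months=1), freq='1M')\
    .strftime(f"{station}%m%y.dat.gz").tolist()
# filenames == ['cab0120.dat.gz', 'cab0220.dat.gz', 'cab0320.dat.gz']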
@@ -219,7 +220,7 @@ def read_bsrn(filename): line_no_dict = {} if isinstance(filename, str): open_func, mode = io.StringIO, None - elif str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file + elif str(filename).endswith('.gz'): # check if file is gzipped (.gz) open_func, mode = gzip.open, 'rt' else: open_func, mode = open, 'r' From d010d512166b92f4d5159a03df0e0d01fd9553ce Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 6 Jul 2021 23:26:12 -0400 Subject: [PATCH 04/32] Update api.rst, __init__.py, and whatsnew --- docs/sphinx/source/api.rst | 1 + docs/sphinx/source/whatsnew/v0.9.0.rst | 5 +++-- pvlib/iotools/__init__.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst index 0af200e67c..970b8f2c48 100644 --- a/docs/sphinx/source/api.rst +++ b/docs/sphinx/source/api.rst @@ -486,6 +486,7 @@ of sources and file formats relevant to solar energy modeling. iotools.parse_psm3 iotools.get_pvgis_tmy iotools.read_pvgis_tmy + iotools.get_bsrn iotools.read_bsrn iotools.get_cams iotools.read_cams diff --git a/docs/sphinx/source/whatsnew/v0.9.0.rst b/docs/sphinx/source/whatsnew/v0.9.0.rst index 80dab7dc23..be9b7506ca 100644 --- a/docs/sphinx/source/whatsnew/v0.9.0.rst +++ b/docs/sphinx/source/whatsnew/v0.9.0.rst @@ -103,8 +103,9 @@ Deprecations Enhancements ~~~~~~~~~~~~ -* Add :func:`~pvlib.iotools.read_bsrn` for reading BSRN solar radiation data - files. (:pull:`1145`, :issue:`1015`) +* Add :func:`~pvlib.iotools.get_bsrn` and :func:`~pvlib.iotools.read_bsrn` + for retrieving and reading BSRN solar radiation data files. + (:pull:`1254`, :pull:`1145`, :issue:`1015`) * Add :func:`~pvlib.iotools.get_cams`, :func:`~pvlib.iotools.parse_cams`, and :func:`~pvlib.iotools.read_cams` diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index b717c801ca..34cdb8975c 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -14,6 +14,7 @@ from pvlib.iotools.psm3 import parse_psm3 # noqa: F401 from pvlib.iotools.pvgis import get_pvgis_tmy, read_pvgis_tmy # noqa: F401 from pvlib.iotools.bsrn import read_bsrn # noqa: F401 +from pvlib.iotools.bsrn import get_bsrn # noqa: F401 from pvlib.iotools.sodapro import get_cams # noqa: F401 from pvlib.iotools.sodapro import read_cams # noqa: F401 from pvlib.iotools.sodapro import parse_cams # noqa: F401 From 66cc5bb6d402b7c069b1c8fd3558fd1418766cdc Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 6 Jul 2021 23:26:28 -0400 Subject: [PATCH 05/32] Major refactoring --- pvlib/iotools/bsrn.py | 109 ++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 2bca3481e1..0ddd20a10d 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -5,9 +5,12 @@ import pandas as pd import gzip import ftplib +import warnings import io import os +BSRN_FTP_URL = "ftp.bsrn.awi.de" + COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), (32, 39), (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)] @@ -21,7 +24,7 @@ def get_bsrn(start, end, station, username, password, - path=None, ftp_url='ftp.bsrn.awi.de'): + local_path=None): """ Retrieve ground measured irradiance data from the BSRN FTP server. @@ -30,8 +33,8 @@ def get_bsrn(start, end, station, username, password, Data is retrieved from the BSRN FTP server [2]_. 
Currently only the basic measurements (LR0100) are parsed, which include - global, diffuse, direct and downwelling long-wave radiation [3]_. Future - updates may include parsing of additional data and metadata. + global, diffuse, direct, and downwelling long-wave radiation [3]_. Future + updates may include parsing of additional data and metadata. Parameters ---------- @@ -45,10 +48,8 @@ def get_bsrn(start, end, station, username, password, username for accessing the BSRN ftp server password: str password for accessing the BSRN ftp server - path: str or path-like, default None - if specified, path (local or abs.) of where to save files - ftp_url: str, default 'ftp.bsrn.awi.de' - URL of the BSRN ftp server. + local_path: str or path-like, default: None, optional + If specified, path (abs. or relative) of where to save files Returns ------- @@ -66,10 +67,10 @@ def get_bsrn(start, end, station, username, password, Raises ------ ValueError - if the specified station does not exist on the FTP server or if no + If the specified station does not exist on the FTP server or if no files match the specified station and timeframe. UserWarning - if a requested file is missing a UserWarning is returned with the + If a requested file is missing a UserWarning is returned with the filename. Examples @@ -104,41 +105,49 @@ def get_bsrn(start, end, station, username, password, .strftime(f"{station}%m%y.dat.gz").tolist() # Create FTP connection - with ftplib.FTP(ftp_url, username, password) as ftp: - # Change to station sub-directory. - # Serves as a check that the station exists. + with ftplib.FTP(BSRN_FTP_URL, username, password) as ftp: + # Change to station sub-directory (checks that the station exists) try: ftp.cwd(f'/{station}') - except ftplib.error_perm: - raise ValueError('Station sub-directory does not exist. Specified ' - 'station is probably not a proper three letter ' - 'station abbreviation.') + except ftplib.error_perm as e: + raise KeyError('Station sub-directory does not exist. 
Specified ' + 'station is probably not a proper three letter ' + 'station abbreviation.') from e dfs = [] # Initialize list for monthly dataframes for filename in filenames: try: bio = io.BytesIO() # Initialize BytesIO object # Retrieve binary file from server and write to BytesIO object - res = ftp.retrbinary(f'RETR {filename}', bio.write) + response = ftp.retrbinary(f'RETR {filename}', bio.write) + # Check that transfer was successfull + if not response.startswith('226 Transfer complete'): + raise ftplib.Error(response) # Decompress/unzip and decode the binary file text = gzip.decompress(bio.getvalue()).decode('utf-8') - dfi, metadata = read_bsrn(text) # parse file + # Convert string to StrinIO and parse data + dfi, metadata = read_bsrn(io.StringIO(text)) dfs.append(dfi) - # Save local file - if path is not None: + # Save local file if local_path is specified + if local_path is not None: # Create local file - with open(os.path.join(path, filename), 'wb') as f: + with open(os.path.join(local_path, filename), 'wb') as f: f.write(bio.getbuffer()) # Write local file # FTP client raises an error if the file does not exist on server - except ftplib.error_perm: - UserWarning(f'{filename} does not exist') + except ftplib.error_perm as e: + if str(e) == '550 Failed to open file.': + warnings.warn(f'File: {filename} does not exist') + else: + raise ValueError(f'Error perm: {filename}') from e ftp.close() # Close FTP connection # Concatenate monthly dataframes to one dataframe if len(dfs) > 0: data = pd.concat(dfs, axis='rows') - else: - raise ValueError('No files for the specified station and timeframe') - # Return dataframe and the metadata for the last available file + else: # Return empty dataframe + data = pd.DataFrame(columns=BSRN_COLUMNS) + metadata = {} + warnings.warn('No files were avaiable for the specified timeframe.') + # Return dataframe and metadata (metadata belongs to last available file) return data, metadata @@ -149,7 +158,7 @@ def read_bsrn(filename): The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. The function only parses the basic measurements (LR0100), which include - global, diffuse, direct and downwelling long-wave radiation [2]_. Future + global, diffuse, direct, and downwelling long-wave radiation [2]_. Future updates may include parsing of additional data and meta-data. BSRN files are freely available and can be accessed via FTP [3]_. Required @@ -159,8 +168,8 @@ def read_bsrn(filename): Parameters ---------- - filename: str - A relative or absolute file path. 
+ filename: str, path-like or file-like object + Name, path, or buffer of a BSRN station-to-archive data file Returns ------- @@ -218,13 +227,13 @@ def read_bsrn(filename): # Read file and store the starting line number for each logical record (LR) line_no_dict = {} - if isinstance(filename, str): - open_func, mode = io.StringIO, None + if isinstance(filename, io.StringIO): + f = filename elif str(filename).endswith('.gz'): # check if file is gzipped (.gz) - open_func, mode = gzip.open, 'rt' + f = gzip.open(filename, 'rt') else: - open_func, mode = open, 'r' - with open_func(filename, mode) as f: + f = open(filename, 'r') + if True: f.readline() # first line should be *U0001, so read it and discard line_no_dict['0001'] = 0 date_line = f.readline() # second line contains the year and month @@ -235,23 +244,21 @@ def read_bsrn(filename): if line.startswith('*'): # Find start of all logical records line_no_dict[line[2:6]] = num # key is 4 digit LR number - # Determine start and end line of logical record LR0100 to be parsed - start_row = line_no_dict['0100'] + 1 # Start line number - # If LR0100 is the last logical record, then read rest of file - if start_row-1 == max(line_no_dict.values()): - end_row = num # then parse rest of the file - else: # otherwise parse until the beginning of the next logical record - end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 - nrows = end_row-start_row+1 - - # Necessary to pass text to pd.read_fwf - if isinstance(filename, str): - filename = io.StringIO(filename) - - # Read file as a fixed width file (fwf) - data = pd.read_fwf(filename, skiprows=start_row, nrows=nrows, header=None, - colspecs=COL_SPECS, na_values=[-999.0, -99.9], - compression='infer') + # Determine start and end line of logical record LR0100 to be parsed + start_row = line_no_dict['0100'] + 1 # Start line number + # If LR0100 is the last logical record, then read rest of file + if start_row-1 == max(line_no_dict.values()): + end_row = num # then parse rest of the file + else: # otherwise parse until the beginning of the next logical record + end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 + nrows = end_row-start_row+1 + + # Read file as a fixed width file (fwf) + f.seek(0) # reset buffer to start of file + data = pd.read_fwf(f, skiprows=start_row, nrows=nrows, header=None, + colspecs=COL_SPECS, na_values=[-999.0, -99.9], + compression='infer') + f.close() # Create multi-index and unstack, resulting in one column for each variable data = data.set_index([data.index // 2, data.index % 2]) From c68cf2c5305bf2151ffb2fbdfda33971ac060f93 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 6 Jul 2021 23:26:39 -0400 Subject: [PATCH 06/32] Coverage for test_bsrn --- pvlib/tests/iotools/test_bsrn.py | 37 +++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 18d4be60f7..f141889b4d 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -6,18 +6,39 @@ import pandas as pd import pytest -from pvlib.iotools import bsrn -from ..conftest import DATA_DIR, assert_index_equal +from pvlib.iotools import read_bsrn, get_bsrn +from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal +@pytest.fixture +def expected_index(): + return pd.date_range(start='20160601', periods=43200, freq='1min', + tz='UTC') -@pytest.mark.parametrize('testfile,expected_index', [ - ('bsrn-pay0616.dat.gz', - 
pd.date_range(start='20160601', periods=43200, freq='1min', tz='UTC')), - ('bsrn-lr0100-pay0616.dat', - pd.date_range(start='20160601', periods=43200, freq='1min', tz='UTC')), +@pytest.mark.parametrize('testfile', [ + ('bsrn-pay0616.dat.gz'), + ('bsrn-lr0100-pay0616.dat'), ]) def test_read_bsrn(testfile, expected_index): - data = bsrn.read_bsrn(DATA_DIR / testfile) + data, metadata = read_bsrn(DATA_DIR / testfile) + assert_index_equal(expected_index, data.index) + assert 'ghi' in data.columns + assert 'dni_std' in data.columns + assert 'dhi_min' in data.columns + assert 'lwd_max' in data.columns + assert 'relative_humidity' in data.columns + + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_bsrn(expected_index): + # Retrieve irradiance data from the BSRN FTP server + # the TAM station is chosen due to its small file sizes + data, metadata = get_bsrn( + start=pd.Timestamp(2016,6,1), + end=pd.Timestamp(2016,6,29), + station='tam', + username='bsrnftp', + password='bsrn1') assert_index_equal(expected_index, data.index) assert 'ghi' in data.columns assert 'dni_std' in data.columns From 6346319bde3bca3cc03fe5361581b76d9fce82bd Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 6 Jul 2021 23:50:05 -0400 Subject: [PATCH 07/32] Coverage for Warnings in case of no files avaiable --- pvlib/iotools/bsrn.py | 6 +++--- pvlib/tests/iotools/test_bsrn.py | 33 +++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 0ddd20a10d..c8bc3020ed 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -134,10 +134,10 @@ def get_bsrn(start, end, station, username, password, f.write(bio.getbuffer()) # Write local file # FTP client raises an error if the file does not exist on server except ftplib.error_perm as e: - if str(e) == '550 Failed to open file.': + if str(e) == '550 Failed to open file.': warnings.warn(f'File: {filename} does not exist') else: - raise ValueError(f'Error perm: {filename}') from e + raise ftplib.error_perm(e) ftp.close() # Close FTP connection # Concatenate monthly dataframes to one dataframe @@ -250,7 +250,7 @@ def read_bsrn(filename): if start_row-1 == max(line_no_dict.values()): end_row = num # then parse rest of the file else: # otherwise parse until the beginning of the next logical record - end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 + end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 # noqa: E501 nrows = end_row-start_row+1 # Read file as a fixed width file (fwf) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index f141889b4d..85fe23eaa7 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -14,6 +14,7 @@ def expected_index(): return pd.date_range(start='20160601', periods=43200, freq='1min', tz='UTC') + @pytest.mark.parametrize('testfile', [ ('bsrn-pay0616.dat.gz'), ('bsrn-lr0100-pay0616.dat'), @@ -34,14 +35,40 @@ def test_get_bsrn(expected_index): # Retrieve irradiance data from the BSRN FTP server # the TAM station is chosen due to its small file sizes data, metadata = get_bsrn( - start=pd.Timestamp(2016,6,1), - end=pd.Timestamp(2016,6,29), + start=pd.Timestamp(2016, 6, 1), + end=pd.Timestamp(2016, 6, 29), station='tam', username='bsrnftp', - password='bsrn1') + password='bsrn1', + local_path='') assert_index_equal(expected_index, data.index) assert 'ghi' in data.columns assert 'dni_std' in data.columns assert 
'dhi_min' in data.columns assert 'lwd_max' in data.columns assert 'relative_humidity' in data.columns + + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_bsrn_bad_station(): + # Test if ValueError is raised if a bad station name is passed + data, metadata = get_bsrn( + start=pd.Timestamp(2016, 6, 1), + end=pd.Timestamp(2016, 6, 29), + station='not_a_station_name', + username='bsrnftp', + password='bsrn1') + + +@pytest.mark.remote_data +@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) +def test_get_bsrn_no_files(): + # Test if Warning is given if no files are found for the entire time frame + with pytest.warns(UserWarning, match='No files'): + get_bsrn( + start=pd.Timestamp(1800, 6, 1), + end=pd.Timestamp(1800, 6, 29), + station='tam', + username='bsrnftp', + password='bsrn1') From 595abc2ba7a97c3a0b88262a5b4b39c8485788c2 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 6 Jul 2021 23:54:13 -0400 Subject: [PATCH 08/32] Fix stickler --- pvlib/iotools/bsrn.py | 2 +- pvlib/tests/iotools/test_bsrn.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index c8bc3020ed..212f6b7d81 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -250,7 +250,7 @@ def read_bsrn(filename): if start_row-1 == max(line_no_dict.values()): end_row = num # then parse rest of the file else: # otherwise parse until the beginning of the next logical record - end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 # noqa: E501 + end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 # noqa: E501 nrows = end_row-start_row+1 # Read file as a fixed width file (fwf) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 85fe23eaa7..46ca6520d9 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -9,6 +9,7 @@ from pvlib.iotools import read_bsrn, get_bsrn from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal + @pytest.fixture def expected_index(): return pd.date_range(start='20160601', periods=43200, freq='1min', From c49c051b6484741d7d88e307b0f5b056076cd945 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 7 Jul 2021 10:18:38 -0400 Subject: [PATCH 09/32] Correct test_get_bsrn_bad_station test --- pvlib/tests/iotools/test_bsrn.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 46ca6520d9..6993c27ac5 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -54,12 +54,13 @@ def test_get_bsrn(expected_index): @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_bsrn_bad_station(): # Test if ValueError is raised if a bad station name is passed - data, metadata = get_bsrn( - start=pd.Timestamp(2016, 6, 1), - end=pd.Timestamp(2016, 6, 29), - station='not_a_station_name', - username='bsrnftp', - password='bsrn1') + with pytest.raises(KeyError, match='sub-directory does not exist'): + get_bsrn( + start=pd.Timestamp(2016, 6, 1), + end=pd.Timestamp(2016, 6, 29), + station='not_a_station_name', + username='bsrnftp', + password='bsrn1') @pytest.mark.remote_data From 540fde3516ce187622169491668a94fdb3a1b4f8 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 7 Jul 2021 10:50:20 -0400 Subject: [PATCH 10/32] Specify warning category --- pvlib/iotools/bsrn.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff 
--git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 212f6b7d81..58ee09b03e 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -135,7 +135,8 @@ def get_bsrn(start, end, station, username, password, # FTP client raises an error if the file does not exist on server except ftplib.error_perm as e: if str(e) == '550 Failed to open file.': - warnings.warn(f'File: {filename} does not exist') + warnings.warn(f'File: {filename} does not exist', + category=UserWarning) else: raise ftplib.error_perm(e) ftp.close() # Close FTP connection @@ -146,7 +147,8 @@ def get_bsrn(start, end, station, username, password, else: # Return empty dataframe data = pd.DataFrame(columns=BSRN_COLUMNS) metadata = {} - warnings.warn('No files were avaiable for the specified timeframe.') + warnings.warn('No files were avaiable for the specified timeframe.', + category=UserWarning) # Return dataframe and metadata (metadata belongs to last available file) return data, metadata From 3353bd645c5b074595506c3b473ecb42f5b8a2e2 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Thu, 8 Jul 2021 22:28:49 -0400 Subject: [PATCH 11/32] Update dates used in test_get_bsrn_no_files --- pvlib/iotools/bsrn.py | 18 +++++++++--------- pvlib/tests/iotools/test_bsrn.py | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 58ee09b03e..401b2ed6f7 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -59,11 +59,6 @@ def get_bsrn(start, end, station, username, password, metadata: dict metadata for the last available monthly file - Notes - ----- - Required username and password can be obtained for free as described in the - BSRN's Data Release Guidelines [4]_. - Raises ------ ValueError @@ -73,6 +68,11 @@ def get_bsrn(start, end, station, username, password, If a requested file is missing a UserWarning is returned with the filename. + Notes + ----- + Required username and password can be obtained for free as described in the + BSRN's Data Release Guidelines [4]_. 
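Since the credentials discussed in the Notes section are personal, a common pattern is to keep them out of source code. A sketch using environment variables; the variable names are the ones this patch series itself exports for its CI tests, not an official convention:

import os
import pandas as pd
import pvlib

# Read the personal BSRN FTP credentials from the environment instead of
# hard-coding them (the variable names are only an example).
username = os.environ['BSRN_FTP_USERNAME']
password = os.environ['BSRN_FTP_PASSWORD']
data, metadata = pvlib.iotools.get_bsrn(
    start=pd.Timestamp(2020, 1, 1), end=pd.Timestamp(2020, 1, 31),
    station='cab', username=username, password=password)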
+ Examples -------- >>> # Retrieve two months irradiance data from the Cabauw BSRN station @@ -124,10 +124,10 @@ def get_bsrn(start, end, station, username, password, raise ftplib.Error(response) # Decompress/unzip and decode the binary file text = gzip.decompress(bio.getvalue()).decode('utf-8') - # Convert string to StrinIO and parse data + # Convert string to StringIO and parse data dfi, metadata = read_bsrn(io.StringIO(text)) dfs.append(dfi) - # Save local file if local_path is specified + # Save file locally if local_path is specified if local_path is not None: # Create local file with open(os.path.join(local_path, filename), 'wb') as f: @@ -142,12 +142,12 @@ def get_bsrn(start, end, station, username, password, ftp.close() # Close FTP connection # Concatenate monthly dataframes to one dataframe - if len(dfs) > 0: + if len(dfs): data = pd.concat(dfs, axis='rows') else: # Return empty dataframe data = pd.DataFrame(columns=BSRN_COLUMNS) metadata = {} - warnings.warn('No files were avaiable for the specified timeframe.', + warnings.warn('No files were available for the specified timeframe.', category=UserWarning) # Return dataframe and metadata (metadata belongs to last available file) return data, metadata diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 6993c27ac5..d145022936 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -69,8 +69,8 @@ def test_get_bsrn_no_files(): # Test if Warning is given if no files are found for the entire time frame with pytest.warns(UserWarning, match='No files'): get_bsrn( - start=pd.Timestamp(1800, 6, 1), - end=pd.Timestamp(1800, 6, 29), + start=pd.Timestamp(1990, 6, 1), + end=pd.Timestamp(1990, 6, 29), station='tam', username='bsrnftp', password='bsrn1') From 5c37ecb2055a5475710f98d12952f2822d852d36 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 9 Jul 2021 12:39:02 -0400 Subject: [PATCH 12/32] Add secret credentials for testing --- pvlib/tests/iotools/test_bsrn.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index d145022936..29fe4882a5 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -2,14 +2,24 @@ tests for :mod:`pvlib.iotools.bsrn` """ - import pandas as pd import pytest - +import os from pvlib.iotools import read_bsrn, get_bsrn from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal +@pytest.fixture(scope="module") +def bsrn_credentials(): + """Supplies the BSRN FTP credentials for testing purposes. 
+ + Users should obtain there own credentials as described in the `read_bsrn` + documentation.""" + bsrn_username = os.environ["BSRN_FTP_USERNAME"] + bsrn_password = os.environ["BSRN_FTP_PASSWORD"] + return bsrn_username, bsrn_password + + @pytest.fixture def expected_index(): return pd.date_range(start='20160601', periods=43200, freq='1min', @@ -32,15 +42,16 @@ def test_read_bsrn(testfile, expected_index): @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_get_bsrn(expected_index): +def test_get_bsrn(expected_index, bsrn_credentials): # Retrieve irradiance data from the BSRN FTP server # the TAM station is chosen due to its small file sizes + username, password = bsrn_ftp_credentials data, metadata = get_bsrn( start=pd.Timestamp(2016, 6, 1), end=pd.Timestamp(2016, 6, 29), station='tam', - username='bsrnftp', - password='bsrn1', + username=username, + password=password, local_path='') assert_index_equal(expected_index, data.index) assert 'ghi' in data.columns @@ -52,8 +63,9 @@ def test_get_bsrn(expected_index): @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_get_bsrn_bad_station(): +def test_get_bsrn_bad_station(bsrn_credentials): # Test if ValueError is raised if a bad station name is passed + username, password = bsrn_credentials with pytest.raises(KeyError, match='sub-directory does not exist'): get_bsrn( start=pd.Timestamp(2016, 6, 1), @@ -65,7 +77,8 @@ def test_get_bsrn_bad_station(): @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) -def test_get_bsrn_no_files(): +def test_get_bsrn_no_files(bsrn_credentials): + username, password = bsrn_credentials # Test if Warning is given if no files are found for the entire time frame with pytest.warns(UserWarning, match='No files'): get_bsrn( From 2fe7a5074304a3ff7d79d94ad2de5b7c22d1120a Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 9 Jul 2021 12:39:27 -0400 Subject: [PATCH 13/32] Documentation updates --- pvlib/iotools/bsrn.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 401b2ed6f7..576bd0ff60 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -32,10 +32,6 @@ def get_bsrn(start, end, station, username, password, of high-quality solar radiation monitoring stations as described in [1]_. Data is retrieved from the BSRN FTP server [2]_. - Currently only the basic measurements (LR0100) are parsed, which include - global, diffuse, direct, and downwelling long-wave radiation [3]_. Future - updates may include parsing of additional data and metadata. - Parameters ---------- start: datetime-like @@ -61,18 +57,21 @@ def get_bsrn(start, end, station, username, password, Raises ------ - ValueError - If the specified station does not exist on the FTP server or if no - files match the specified station and timeframe. + KeyError + If the specified station does not exist on the FTP server. UserWarning If a requested file is missing a UserWarning is returned with the - filename. + filename. Also, if no files match the specified station and timeframe. Notes ----- Required username and password can be obtained for free as described in the BSRN's Data Release Guidelines [4]_. + Currently only the basic measurements (LR0100) are parsed, which include + global, diffuse, direct, and downwelling long-wave radiation [3]_. Future + updates may include parsing of additional data and metadata. 
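The download-and-decompress steps described above reduce to a few standard-library calls. A minimal sketch for a single monthly file, assuming valid credentials and that the file exists on the server (the station 'cab' and the filename are placeholders):

import ftplib
import gzip
import io

bio = io.BytesIO()
with ftplib.FTP('ftp.bsrn.awi.de', 'yourusername', 'yourpassword') as ftp:
    ftp.cwd('/cab')                                     # station sub-directory
    ftp.retrbinary('RETR cab0120.dat.gz', bio.write)    # fetch one gzipped month
text = gzip.decompress(bio.getvalue()).decode('utf-8')  # station-to-archive text

get_bsrn wraps these steps in a loop over the requested months and passes the decompressed text on to the parser.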
+ Examples -------- >>> # Retrieve two months irradiance data from the Cabauw BSRN station @@ -92,7 +91,7 @@ def get_bsrn(start, end, station, username, password, .. [2] `BSRN Data Retrieval via FTP `_ .. [3] `Update of the Technical Plan for BSRN Data Management, 2013, - Global Climate Observing System (GCOS) GCOS-172. + Global Climate Observing System (GCOS) GCOS-174. `_ .. [4] `BSRN Data Release Guidelines `_ @@ -178,6 +177,9 @@ def read_bsrn(filename): data: DataFrame A DataFrame with the columns as described below. For more extensive description of the variables, consult [2]_. + metadata: dict + Dictionary containing metadata. Currently the dict is empty as no + metadata is parsed. Notes ----- @@ -219,7 +221,7 @@ def read_bsrn(filename): Network (BSRN) `_ .. [2] `Update of the Technical Plan for BSRN Data Management, 2013, - Global Climate Observing System (GCOS) GCOS-172. + Global Climate Observing System (GCOS) GCOS-174. `_ .. [3] `BSRN Data Retrieval via FTP `_ From aa983d7d41ea744d7f825d27dba50590f75a264b Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Fri, 9 Jul 2021 23:02:37 -0400 Subject: [PATCH 14/32] Move line_no_dict 7 lines down --- pvlib/iotools/bsrn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 576bd0ff60..cb7dcb451e 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -229,8 +229,6 @@ def read_bsrn(filename): `_ """ - # Read file and store the starting line number for each logical record (LR) - line_no_dict = {} if isinstance(filename, io.StringIO): f = filename elif str(filename).endswith('.gz'): # check if file is gzipped (.gz) @@ -238,6 +236,8 @@ def read_bsrn(filename): else: f = open(filename, 'r') if True: + # Read file and store the starting line number for each logical record (LR) + line_no_dict = {} f.readline() # first line should be *U0001, so read it and discard line_no_dict['0001'] = 0 date_line = f.readline() # second line contains the year and month From 90e162b0b4f623ad9f18e716e27e134d163e99a2 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 12 Jul 2021 14:32:50 -0400 Subject: [PATCH 15/32] Add requires_bsrn_credentials to conftest.py --- pvlib/tests/conftest.py | 13 +++++++++++++ pvlib/tests/iotools/test_bsrn.py | 8 ++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pvlib/tests/conftest.py b/pvlib/tests/conftest.py index 95d0f725d9..a3cba1e7b8 100644 --- a/pvlib/tests/conftest.py +++ b/pvlib/tests/conftest.py @@ -3,6 +3,7 @@ import warnings import pandas as pd +import os from pkg_resources import parse_version import pytest from functools import wraps @@ -82,6 +83,18 @@ def assert_frame_equal(left, right, **kwargs): reason='does not run on windows') +try: + # Attempt to load BSRN credentials used for testing pvlib.iotools.get_bsrn + bsrn_username = os.environ["BSRN_FTP_USERNAME"] + bsrn_password = os.environ["BSRN_FTP_PASSWORD"] + has_bsrn_credentials = True +except KeyError: + has_bsrn_credentials = False + +requires_bsrn_credentials = pytest.mark.skipif( + not has_bsrn_credentials, reason='requires bsrn credentials') + + try: import statsmodels # noqa: F401 has_statsmodels = True diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 29fe4882a5..3ee314b5fb 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -6,7 +6,8 @@ import pytest import os from pvlib.iotools import read_bsrn, get_bsrn -from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal +from 
..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal, + requires_bsrn_credentials) @pytest.fixture(scope="module") @@ -40,12 +41,13 @@ def test_read_bsrn(testfile, expected_index): assert 'relative_humidity' in data.columns +@requires_bsrn_credentials @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_bsrn(expected_index, bsrn_credentials): # Retrieve irradiance data from the BSRN FTP server # the TAM station is chosen due to its small file sizes - username, password = bsrn_ftp_credentials + username, password = bsrn_credentials data, metadata = get_bsrn( start=pd.Timestamp(2016, 6, 1), end=pd.Timestamp(2016, 6, 29), @@ -61,6 +63,7 @@ def test_get_bsrn(expected_index, bsrn_credentials): assert 'relative_humidity' in data.columns +@requires_bsrn_credentials @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_bsrn_bad_station(bsrn_credentials): @@ -75,6 +78,7 @@ def test_get_bsrn_bad_station(bsrn_credentials): password='bsrn1') +@requires_bsrn_credentials @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_bsrn_no_files(bsrn_credentials): From 428f028ceac7efccb7adae104ff059ecdd691a3f Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 19 Jul 2021 12:47:25 -0600 Subject: [PATCH 16/32] Add parsing of logical records 0300 and 0500 --- pvlib/iotools/bsrn.py | 207 ++++++++++++++++++++++++++++++------------ 1 file changed, 149 insertions(+), 58 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index cb7dcb451e..93fd528bc7 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -11,17 +11,37 @@ BSRN_FTP_URL = "ftp.bsrn.awi.de" -COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), (32, 39), - (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)] - -BSRN_COLUMNS = ['day', 'minute', - 'ghi', 'ghi_std', 'ghi_min', 'ghi_max', - 'dni', 'dni_std', 'dni_min', 'dni_max', - 'empty', 'empty', 'empty', 'empty', 'empty', - 'dhi', 'dhi_std', 'dhi_min', 'dhi_max', - 'lwd', 'lwd_std', 'lwd_min', 'lwd_max', - 'temp_air', 'relative_humidity', 'pressure'] - +COL_SPECS_LR0100 = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), + (32, 39), (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), + (70, 75)] + +COL_SPECS_LR0300 = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27)] + +COL_SPECS_LR0500 = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32), + (32, 38), (38, 44), (44, 50), (50, 56), (56, 62), (62, 68), + (68, 74), (74, 80)] + +BSRN_LR0100_COLUMNS = ['day', 'minute', + 'ghi', 'ghi_std', 'ghi_min', 'ghi_max', + 'dni', 'dni_std', 'dni_min', 'dni_max', + 'empty', 'empty', 'empty', 'empty', 'empty', + 'dhi', 'dhi_std', 'dhi_min', 'dhi_max', + 'lwd', 'lwd_std', 'lwd_min', 'lwd_max', + 'temp_air', 'relative_humidity', 'pressure'] + +BSRN_LR0300_COLUMNS = ['day', 'minute', 'upward short-wave reflected', + 'upward long-wave', 'net radiation'] + +BSRN_LR0500_COLUMNS = ['day', 'minute', 'uva_global_mean', 'uva_global_std', + 'uva_global_min', 'uva_global_max', 'uvb_direct_mean', + 'uvb_direct_std', 'uvb_direct_min', 'uvb_direct_max', + 'empty', 'empty', 'empty', 'empty', + 'uvb_global_mean', 'uvb_global_std', 'uvb_global_min', + 'uvb_global_max', 'uvb_diffuse_mean', 'uvb_diffuse_std', + 'uvb_diffuse_mean', 'uvb_diffuse_std', + 'uvb_diffuse_min', 'uvb_diffuse_max', + 'uvb_reflect_mean', 'uvb_reflect_std', + 'uvb_reflect_min', 'uvb_reflect_max'] def get_bsrn(start, end, station, username, password, 
local_path=None): @@ -59,6 +79,9 @@ def get_bsrn(start, end, station, username, password, ------ KeyError If the specified station does not exist on the FTP server. + + Warning + ------- UserWarning If a requested file is missing a UserWarning is returned with the filename. Also, if no files match the specified station and timeframe. @@ -72,6 +95,11 @@ def get_bsrn(start, end, station, username, password, global, diffuse, direct, and downwelling long-wave radiation [3]_. Future updates may include parsing of additional data and metadata. + Important + --------- + While data from the BSRN is generally of high-quality, measurement data + should always be quality controlled before usage! + Examples -------- >>> # Retrieve two months irradiance data from the Cabauw BSRN station @@ -134,20 +162,18 @@ def get_bsrn(start, end, station, username, password, # FTP client raises an error if the file does not exist on server except ftplib.error_perm as e: if str(e) == '550 Failed to open file.': - warnings.warn(f'File: {filename} does not exist', - category=UserWarning) + warnings.warn(f'File: {filename} does not exist') else: raise ftplib.error_perm(e) - ftp.close() # Close FTP connection + ftp.quit() # Close and exit FTP connection # Concatenate monthly dataframes to one dataframe if len(dfs): data = pd.concat(dfs, axis='rows') else: # Return empty dataframe - data = pd.DataFrame(columns=BSRN_COLUMNS) + data = pd.DataFrame(columns=BSRN_LR0100_COLUMNS) metadata = {} - warnings.warn('No files were available for the specified timeframe.', - category=UserWarning) + warnings.warn('No files were available for the specified timeframe.') # Return dataframe and metadata (metadata belongs to last available file) return data, metadata @@ -178,8 +204,7 @@ def read_bsrn(filename): A DataFrame with the columns as described below. For more extensive description of the variables, consult [2]_. metadata: dict - Dictionary containing metadata. Currently the dict is empty as no - metadata is parsed. + Dictionary containing metadata (primarily from LR0004). 
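A BSRN station-to-archive file stores each timestamp of a logical record on two physical rows, which is why the parser indexes rows by (pair, row-within-pair) and unstacks. A toy illustration of that reshaping, not taken from the patch:

import pandas as pd

raw = pd.DataFrame([[1, 2], [3, 4], [5, 6], [7, 8]])  # 4 rows = 2 timestamps
wide = raw.set_index([raw.index // 2, raw.index % 2]).unstack(level=1)
wide = wide.swaplevel(i=0, j=1, axis='columns')
wide = wide.reindex(sorted(wide.columns), axis='columns')
# wide now has 2 rows; each holds the values of one original pair of rows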
Notes ----- @@ -236,52 +261,118 @@ def read_bsrn(filename): else: f = open(filename, 'r') if True: - # Read file and store the starting line number for each logical record (LR) - line_no_dict = {} + + # Parse metadata f.readline() # first line should be *U0001, so read it and discard - line_no_dict['0001'] = 0 - date_line = f.readline() # second line contains the year and month + date_line = f.readline() # second line contains important metadata start_date = pd.Timestamp(year=int(date_line[7:11]), month=int(date_line[3:6]), day=1, tz='UTC') # BSRN timestamps are UTC - for num, line in enumerate(f, start=2): - if line.startswith('*'): # Find start of all logical records - line_no_dict[line[2:6]] = num # key is 4 digit LR number - - # Determine start and end line of logical record LR0100 to be parsed - start_row = line_no_dict['0100'] + 1 # Start line number - # If LR0100 is the last logical record, then read rest of file - if start_row-1 == max(line_no_dict.values()): - end_row = num # then parse rest of the file - else: # otherwise parse until the beginning of the next logical record - end_row = min([i for i in line_no_dict.values() if i > start_row]) - 1 # noqa: E501 - nrows = end_row-start_row+1 - - # Read file as a fixed width file (fwf) - f.seek(0) # reset buffer to start of file - data = pd.read_fwf(f, skiprows=start_row, nrows=nrows, header=None, - colspecs=COL_SPECS, na_values=[-999.0, -99.9], - compression='infer') - f.close() - # Create multi-index and unstack, resulting in one column for each variable - data = data.set_index([data.index // 2, data.index % 2]) - data = data.unstack(level=1).swaplevel(i=0, j=1, axis='columns') + metadata = {} # Initilize dictionary containing metadata + metadata['start date'] = start_date + metadata['station identification number'] = int(date_line[::3]) + metadata['version of data'] = int(date_line.split()[-1]) + for line in f: + if line[2:6] == '0004': # stop once LR0004 has been reached + break + elif line == '': + raise ValueError('Mandatatory record LR0004 not found.') + metadata['date when station description changed'] = f.readline().strip() # noqa: E501 + metadata['surface type'] = int(f.readline(3)) + metadata['topography type'] = int(f.readline()) + metadata['address'] = f.readline().strip().strip() + metadata['telephone no of station'] = f.readline(20).strip() + metadata['FAX no. of station'] = f.readline().strip() + metadata['TCP/IP no. 
of station'] = f.readline(15).strip() + metadata['e-mail address of station'] = f.readline().strip() + metadata['latitude'] = float(f.readline(8)) + metadata['longitude'] = float(f.readline(8)) + metadata['altitude'] = int(f.readline(5)) + metadata['identification of "SYNOP" station'] = f.readline().strip() + metadata['date when horizon changed'] = f.readline().strip() + horizon = [] # list for raw horizon elevation data + while True: + line = f.readline() + if ('*' in line) | (line == ''): + break + else: + horizon += [int(i) for i in line.split()] + metadata['horizon'] = pd.Series(horizon[1::2],horizon[::2])\ + .sort_index().drop(-1) + + # Read file and store the starting line number and number of lines for + # each logical record (LR) + f.seek(0) # reset buffer to start of file + lr_startrow = {} # Dictionary of starting line number for each LR + lr_nrows = {} # Dictionary of end line number for each LR + for num, line in enumerate(f): + if line.startswith('*'): # Find start of all logical records + if len(lr_startrow) >= 1: + lr_nrows[lr] = num - max(lr_startrow.values()) - 1 + lr = line[2:6] # string of 4 digit LR number + lr_startrow[lr] = num + lr_nrows[lr] = num - lr_startrow[lr] - # Sort columns to match original order and assign column names - data = data.reindex(sorted(data.columns), axis='columns') - data.columns = BSRN_COLUMNS - # Drop empty columns - data = data.drop('empty', axis='columns') + # Read LR01000 as a fixed width file (fwf) + f.seek(0) # reset buffer to start of file + data_0100 = pd.read_fwf(f, skiprows=lr_startrow['0100'] + 1, + nrows=lr_nrows['0100'], header=None, + colspecs=COL_SPECS_LR0100, + na_values=[-999.0, -99.9]) + # Create multi-index and unstack, resulting in one column for each variable + data_0100 = data_0100.set_index([data_0100.index // 2, data_0100.index % 2]) + data_0100 = data_0100.unstack(level=1).swaplevel(i=0, j=1, axis='columns') + # Sort columns to match original order and assign column names + data_0100 = data_0100.reindex(sorted(data_0100.columns), axis='columns') + data_0100.columns = BSRN_LR0100_COLUMNS + # Drop empty columns + data_0100 = data_0100.drop('empty', axis='columns') + # Change day and minute type to integer + data_0100['day'] = data_0100['day'].astype('Int64') + data_0100['minute'] = data_0100['minute'].astype('Int64') + + # Set datetime index + data_0100.index = (start_date + + pd.to_timedelta(data_0100['day']-1, unit='d') + + pd.to_timedelta(data_0100['minute'], unit='T')) + dfs = [data_0100] + + f.seek(0) + if '0300' in lr_startrow.keys(): + data_0300 = pd.read_fwf(f, skiprows=lr_startrow['0300']+1, + nrows=lr_nrows['0300'], header=None, + na_values=[-999.0, -99.9], + colspecs=COL_SPECS_LR0300, + names=BSRN_LR0300_COLUMNS, + dtypes=[int, int, float, float, float]) + data_0300.index = (start_date + + pd.to_timedelta(data_0300['day']-1, unit='d') + + pd.to_timedelta(data_0300['minute'], unit='T')) + data_0300 = data_0300.drop(columns=['day', 'minute']) + dfs.append(data_0300) + + f.seek(0) + if '0500' in lr_startrow.keys(): + data_0500 = pd.read_fwf(f, skiprows=lr_startrow['0500']+1, + nrows=lr_nrows['0500'], header=None, + na_values=[-99.9], + colspecs=COL_SPECS_LR0500) + # Create multi-index and unstack, resulting in one column for each variable + data_0500 = data_0500.set_index([data_0500.index // 2, data_0500.index % 2]) + data_0500 = data_0500.unstack(level=1).swaplevel(i=0, j=1, axis='columns') + # Sort columns to match original order and assign column names + data_0500 = data_0500.reindex(sorted(data_0500.columns), 
axis='columns') + data_0500.columns = BSRN_LR0500_COLUMNS + data_0500 = data_0500.drop('empty', axis='columns') + data_0500.index = (start_date + + pd.to_timedelta(data_0500['day']-1, unit='d') + + pd.to_timedelta(data_0500['minute'], unit='T')) + data_0500 = data_0500.drop(columns=['day', 'minute']) + dfs.append(data_0500) - # Change day and minute type to integer - data['day'] = data['day'].astype('Int64') - data['minute'] = data['minute'].astype('Int64') + f.close() - # Set datetime index - data.index = (start_date - + pd.to_timedelta(data['day']-1, unit='d') - + pd.to_timedelta(data['minute'], unit='T')) + data = pd.concat(dfs, axis='columns') - metadata = {} return data, metadata From 7f4be472c9bb286921871608d5731ac2560ccef3 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 19 Jul 2021 13:17:56 -0600 Subject: [PATCH 17/32] Raise os.environ as ValueError for debugging --- pvlib/tests/iotools/test_bsrn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 3ee314b5fb..9d11d14b6f 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -10,6 +10,10 @@ requires_bsrn_credentials) +def test_os_variables(): + raise ValueError(os.environ) + + @pytest.fixture(scope="module") def bsrn_credentials(): """Supplies the BSRN FTP credentials for testing purposes. From 14a43761209cb78a2cfd151dc2cb696157869386 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 19 Jul 2021 13:35:50 -0600 Subject: [PATCH 18/32] Export BSRN credentials in conda_linux.yml --- ci/azure/conda_linux.yml | 2 ++ pvlib/tests/iotools/test_bsrn.py | 4 ---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ci/azure/conda_linux.yml b/ci/azure/conda_linux.yml index 583ac71226..3bf8215cc4 100644 --- a/ci/azure/conda_linux.yml +++ b/ci/azure/conda_linux.yml @@ -38,6 +38,8 @@ jobs: - script: | source activate test_env export NREL_API_KEY=$(nrelApiKey) + export BSRN_FTP_USERNAME=$(BSRN_FTP_USERNAME) + export BSRN_FTP_PASSWORD=$(BSRN_FTP_PASSWORD) pytest pvlib --remote-data --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html displayName: 'pytest' - task: PublishTestResults@2 diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 9d11d14b6f..3ee314b5fb 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -10,10 +10,6 @@ requires_bsrn_credentials) -def test_os_variables(): - raise ValueError(os.environ) - - @pytest.fixture(scope="module") def bsrn_credentials(): """Supplies the BSRN FTP credentials for testing purposes. 
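Between the CI change above and the conftest marker added earlier in the series, the remote BSRN tests only run when credentials are available. A sketch of that guard pattern as it could be used in any test module (the names mirror the conftest code; the test body is a placeholder):

import os
import pytest

has_bsrn_credentials = ('BSRN_FTP_USERNAME' in os.environ
                        and 'BSRN_FTP_PASSWORD' in os.environ)
requires_bsrn_credentials = pytest.mark.skipif(
    not has_bsrn_credentials, reason='requires bsrn credentials')

@requires_bsrn_credentials
def test_get_bsrn_remote():
    pass  # would call get_bsrn with the credentials from the environment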
From 7c40574f9bde781f793fac2cbbd7ce319ec54a88 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 19 Jul 2021 18:03:17 -0600 Subject: [PATCH 19/32] Add parse_bsrn --- pvlib/iotools/bsrn.py | 307 +++++++++++++++++-------------- pvlib/tests/iotools/test_bsrn.py | 2 +- 2 files changed, 171 insertions(+), 138 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 93fd528bc7..4add7ba113 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -11,15 +11,15 @@ BSRN_FTP_URL = "ftp.bsrn.awi.de" -COL_SPECS_LR0100 = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27, 32), - (32, 39), (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), - (70, 75)] +BSRN_LR0100_COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27), + (27, 32), (32, 39), (39, 45), (45, 50), (50, 55), + (55, 64), (64, 70), (70, 75)] -COL_SPECS_LR0300 = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27)] +BSRN_LR0300_COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27)] -COL_SPECS_LR0500 = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32), - (32, 38), (38, 44), (44, 50), (50, 56), (56, 62), (62, 68), - (68, 74), (74, 80)] +BSRN_LR0500_COL_SPECS = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32), + (32, 38), (38, 44), (44, 50), (50, 56), (56, 62), + (62, 68), (68, 74), (74, 80)] BSRN_LR0100_COLUMNS = ['day', 'minute', 'ghi', 'ghi_std', 'ghi_min', 'ghi_max', @@ -43,7 +43,8 @@ 'uvb_reflect_mean', 'uvb_reflect_std', 'uvb_reflect_min', 'uvb_reflect_max'] -def get_bsrn(start, end, station, username, password, + +def get_bsrn(start, end, station, username, password, logical_records=['0100'], local_path=None): """ Retrieve ground measured irradiance data from the BSRN FTP server. @@ -64,6 +65,9 @@ def get_bsrn(start, end, station, username, password, username for accessing the BSRN ftp server password: str password for accessing the BSRN ftp server + logical_records: list, default: ['0100'] + List of the logical records (LR) to parse. Options are: 0100, 0300, + and 0500. local_path: str or path-like, default: None, optional If specified, path (abs. or relative) of where to save files @@ -107,7 +111,7 @@ def get_bsrn(start, end, station, username, password, >>> start=pd.Timestamp(2020,1,1), end=pd.Timestamp(2020,12,1), # doctest: +SKIP >>> station='cab', username='yourusername', password='yourpassword') # doctest: +SKIP - See also + See Also -------- pvlib.iotools.read_bsrn @@ -152,7 +156,7 @@ def get_bsrn(start, end, station, username, password, # Decompress/unzip and decode the binary file text = gzip.decompress(bio.getvalue()).decode('utf-8') # Convert string to StringIO and parse data - dfi, metadata = read_bsrn(io.StringIO(text)) + dfi, metadata = parse_bsrn(io.StringIO(text)) dfs.append(dfi) # Save file locally if local_path is specified if local_path is not None: @@ -178,7 +182,150 @@ def get_bsrn(start, end, station, username, password, return data, metadata -def read_bsrn(filename): +def parse_bsrn(fbuf, logical_records='0100'): + """ + Parse a file-like buffer of BSRN station-to-archive file into a DataFrame. + + Parameters + ---------- + fbuf: file-like buffer + Buffer of a BSRN station-to-archive data file + logical_records: list, default: ['0100'] + List of the logical records (LR) to parse. Options are: 0100, 0300, + and 0500. + + Returns + ------- + data: DataFrame + A DataFrame with the columns as described below. See + pvlib.iotools.read_bsrn for fields. + metadata: dict + Dictionary containing metadata (primarily from LR0004). 
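Because parse_bsrn takes a file-like buffer, it can be fed either an open file or an in-memory string, which is how get_bsrn hands it the decompressed FTP payload. A sketch assuming a decompressed station-to-archive file is available locally (the filename is a placeholder, and parse_bsrn is imported from the module directly in case it is not yet re-exported from pvlib.iotools at this point in the series):

import io
from pvlib.iotools.bsrn import parse_bsrn

with open('cab0120.dat', 'r') as f:
    buf = io.StringIO(f.read())
data, metadata = parse_bsrn(buf, logical_records=['0100'])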
+ + See Also + -------- + pvlib.iotools.read_bsrn, pvlib.iotools.get_bsrn + + """ + # Parse metadata + fbuf.readline() # first line should be *U0001, so read it and discard + date_line = fbuf.readline() # second line contains important metadata + start_date = pd.Timestamp(year=int(date_line[7:11]), + month=int(date_line[3:6]), day=1, + tz='UTC') # BSRN timestamps are UTC + + metadata = {} # Initilize dictionary containing metadata + metadata['start date'] = start_date + metadata['station identification number'] = int(date_line[:3]) + metadata['version of data'] = int(date_line.split()[-1]) + for line in fbuf: + if line[2:6] == '0004': # stop once LR0004 has been reached + break + elif line == '': + raise ValueError('Mandatatory record LR0004 not found.') + metadata['date when station description changed'] = fbuf.readline().strip() + metadata['surface type'] = int(fbuf.readline(3)) + metadata['topography type'] = int(fbuf.readline()) + metadata['address'] = fbuf.readline().strip().strip() + metadata['telephone no. of station'] = fbuf.readline(20).strip() + metadata['FAX no. of station'] = fbuf.readline().strip() + metadata['TCP/IP no. of station'] = fbuf.readline(15).strip() + metadata['e-mail address of station'] = fbuf.readline().strip() + metadata['latitude'] = float(fbuf.readline(8)) + metadata['longitude'] = float(fbuf.readline(8)) + metadata['altitude'] = int(fbuf.readline(5)) + metadata['identification of "SYNOP" station'] = fbuf.readline().strip() + metadata['date when horizon changed'] = fbuf.readline().strip() + # Pass last section of LR0004 containing the horizon elevation data + horizon = [] # list for raw horizon elevation data + while True: + line = fbuf.readline() + if ('*' in line) | (line == ''): + break + else: + horizon += [int(i) for i in line.split()] + metadata['horizon'] = pd.Series(horizon[1::2], horizon[::2]).sort_index().drop(-1) # noqa: E501 + + # Read file and store the starting line number and number of lines for + # each logical record (LR) + fbuf.seek(0) # reset buffer to start of file + lr_startrow = {} # Dictionary of starting line number for each LR + lr_nrows = {} # Dictionary of end line number for each LR + for num, line in enumerate(fbuf): + if line.startswith('*'): # Find start of all logical records + if len(lr_startrow) >= 1: + lr_nrows[lr] = num - max(lr_startrow.values()) - 1 + lr = line[2:6] # string of 4 digit LR number + lr_startrow[lr] = num + lr_nrows[lr] = num - lr_startrow[lr] + + for lr in logical_records: + if lr not in ['0100', '0300', '0500']: + raise ValueError(f"Logical record {lr} not in " + f"{['0100', '0300','0500']}.") + dfs = [] # Initialize empty list for dataframe + + # Parse LR0100 - basic measurements including GHI, DNI, DHI and temperature + if ('0100' in lr_startrow.keys()) & ('0100' in logical_records): + fbuf.seek(0) # reset buffer to start of file + LR_0100 = pd.read_fwf(fbuf, skiprows=lr_startrow['0100'] + 1, + nrows=lr_nrows['0100'], header=None, + colspecs=BSRN_LR0100_COL_SPECS, + na_values=[-999.0, -99.9]) + # Create multi-index and unstack, resulting in 1 col for each variable + LR_0100 = LR_0100.set_index([LR_0100.index // 2, LR_0100.index % 2]) + LR_0100 = LR_0100.unstack(level=1).swaplevel(i=0, j=1, axis='columns') + # Sort columns to match original order and assign column names + LR_0100 = LR_0100.reindex(sorted(LR_0100.columns), axis='columns') + LR_0100.columns = BSRN_LR0100_COLUMNS + # Drop empty columns + LR_0100 = LR_0100.drop('empty', axis='columns') + # Change day and minute type to integer + 
LR_0100['day'] = LR_0100['day'].astype('Int64') + LR_0100['minute'] = LR_0100['minute'].astype('Int64') + + # Set datetime index + LR_0100.index = (start_date + pd.to_timedelta(LR_0100['day']-1, unit='d') # noqa: E501 + + pd.to_timedelta(LR_0100['minute'], unit='T')) + dfs.append(LR_0100) + + # Parse LR0300 - other time series data, including upward and net radiation + if ('0300' in lr_startrow.keys()) & ('0300' in logical_records): + fbuf.seek(0) # reset buffer to start of file + LR_0300 = pd.read_fwf(fbuf, skiprows=lr_startrow['0300']+1, + nrows=lr_nrows['0300'], header=None, + na_values=[-999.0, -99.9], + colspecs=BSRN_LR0300_COL_SPECS, + names=BSRN_LR0300_COLUMNS) + LR_0300.index = (start_date + + pd.to_timedelta(LR_0300['day']-1, unit='d') + + pd.to_timedelta(LR_0300['minute'], unit='T')) + LR_0300 = LR_0300.drop(columns=['day', 'minute']).astype(float) + dfs.append(LR_0300) + + # Parse LR0500 - UV measurements + if ('0500' in lr_startrow.keys()) & ('0500' in logical_records): + fbuf.seek(0) # reset buffer to start of file + LR_0500 = pd.read_fwf(fbuf, skiprows=lr_startrow['0500']+1, + nrows=lr_nrows['0500'], na_values=[-99.9], + header=None, colspecs=BSRN_LR0500_COL_SPECS) + # Create multi-index and unstack, resulting in 1 col for each variable + LR_0500 = LR_0500.set_index([LR_0500.index // 2, LR_0500.index % 2]) + LR_0500 = LR_0500.unstack(level=1).swaplevel(i=0, j=1, axis='columns') + # Sort columns to match original order and assign column names + LR_0500 = LR_0500.reindex(sorted(LR_0500.columns), axis='columns') + LR_0500.columns = BSRN_LR0500_COLUMNS + LR_0500.index = (start_date + + pd.to_timedelta(LR_0500['day']-1, unit='d') + + pd.to_timedelta(LR_0500['minute'], unit='T')) + LR_0500 = LR_0500.drop(columns=['empty', 'day', 'minute']) + dfs.append(LR_0500) + + data = pd.concat(dfs, axis='columns') + return data, metadata + + +def read_bsrn(filename, logical_records=['0100']): """ Read a BSRN station-to-archive file into a DataFrame. @@ -195,8 +342,11 @@ def read_bsrn(filename): Parameters ---------- - filename: str, path-like or file-like object - Name, path, or buffer of a BSRN station-to-archive data file + filename: str or path-like + Name or path of a BSRN station-to-archive data file + logical_records: list, default: ['0100'] + List of the logical records (LR) to parse. Options are: 0100, 0300, + and 0500. Returns ------- @@ -236,9 +386,9 @@ def read_bsrn(filename): pressure float Atmospheric pressure [hPa] ======================= ====== ========================================== - See also + See Also -------- - pvlib.iotools.get_bsrn + pvlib.iotools.parse_bsrn, pvlib.iotools.get_bsrn References ---------- @@ -253,126 +403,9 @@ def read_bsrn(filename): .. 
[4] `BSRN Data Release Guidelines `_ """ - - if isinstance(filename, io.StringIO): - f = filename - elif str(filename).endswith('.gz'): # check if file is gzipped (.gz) - f = gzip.open(filename, 'rt') + if str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file + open_func, mode = gzip.open, 'rt' else: - f = open(filename, 'r') - if True: - - # Parse metadata - f.readline() # first line should be *U0001, so read it and discard - date_line = f.readline() # second line contains important metadata - start_date = pd.Timestamp(year=int(date_line[7:11]), - month=int(date_line[3:6]), day=1, - tz='UTC') # BSRN timestamps are UTC - - metadata = {} # Initilize dictionary containing metadata - metadata['start date'] = start_date - metadata['station identification number'] = int(date_line[::3]) - metadata['version of data'] = int(date_line.split()[-1]) - for line in f: - if line[2:6] == '0004': # stop once LR0004 has been reached - break - elif line == '': - raise ValueError('Mandatatory record LR0004 not found.') - metadata['date when station description changed'] = f.readline().strip() # noqa: E501 - metadata['surface type'] = int(f.readline(3)) - metadata['topography type'] = int(f.readline()) - metadata['address'] = f.readline().strip().strip() - metadata['telephone no of station'] = f.readline(20).strip() - metadata['FAX no. of station'] = f.readline().strip() - metadata['TCP/IP no. of station'] = f.readline(15).strip() - metadata['e-mail address of station'] = f.readline().strip() - metadata['latitude'] = float(f.readline(8)) - metadata['longitude'] = float(f.readline(8)) - metadata['altitude'] = int(f.readline(5)) - metadata['identification of "SYNOP" station'] = f.readline().strip() - metadata['date when horizon changed'] = f.readline().strip() - horizon = [] # list for raw horizon elevation data - while True: - line = f.readline() - if ('*' in line) | (line == ''): - break - else: - horizon += [int(i) for i in line.split()] - metadata['horizon'] = pd.Series(horizon[1::2],horizon[::2])\ - .sort_index().drop(-1) - - # Read file and store the starting line number and number of lines for - # each logical record (LR) - f.seek(0) # reset buffer to start of file - lr_startrow = {} # Dictionary of starting line number for each LR - lr_nrows = {} # Dictionary of end line number for each LR - for num, line in enumerate(f): - if line.startswith('*'): # Find start of all logical records - if len(lr_startrow) >= 1: - lr_nrows[lr] = num - max(lr_startrow.values()) - 1 - lr = line[2:6] # string of 4 digit LR number - lr_startrow[lr] = num - lr_nrows[lr] = num - lr_startrow[lr] - - # Read LR01000 as a fixed width file (fwf) - f.seek(0) # reset buffer to start of file - data_0100 = pd.read_fwf(f, skiprows=lr_startrow['0100'] + 1, - nrows=lr_nrows['0100'], header=None, - colspecs=COL_SPECS_LR0100, - na_values=[-999.0, -99.9]) - # Create multi-index and unstack, resulting in one column for each variable - data_0100 = data_0100.set_index([data_0100.index // 2, data_0100.index % 2]) - data_0100 = data_0100.unstack(level=1).swaplevel(i=0, j=1, axis='columns') - # Sort columns to match original order and assign column names - data_0100 = data_0100.reindex(sorted(data_0100.columns), axis='columns') - data_0100.columns = BSRN_LR0100_COLUMNS - # Drop empty columns - data_0100 = data_0100.drop('empty', axis='columns') - # Change day and minute type to integer - data_0100['day'] = data_0100['day'].astype('Int64') - data_0100['minute'] = data_0100['minute'].astype('Int64') - - # Set datetime index - 
data_0100.index = (start_date - + pd.to_timedelta(data_0100['day']-1, unit='d') - + pd.to_timedelta(data_0100['minute'], unit='T')) - dfs = [data_0100] - - f.seek(0) - if '0300' in lr_startrow.keys(): - data_0300 = pd.read_fwf(f, skiprows=lr_startrow['0300']+1, - nrows=lr_nrows['0300'], header=None, - na_values=[-999.0, -99.9], - colspecs=COL_SPECS_LR0300, - names=BSRN_LR0300_COLUMNS, - dtypes=[int, int, float, float, float]) - data_0300.index = (start_date - + pd.to_timedelta(data_0300['day']-1, unit='d') - + pd.to_timedelta(data_0300['minute'], unit='T')) - data_0300 = data_0300.drop(columns=['day', 'minute']) - dfs.append(data_0300) - - f.seek(0) - if '0500' in lr_startrow.keys(): - data_0500 = pd.read_fwf(f, skiprows=lr_startrow['0500']+1, - nrows=lr_nrows['0500'], header=None, - na_values=[-99.9], - colspecs=COL_SPECS_LR0500) - # Create multi-index and unstack, resulting in one column for each variable - data_0500 = data_0500.set_index([data_0500.index // 2, data_0500.index % 2]) - data_0500 = data_0500.unstack(level=1).swaplevel(i=0, j=1, axis='columns') - # Sort columns to match original order and assign column names - data_0500 = data_0500.reindex(sorted(data_0500.columns), axis='columns') - data_0500.columns = BSRN_LR0500_COLUMNS - data_0500 = data_0500.drop('empty', axis='columns') - data_0500.index = (start_date - + pd.to_timedelta(data_0500['day']-1, unit='d') - + pd.to_timedelta(data_0500['minute'], unit='T')) - data_0500 = data_0500.drop(columns=['day', 'minute']) - dfs.append(data_0500) - - f.close() - - data = pd.concat(dfs, axis='columns') - - return data, metadata + open_func, mode = open, 'r' + with open_func(filename, mode) as f: + return parse_bsrn(f, logical_records) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index 3ee314b5fb..d1b23688bd 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -13,7 +13,7 @@ @pytest.fixture(scope="module") def bsrn_credentials(): """Supplies the BSRN FTP credentials for testing purposes. - + Users should obtain there own credentials as described in the `read_bsrn` documentation.""" bsrn_username = os.environ["BSRN_FTP_USERNAME"] From cf245c859a03dc2074fbb3538973f9e7d6ea448a Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 19 Jul 2021 18:32:22 -0600 Subject: [PATCH 20/32] Fix stickler and minor doc changes --- docs/sphinx/source/api.rst | 3 ++- pvlib/iotools/__init__.py | 3 ++- pvlib/iotools/bsrn.py | 42 +++++++++++++++++--------------------- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst index 737a32e9b7..29ba841546 100644 --- a/docs/sphinx/source/api.rst +++ b/docs/sphinx/source/api.rst @@ -486,10 +486,11 @@ of sources and file formats relevant to solar energy modeling. 
iotools.parse_psm3 iotools.get_pvgis_tmy iotools.read_pvgis_tmy - iotools.get_bsrn iotools.get_pvgis_hourly iotools.read_pvgis_hourly + iotools.get_bsrn iotools.read_bsrn + iotools.parse_bsrn iotools.get_cams iotools.read_cams iotools.parse_cams diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index 996f2fb62a..b02ce243ae 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -15,8 +15,9 @@ from pvlib.iotools.pvgis import get_pvgis_tmy, read_pvgis_tmy # noqa: F401 from pvlib.iotools.pvgis import read_pvgis_hourly # noqa: F401 from pvlib.iotools.pvgis import get_pvgis_hourly # noqa: F401 -from pvlib.iotools.bsrn import read_bsrn # noqa: F401 from pvlib.iotools.bsrn import get_bsrn # noqa: F401 +from pvlib.iotools.bsrn import read_bsrn # noqa: F401 +from pvlib.iotools.bsrn import parse_bsrn # noqa: F401 from pvlib.iotools.sodapro import get_cams # noqa: F401 from pvlib.iotools.sodapro import read_cams # noqa: F401 from pvlib.iotools.sodapro import parse_cams # noqa: F401 diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 4add7ba113..3f3f98f95b 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -62,9 +62,9 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], station: str 3-letter BSRN station abbreviation username: str - username for accessing the BSRN ftp server + username for accessing the BSRN FTP server password: str - password for accessing the BSRN ftp server + password for accessing the BSRN FTP server logical_records: list, default: ['0100'] List of the logical records (LR) to parse. Options are: 0100, 0300, and 0500. @@ -83,7 +83,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], ------ KeyError If the specified station does not exist on the FTP server. - + Warning ------- UserWarning @@ -95,7 +95,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], Required username and password can be obtained for free as described in the BSRN's Data Release Guidelines [4]_. - Currently only the basic measurements (LR0100) are parsed, which include + Currently only LR0100, LR0300, and LR0500 can be parsed, which include global, diffuse, direct, and downwelling long-wave radiation [3]_. Future updates may include parsing of additional data and metadata. @@ -113,7 +113,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], See Also -------- - pvlib.iotools.read_bsrn + pvlib.iotools.read_bsrn, pvlib.iotools.parse_bsrn References ---------- @@ -128,7 +128,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], .. 
[4] `BSRN Data Release Guidelines `_ """ # noqa: E501 - # The ftp server uses lowercase station abbreviations + # The FTP server uses lowercase station abbreviations station = station.lower() # Generate list files to download based on start/end (SSSMMYY.dat.gz) @@ -156,7 +156,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], # Decompress/unzip and decode the binary file text = gzip.decompress(bio.getvalue()).decode('utf-8') # Convert string to StringIO and parse data - dfi, metadata = parse_bsrn(io.StringIO(text)) + dfi, metadata = parse_bsrn(io.StringIO(text), logical_records) dfs.append(dfi) # Save file locally if local_path is specified if local_path is not None: @@ -182,7 +182,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], return data, metadata -def parse_bsrn(fbuf, logical_records='0100'): +def parse_bsrn(fbuf, logical_records=['0100']): """ Parse a file-like buffer of BSRN station-to-archive file into a DataFrame. @@ -254,12 +254,12 @@ def parse_bsrn(fbuf, logical_records='0100'): for num, line in enumerate(fbuf): if line.startswith('*'): # Find start of all logical records if len(lr_startrow) >= 1: - lr_nrows[lr] = num - max(lr_startrow.values()) - 1 + lr_nrows[lr] = num - max(lr_startrow.values())-1 # noqa: F821 lr = line[2:6] # string of 4 digit LR number lr_startrow[lr] = num lr_nrows[lr] = num - lr_startrow[lr] - for lr in logical_records: + for lr in list(logical_records): if lr not in ['0100', '0300', '0500']: raise ValueError(f"Logical record {lr} not in " f"{['0100', '0300','0500']}.") @@ -278,15 +278,11 @@ def parse_bsrn(fbuf, logical_records='0100'): # Sort columns to match original order and assign column names LR_0100 = LR_0100.reindex(sorted(LR_0100.columns), axis='columns') LR_0100.columns = BSRN_LR0100_COLUMNS - # Drop empty columns - LR_0100 = LR_0100.drop('empty', axis='columns') - # Change day and minute type to integer - LR_0100['day'] = LR_0100['day'].astype('Int64') - LR_0100['minute'] = LR_0100['minute'].astype('Int64') - # Set datetime index LR_0100.index = (start_date + pd.to_timedelta(LR_0100['day']-1, unit='d') # noqa: E501 + pd.to_timedelta(LR_0100['minute'], unit='T')) + # Drop empty, minute, and day columns + LR_0100 = LR_0100.drop(columns=['empty', 'day', 'minute']) dfs.append(LR_0100) # Parse LR0300 - other time series data, including upward and net radiation @@ -331,15 +327,15 @@ def read_bsrn(filename, logical_records=['0100']): The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. - The function only parses the basic measurements (LR0100), which include - global, diffuse, direct, and downwelling long-wave radiation [2]_. Future - updates may include parsing of additional data and meta-data. + The function is able to parse LR0100, LR0300, and LR0500. LR0100 include + the basic measurements (LR0100), which include global, diffuse, direct, and + downwelling long-wave radiation [2]_. Future updates may include parsing of + additional data and metadata. BSRN files are freely available and can be accessed via FTP [3]_. Required username and password are easily obtainable as described in the BSRN's Data Release Guidelines [4]_. 
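As a sketch of the credential handling implied here, the FTP username and password can be kept out of source code and read from the environment; the environment variable names below mirror the ones used by this series' test suite and are otherwise arbitrary, and 'cab' (Cabauw) is simply the station used in the docstring example.

    import os

    import pandas as pd
    import pvlib

    username = os.environ['BSRN_FTP_USERNAME']  # set outside the script
    password = os.environ['BSRN_FTP_PASSWORD']

    data, metadata = pvlib.iotools.get_bsrn(
        start=pd.Timestamp(2020, 6, 1), end=pd.Timestamp(2020, 6, 30),
        station='cab', username=username, password=password)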
- Parameters ---------- filename: str or path-like @@ -358,13 +354,11 @@ def read_bsrn(filename, logical_records=['0100']): Notes ----- - The data DataFrame includes the following fields: + The data DataFrame for LR0100 includes the following fields: ======================= ====== ========================================== Key Format Description ======================= ====== ========================================== - day int Day of the month 1-31 - minute int Minute of the day 0-1439 ghi float Mean global horizontal irradiance [W/m^2] ghi_std float Std. global horizontal irradiance [W/m^2] ghi_min float Min. global horizontal irradiance [W/m^2] @@ -386,6 +380,8 @@ def read_bsrn(filename, logical_records=['0100']): pressure float Atmospheric pressure [hPa] ======================= ====== ========================================== + For fields for other logical records, see [2]_. + See Also -------- pvlib.iotools.parse_bsrn, pvlib.iotools.get_bsrn From b6b8df6ee743f3e50ca0a986061cd859067e599d Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Mon, 19 Jul 2021 19:45:39 -0600 Subject: [PATCH 21/32] Coverage for additional logical records --- pvlib/iotools/bsrn.py | 25 +++++++++++++------------ pvlib/tests/iotools/test_bsrn.py | 29 ++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 3f3f98f95b..47ae252ad0 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -93,11 +93,12 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], Notes ----- Required username and password can be obtained for free as described in the - BSRN's Data Release Guidelines [4]_. + BSRN's Data Release Guidelines [3]_. - Currently only LR0100, LR0300, and LR0500 can be parsed, which include - global, diffuse, direct, and downwelling long-wave radiation [3]_. Future - updates may include parsing of additional data and metadata. + Currently only parsing of LR0100, LR0300, and LR0500 is supported. LR0100 + is contains the basic irradiance and auxillary measurements. See + [4]_ for a description of the different logical records. Future updates may + include parsing of additional data and metadata. Important --------- @@ -122,11 +123,11 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], `_ .. [2] `BSRN Data Retrieval via FTP `_ + .. [4] `BSRN Data Release Guidelines + `_ .. [3] `Update of the Technical Plan for BSRN Data Management, 2013, Global Climate Observing System (GCOS) GCOS-174. `_ - .. [4] `BSRN Data Release Guidelines - `_ """ # noqa: E501 # The FTP server uses lowercase station abbreviations station = station.lower() @@ -184,7 +185,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], def parse_bsrn(fbuf, logical_records=['0100']): """ - Parse a file-like buffer of BSRN station-to-archive file into a DataFrame. + Parse a file-like buffer of a BSRN station-to-archive file. Parameters ---------- @@ -197,7 +198,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): Returns ------- data: DataFrame - A DataFrame with the columns as described below. See + A DataFrame containing time-series measurement data. See pvlib.iotools.read_bsrn for fields. metadata: dict Dictionary containing metadata (primarily from LR0004). @@ -327,10 +328,10 @@ def read_bsrn(filename, logical_records=['0100']): The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. 
- The function is able to parse LR0100, LR0300, and LR0500. LR0100 include - the basic measurements (LR0100), which include global, diffuse, direct, and - downwelling long-wave radiation [2]_. Future updates may include parsing of - additional data and metadata. + The function is able to parse LR0100, LR0300, and LR0500. LR0100 contains + the basic measurements, which include global, diffuse, and direct + irradiance, as well as downwelling long-wave radiation [2]_. Future updates + may include parsing of additional data and metadata. BSRN files are freely available and can be accessed via FTP [3]_. Required username and password are easily obtainable as described in the BSRN's diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index d1b23688bd..d82e6e8c49 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -41,6 +41,25 @@ def test_read_bsrn(testfile, expected_index): assert 'relative_humidity' in data.columns +def test_read_bsrn_logical_records(expected_index): + # Test if logical records 0300 and 0500 are correct parsed + # and that 0100 is not passed when not specified + data, metadata = read_bsrn(DATA_DIR / 'bsrn-pay0616.dat.gz', + logical_records=['0300', '0500']) + assert_index_equal(expected_index, data.index) + assert 'ghi' not in data.columns + assert 'upward long-wave' in data.columns + assert 'uva_global_mean' in data.columns + assert 'uvb_reflect_std' in data.columns + + +def test_read_bsrn_bad_logical_record(): + # Test if ValueError is raised if an unsupported logical record is passed + with pytest.raises(ValueError, match='not in'): + read_bsrn(DATA_DIR / 'bsrn-lr0100-pay0616.dat', + logical_records=['dummy']) + + @requires_bsrn_credentials @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) @@ -67,15 +86,15 @@ def test_get_bsrn(expected_index, bsrn_credentials): @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) def test_get_bsrn_bad_station(bsrn_credentials): - # Test if ValueError is raised if a bad station name is passed + # Test if KeyError is raised if a bad station name is passed username, password = bsrn_credentials with pytest.raises(KeyError, match='sub-directory does not exist'): get_bsrn( start=pd.Timestamp(2016, 6, 1), end=pd.Timestamp(2016, 6, 29), station='not_a_station_name', - username='bsrnftp', - password='bsrn1') + username=username, + password=password) @requires_bsrn_credentials @@ -89,5 +108,5 @@ def test_get_bsrn_no_files(bsrn_credentials): start=pd.Timestamp(1990, 6, 1), end=pd.Timestamp(1990, 6, 29), station='tam', - username='bsrnftp', - password='bsrn1') + username=username, + password=password) From 2503d974546bcc29186f3014e77fcc1ccd29219b Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 20 Jul 2021 08:03:23 -0600 Subject: [PATCH 22/32] Refactor warnings in get_bsrn If only some files are missing, give only one warning with a list of missing files --- pvlib/iotools/bsrn.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 47ae252ad0..b97f4124c2 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -65,9 +65,9 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], username for accessing the BSRN FTP server password: str password for accessing the BSRN FTP server - logical_records: list, default: ['0100'] - List of the logical records (LR) to parse. 
Options are: 0100, 0300, - and 0500. + logical_records: str or list, default: ['0100'] + List of the logical records (LR) to parse. Options are: '0100', '0300', + and '0500'. local_path: str or path-like, default: None, optional If specified, path (abs. or relative) of where to save files @@ -146,6 +146,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], 'station is probably not a proper three letter ' 'station abbreviation.') from e dfs = [] # Initialize list for monthly dataframes + non_existing_files = [] # Initilize list of files that were not found for filename in filenames: try: bio = io.BytesIO() # Initialize BytesIO object @@ -167,18 +168,23 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], # FTP client raises an error if the file does not exist on server except ftplib.error_perm as e: if str(e) == '550 Failed to open file.': - warnings.warn(f'File: {filename} does not exist') + non_existing_files.append(filename) else: raise ftplib.error_perm(e) ftp.quit() # Close and exit FTP connection + # Raise user warnings + if not dfs: # If no files were found + warnings.warn('No files were available for the specified timeframe.') + elif non_existing_files: # If only some files were missing + warnings.warn(f'The following files were not found: {non_existing_files}') # noqa: E501 + # Concatenate monthly dataframes to one dataframe if len(dfs): data = pd.concat(dfs, axis='rows') else: # Return empty dataframe data = pd.DataFrame(columns=BSRN_LR0100_COLUMNS) metadata = {} - warnings.warn('No files were available for the specified timeframe.') # Return dataframe and metadata (metadata belongs to last available file) return data, metadata @@ -191,7 +197,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): ---------- fbuf: file-like buffer Buffer of a BSRN station-to-archive data file - logical_records: list, default: ['0100'] + logical_records: str or list, default: ['0100'] List of the logical records (LR) to parse. Options are: 0100, 0300, and 0500. 
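With the refactor above, missing months are collected into a single UserWarning, and a completely empty result triggers a separate warning plus an empty DataFrame. A sketch of how a caller could inspect both; the station and period are taken from the no-files test in this series, and the credentials are read from the environment as in the earlier sketch:

    import os
    import warnings

    import pandas as pd
    import pvlib

    username = os.environ['BSRN_FTP_USERNAME']
    password = os.environ['BSRN_FTP_PASSWORD']

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        data, metadata = pvlib.iotools.get_bsrn(
            start=pd.Timestamp(1990, 6, 1), end=pd.Timestamp(1990, 6, 29),
            station='tam', username=username, password=password)

    for w in caught:
        print(w.message)  # e.g. a single list of the files that were not found
    if data.empty:
        print('no BSRN data for the requested period')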
@@ -280,7 +286,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): LR_0100 = LR_0100.reindex(sorted(LR_0100.columns), axis='columns') LR_0100.columns = BSRN_LR0100_COLUMNS # Set datetime index - LR_0100.index = (start_date + pd.to_timedelta(LR_0100['day']-1, unit='d') # noqa: E501 + LR_0100.index = (start_date+pd.to_timedelta(LR_0100['day']-1, unit='d') + pd.to_timedelta(LR_0100['minute'], unit='T')) # Drop empty, minute, and day columns LR_0100 = LR_0100.drop(columns=['empty', 'day', 'minute']) @@ -294,8 +300,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): na_values=[-999.0, -99.9], colspecs=BSRN_LR0300_COL_SPECS, names=BSRN_LR0300_COLUMNS) - LR_0300.index = (start_date - + pd.to_timedelta(LR_0300['day']-1, unit='d') + LR_0300.index = (start_date+pd.to_timedelta(LR_0300['day']-1, unit='d') + pd.to_timedelta(LR_0300['minute'], unit='T')) LR_0300 = LR_0300.drop(columns=['day', 'minute']).astype(float) dfs.append(LR_0300) @@ -312,8 +317,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): # Sort columns to match original order and assign column names LR_0500 = LR_0500.reindex(sorted(LR_0500.columns), axis='columns') LR_0500.columns = BSRN_LR0500_COLUMNS - LR_0500.index = (start_date - + pd.to_timedelta(LR_0500['day']-1, unit='d') + LR_0500.index = (start_date+pd.to_timedelta(LR_0500['day']-1, unit='d') + pd.to_timedelta(LR_0500['minute'], unit='T')) LR_0500 = LR_0500.drop(columns=['empty', 'day', 'minute']) dfs.append(LR_0500) @@ -341,7 +345,7 @@ def read_bsrn(filename, logical_records=['0100']): ---------- filename: str or path-like Name or path of a BSRN station-to-archive data file - logical_records: list, default: ['0100'] + logical_records: str or list, default: ['0100'] List of the logical records (LR) to parse. Options are: 0100, 0300, and 0500. @@ -405,4 +409,5 @@ def read_bsrn(filename, logical_records=['0100']): else: open_func, mode = open, 'r' with open_func(filename, mode) as f: - return parse_bsrn(f, logical_records) + content = parse_bsrn(f, logical_records) + return content From 8494f6b0526146b1555e24a4e35238029c5aca2c Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Tue, 20 Jul 2021 09:16:29 -0600 Subject: [PATCH 23/32] Add Hint section --- pvlib/iotools/bsrn.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index b97f4124c2..c2f3750407 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -84,19 +84,20 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], KeyError If the specified station does not exist on the FTP server. - Warning - ------- + Warns + ----- UserWarning If a requested file is missing a UserWarning is returned with the filename. Also, if no files match the specified station and timeframe. Notes ----- - Required username and password can be obtained for free as described in the - BSRN's Data Release Guidelines [3]_. + The username and password for the BSRN FTP server can be obtained for free + as described in the BSRN's Data Release Guidelines [3]_. - Currently only parsing of LR0100, LR0300, and LR0500 is supported. LR0100 - is contains the basic irradiance and auxillary measurements. See + Currently only parsing of LR0100, LR0300, and LR0500 is supported. Note + not all stations measure LR0300 and LR0500. However, LR0100 is mandatory as + it contains the basic irradiance and auxillary measurements. See [4]_ for a description of the different logical records. Future updates may include parsing of additional data and metadata. 
@@ -387,6 +388,11 @@ def read_bsrn(filename, logical_records=['0100']): For fields for other logical records, see [2]_. + Hint + ---- + According to [2]_ ""All time labels in the station-to-archive files denote + the start of a time interval." This corresponds to left bin edge labeling. + See Also -------- pvlib.iotools.parse_bsrn, pvlib.iotools.get_bsrn From cc7369cf7166f8fcc2879eaa8195d19b481e2b85 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 21 Jul 2021 20:22:05 -0600 Subject: [PATCH 24/32] Add function for empty dataframe and restructure data docs --- pvlib/iotools/bsrn.py | 125 ++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index c2f3750407..f1e3201007 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -15,7 +15,9 @@ (27, 32), (32, 39), (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)] -BSRN_LR0300_COL_SPECS = [(0, 3), (4, 9), (10, 16), (16, 22), (22, 27)] +BSRN_LR0300_COL_SPECS = [(1, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27,31), + (31, 38), (38, 44), (44, 49), (49, 54), (54, 61), + (61, 67), (67, 72), (72, 78)] BSRN_LR0500_COL_SPECS = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32), (32, 38), (38, 44), (44, 50), (50, 56), (56, 62), @@ -29,8 +31,10 @@ 'lwd', 'lwd_std', 'lwd_min', 'lwd_max', 'temp_air', 'relative_humidity', 'pressure'] -BSRN_LR0300_COLUMNS = ['day', 'minute', 'upward short-wave reflected', - 'upward long-wave', 'net radiation'] +BSRN_LR0300_COLUMNS = ['day', 'minute', 'gri', 'gri_std', 'gri_min', 'gri_max', + 'lwu', 'lwu_std', 'lwu_min', 'lwu_max', 'net_radiation', + 'net_radiation_std', 'net_radiation_min', + 'net_radiation_max'] BSRN_LR0500_COLUMNS = ['day', 'minute', 'uva_global_mean', 'uva_global_std', 'uva_global_min', 'uva_global_max', 'uvb_direct_mean', @@ -43,6 +47,18 @@ 'uvb_reflect_mean', 'uvb_reflect_std', 'uvb_reflect_min', 'uvb_reflect_max'] +BSRN_COLUMNS = {'0100': BSRN_LR0100_COLUMNS, '0500': BSRN_LR0500_COLUMNS, + '0500': BSRN_LR0500_COLUMNS} + + +def _empty_dataframe_from_logical_records(logical_records): + # Create an empty DataFrame with the column names corresponding to the + # requested logical records + columns = [] + for lr in logical_records: + columns += BSRN_COLUMNS[lr][2:] + return pd.DataFrame(columns=columns) + def get_bsrn(start, end, station, username, password, logical_records=['0100'], local_path=None): @@ -52,6 +68,8 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. Data is retrieved from the BSRN FTP server [2]_. + + Data is returned for the entire months between and including start and end. Parameters ---------- @@ -68,16 +86,17 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], logical_records: str or list, default: ['0100'] List of the logical records (LR) to parse. Options are: '0100', '0300', and '0500'. - local_path: str or path-like, default: None, optional + local_path: str or path-like, optional If specified, path (abs. or relative) of where to save files Returns ------- data: DataFrame timeseries data from the BSRN archive, see - :func:`pvlib.iotools.read_bsrn` for fields + :func:`pvlib.iotools.read_bsrn` for fields. Empty DataFrame if data is + not available for the specified period. 
metadata: dict - metadata for the last available monthly file + metadata for the last available monthly file. Raises ------ @@ -156,16 +175,16 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], # Check that transfer was successfull if not response.startswith('226 Transfer complete'): raise ftplib.Error(response) - # Decompress/unzip and decode the binary file - text = gzip.decompress(bio.getvalue()).decode('utf-8') - # Convert string to StringIO and parse data - dfi, metadata = parse_bsrn(io.StringIO(text), logical_records) - dfs.append(dfi) # Save file locally if local_path is specified if local_path is not None: # Create local file with open(os.path.join(local_path, filename), 'wb') as f: f.write(bio.getbuffer()) # Write local file + # Decompress/unzip and decode the binary file + text = gzip.decompress(bio.getvalue()).decode('latin1') + # Convert string to StringIO and parse data + dfi, metadata = parse_bsrn(io.StringIO(text), logical_records) + dfs.append(dfi) # FTP client raises an error if the file does not exist on server except ftplib.error_perm as e: if str(e) == '550 Failed to open file.': @@ -184,7 +203,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], if len(dfs): data = pd.concat(dfs, axis='rows') else: # Return empty dataframe - data = pd.DataFrame(columns=BSRN_LR0100_COLUMNS) + data = _empty_dataframe_from_logical_records(logical_records) metadata = {} # Return dataframe and metadata (metadata belongs to last available file) return data, metadata @@ -199,14 +218,14 @@ def parse_bsrn(fbuf, logical_records=['0100']): fbuf: file-like buffer Buffer of a BSRN station-to-archive data file logical_records: str or list, default: ['0100'] - List of the logical records (LR) to parse. Options are: 0100, 0300, - and 0500. + List of the logical records (LR) to parse. Options are: '0100', '0300', + and '0500'. Returns ------- data: DataFrame - A DataFrame containing time-series measurement data. See - pvlib.iotools.read_bsrn for fields. + timeseries data from the BSRN archive, see + :func:`pvlib.iotools.read_bsrn` for fields. metadata: dict Dictionary containing metadata (primarily from LR0004). 
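Since the raw monthly files are now written to local_path before being parsed, a download can double as a cache. A sketch, assuming the target directory already exists and that the saved names follow the SSSMMYY.dat.gz pattern generated above; 'bsrn_cache' and the June 2020 Cabauw file are placeholders, and the credentials come from the environment as in the earlier sketch:

    import os

    import pandas as pd
    import pvlib

    username = os.environ['BSRN_FTP_USERNAME']
    password = os.environ['BSRN_FTP_PASSWORD']

    # First call: download from the FTP server and keep the raw files locally
    data, metadata = pvlib.iotools.get_bsrn(
        start=pd.Timestamp(2020, 6, 1), end=pd.Timestamp(2020, 6, 30),
        station='cab', username=username, password=password,
        local_path='bsrn_cache')

    # Later: re-read the cached month without touching the FTP server
    data2, metadata2 = pvlib.iotools.read_bsrn('bsrn_cache/cab0620.dat.gz')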
@@ -230,7 +249,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): if line[2:6] == '0004': # stop once LR0004 has been reached break elif line == '': - raise ValueError('Mandatatory record LR0004 not found.') + raise ValueError('Mandatory record LR0004 not found.') metadata['date when station description changed'] = fbuf.readline().strip() metadata['surface type'] = int(fbuf.readline(3)) metadata['topography type'] = int(fbuf.readline()) @@ -252,7 +271,10 @@ def parse_bsrn(fbuf, logical_records=['0100']): break else: horizon += [int(i) for i in line.split()] - metadata['horizon'] = pd.Series(horizon[1::2], horizon[::2]).sort_index().drop(-1) # noqa: E501 + horizon = pd.Series(horizon[1::2], horizon[::2], name='horizon_elevation', + dtype=int).drop(-1, errors='ignore').sort_index() + horizon.index.name = 'azimuth' + metadata['horizon'] = horizon # Read file and store the starting line number and number of lines for # each logical record (LR) @@ -323,7 +345,11 @@ def parse_bsrn(fbuf, logical_records=['0100']): LR_0500 = LR_0500.drop(columns=['empty', 'day', 'minute']) dfs.append(LR_0500) - data = pd.concat(dfs, axis='columns') + if len(dfs): + data = pd.concat(dfs, axis='columns') + else: + data = _empty_dataframe_from_logical_records(logical_records) + metadata = {} return data, metadata @@ -333,28 +359,29 @@ def read_bsrn(filename, logical_records=['0100']): The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. - The function is able to parse LR0100, LR0300, and LR0500. LR0100 contains - the basic measurements, which include global, diffuse, and direct - irradiance, as well as downwelling long-wave radiation [2]_. Future updates - may include parsing of additional data and metadata. + The function is able to parse logical records (LR) 0100, 0300, and 0500. + LR0100 contains the basic measurements, which include global, diffuse, and + direct irradiance, as well as downwelling long-wave radiation [2]_. Future + updates may include parsing of additional data and metadata. - BSRN files are freely available and can be accessed via FTP [3]_. Required - username and password are easily obtainable as described in the BSRN's - Data Release Guidelines [4]_. + BSRN files are freely available and can be accessed via FTP [3]_. The + username and password for the BSRN FTP server can be obtained for free as + described in the BSRN's Data Release Guidelines [3]_. Parameters ---------- filename: str or path-like Name or path of a BSRN station-to-archive data file logical_records: str or list, default: ['0100'] - List of the logical records (LR) to parse. Options are: 0100, 0300, - and 0500. + List of the logical records (LR) to parse. Options are: '0100', '0300', + and '0500'. Returns ------- data: DataFrame A DataFrame with the columns as described below. For more extensive - description of the variables, consult [2]_. + description of the variables, consult [2]_. An empty DataFrame is + returned if the specificed logical records were not found. metadata: dict Dictionary containing metadata (primarily from LR0004). @@ -365,28 +392,34 @@ def read_bsrn(filename, logical_records=['0100']): ======================= ====== ========================================== Key Format Description ======================= ====== ========================================== - ghi float Mean global horizontal irradiance [W/m^2] - ghi_std float Std. global horizontal irradiance [W/m^2] - ghi_min float Min. 
global horizontal irradiance [W/m^2] - ghi_max float Max. global horizontal irradiance [W/m^2] - dni float Mean direct normal irradiance [W/m^2] - dni_std float Std. direct normal irradiance [W/m^2] - dni_min float Min. direct normal irradiance [W/m^2] - dni_max float Max. direct normal irradiance [W/m^2] - dhi float Mean diffuse horizontal irradiance [W/m^2] - dhi_std float Std. diffuse horizontal irradiance [W/m^2] - dhi_min float Min. diffuse horizontal irradiance [W/m^2] - dhi_max float Max. diffuse horizontal irradiance [W/m^2] - lwd float Mean. downward long-wave radiation [W/m^2] - lwd_std float Std. downward long-wave radiation [W/m^2] - lwd_min float Min. downward long-wave radiation [W/m^2] - lwd_max float Max. downward long-wave radiation [W/m^2] + *Logical record 0100* + --------------------------------------------------------------------------- + ghi† float Mean global horizontal irradiance [W/m^2] + dni† float Mean direct normal irradiance [W/m^2] + dhi† float Mean diffuse horizontal irradiance [W/m^2] + lwd† float Mean. downward long-wave radiation [W/m^2] temp_air float Air temperature [°C] relative_humidity float Relative humidity [%] pressure float Atmospheric pressure [hPa] + --------------------------------------------------------------------------- + *Logical record 0300* + --------------------------------------------------------------------------- + gri† float + lwu† float Lowng-wave upwelling irradiance [W/m^2] + net_radiation† float Net radiation (net radiometer) [W/m^2] + --------------------------------------------------------------------------- + *Logical record 0500* + --------------------------------------------------------------------------- + uva_global† float UV-A global irradiance [W/m^2] + uvb_direct† float UV-B direct irradiance [W/m^2] + uvb_global† float UV-B global irradiance [W/m^2] + uvb_diffuse† float UV-B diffuse irradiance [W/m^2] + uvb_reflected† float UV-B reflected irradiance [W/m^2] ======================= ====== ========================================== - For fields for other logical records, see [2]_. + † Marked variables have corresponding columns for the standard devaiation + (_std), minimum (_min), and maximum (_max) based on the 60 samples made + for each minute. 
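A short sketch of working with the per-minute statistics described by the footnote; 'cab0620.dat.gz' is a placeholder for a locally stored monthly file, and the column names follow the table above:

    import pvlib

    data, metadata = pvlib.iotools.read_bsrn('cab0620.dat.gz')

    # Mean plus standard deviation, minimum and maximum for one variable
    ghi_stats = data[['ghi', 'ghi_std', 'ghi_min', 'ghi_max']]

    # Daily peak of the 1-minute maxima (the index is a UTC DatetimeIndex)
    daily_peak_ghi = data['ghi_max'].resample('1D').max()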
Hint ---- From 12791cd343f1c12afb9196f4d9b33e38369aed82 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 21 Jul 2021 20:44:35 -0600 Subject: [PATCH 25/32] Add gri to list of variables --- pvlib/data/variables_style_rules.csv | 1 + pvlib/iotools/bsrn.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pvlib/data/variables_style_rules.csv b/pvlib/data/variables_style_rules.csv index 34190625bb..a56dddd161 100644 --- a/pvlib/data/variables_style_rules.csv +++ b/pvlib/data/variables_style_rules.csv @@ -7,6 +7,7 @@ dni_extra;direct normal irradiance at top of atmosphere (extraterrestrial) dhi;diffuse horizontal irradiance bhi;beam/direct horizontal irradiance ghi;global horizontal irradiance +gri;ground-reflected irradiance aoi;angle of incidence between :math:`90\deg` and :math:`90\deg` aoi_projection;cos(aoi) airmass;airmass diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index f1e3201007..cfc1874fae 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -15,9 +15,9 @@ (27, 32), (32, 39), (39, 45), (45, 50), (50, 55), (55, 64), (64, 70), (70, 75)] -BSRN_LR0300_COL_SPECS = [(1, 3), (4, 9), (10, 16), (16, 22), (22, 27), (27,31), - (31, 38), (38, 44), (44, 49), (49, 54), (54, 61), - (61, 67), (67, 72), (72, 78)] +BSRN_LR0300_COL_SPECS = [(1, 3), (4, 9), (10, 16), (16, 22), (22, 27), + (27, 31), (31, 38), (38, 44), (44, 49), (49, 54), + (54, 61), (61, 67), (67, 72), (72, 78)] BSRN_LR0500_COL_SPECS = [(0, 3), (3, 8), (8, 14), (14, 20), (20, 26), (26, 32), (32, 38), (38, 44), (44, 50), (50, 56), (56, 62), @@ -47,7 +47,7 @@ 'uvb_reflect_mean', 'uvb_reflect_std', 'uvb_reflect_min', 'uvb_reflect_max'] -BSRN_COLUMNS = {'0100': BSRN_LR0100_COLUMNS, '0500': BSRN_LR0500_COLUMNS, +BSRN_COLUMNS = {'0100': BSRN_LR0100_COLUMNS, '0300': BSRN_LR0300_COLUMNS, '0500': BSRN_LR0500_COLUMNS} @@ -68,7 +68,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], The BSRN (Baseline Surface Radiation Network) is a world wide network of high-quality solar radiation monitoring stations as described in [1]_. Data is retrieved from the BSRN FTP server [2]_. - + Data is returned for the entire months between and including start and end. 
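Because whole months are retrieved, the mapping from a start/end pair to monthly archive files can be illustrated with the same filename logic used inside get_bsrn; a self-contained sketch for a hypothetical Cabauw ('cab') request:

    import pandas as pd

    start, end = pd.Timestamp(2020, 6, 10), pd.Timestamp(2020, 8, 5)
    filenames = pd.date_range(start, end + pd.DateOffset(months=1), freq='1M')\
        .strftime('cab%m%y.dat.gz').tolist()
    # ['cab0620.dat.gz', 'cab0720.dat.gz', 'cab0820.dat.gz'] -- full months,
    # so trim the concatenated result afterwards if an exact window is needed.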
Parameters @@ -392,7 +392,7 @@ def read_bsrn(filename, logical_records=['0100']): ======================= ====== ========================================== Key Format Description ======================= ====== ========================================== - *Logical record 0100* + **Logical record 0100** --------------------------------------------------------------------------- ghi† float Mean global horizontal irradiance [W/m^2] dni† float Mean direct normal irradiance [W/m^2] @@ -402,13 +402,13 @@ def read_bsrn(filename, logical_records=['0100']): relative_humidity float Relative humidity [%] pressure float Atmospheric pressure [hPa] --------------------------------------------------------------------------- - *Logical record 0300* + **Logical record 0300** --------------------------------------------------------------------------- - gri† float + gri† float Ground-reflected irradiance [W/m^2] lwu† float Lowng-wave upwelling irradiance [W/m^2] net_radiation† float Net radiation (net radiometer) [W/m^2] --------------------------------------------------------------------------- - *Logical record 0500* + **Logical record 0500** --------------------------------------------------------------------------- uva_global† float UV-A global irradiance [W/m^2] uvb_direct† float UV-B direct irradiance [W/m^2] From 9289760683183e42bc67e2abdf69a2de3f5052f1 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 21 Jul 2021 21:16:34 -0600 Subject: [PATCH 26/32] Coverage for records not found --- pvlib/iotools/bsrn.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index cfc1874fae..f196ea24ba 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -36,16 +36,16 @@ 'net_radiation_std', 'net_radiation_min', 'net_radiation_max'] -BSRN_LR0500_COLUMNS = ['day', 'minute', 'uva_global_mean', 'uva_global_std', - 'uva_global_min', 'uva_global_max', 'uvb_direct_mean', +BSRN_LR0500_COLUMNS = ['day', 'minute', 'uva_global', 'uva_global_std', + 'uva_global_min', 'uva_global_max', 'uvb_direct', 'uvb_direct_std', 'uvb_direct_min', 'uvb_direct_max', 'empty', 'empty', 'empty', 'empty', - 'uvb_global_mean', 'uvb_global_std', 'uvb_global_min', - 'uvb_global_max', 'uvb_diffuse_mean', 'uvb_diffuse_std', - 'uvb_diffuse_mean', 'uvb_diffuse_std', + 'uvb_global', 'uvb_global_std', 'uvb_global_min', + 'uvb_global_max', 'uvb_diffuse', 'uvb_diffuse_std', + 'uvb_diffuse', 'uvb_diffuse_std', 'uvb_diffuse_min', 'uvb_diffuse_max', - 'uvb_reflect_mean', 'uvb_reflect_std', - 'uvb_reflect_min', 'uvb_reflect_max'] + 'uvb_reflected', 'uvb_reflected_std', + 'uvb_reflected_min', 'uvb_reflected_max'] BSRN_COLUMNS = {'0100': BSRN_LR0100_COLUMNS, '0300': BSRN_LR0300_COLUMNS, '0500': BSRN_LR0500_COLUMNS} @@ -404,17 +404,17 @@ def read_bsrn(filename, logical_records=['0100']): --------------------------------------------------------------------------- **Logical record 0300** --------------------------------------------------------------------------- - gri† float Ground-reflected irradiance [W/m^2] - lwu† float Lowng-wave upwelling irradiance [W/m^2] - net_radiation† float Net radiation (net radiometer) [W/m^2] + gri† float Mean ground-reflected irradiance [W/m^2] + lwu† float Mean lowng-wave upwelling irradiance [W/m^2] + net_radiation† float Mean net radiation (net radiometer) [W/m^2] --------------------------------------------------------------------------- **Logical record 0500** 
--------------------------------------------------------------------------- - uva_global† float UV-A global irradiance [W/m^2] - uvb_direct† float UV-B direct irradiance [W/m^2] - uvb_global† float UV-B global irradiance [W/m^2] - uvb_diffuse† float UV-B diffuse irradiance [W/m^2] - uvb_reflected† float UV-B reflected irradiance [W/m^2] + uva_global† float Mean UV-A global irradiance [W/m^2] + uvb_direct† float Mean UV-B direct irradiance [W/m^2] + uvb_global† float Mean UV-B global irradiance [W/m^2] + uvb_diffuse† float Mean UV-B diffuse irradiance [W/m^2] + uvb_reflected† float Mean UV-B reflected irradiance [W/m^2] ======================= ====== ========================================== † Marked variables have corresponding columns for the standard devaiation From 735666dbd5c7ece5cf7bbf8e3e04d5d29628dd9d Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 21 Jul 2021 21:18:02 -0600 Subject: [PATCH 27/32] Coverage for no logical records found --- pvlib/tests/iotools/test_bsrn.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index d82e6e8c49..a2873c37b2 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -47,10 +47,10 @@ def test_read_bsrn_logical_records(expected_index): data, metadata = read_bsrn(DATA_DIR / 'bsrn-pay0616.dat.gz', logical_records=['0300', '0500']) assert_index_equal(expected_index, data.index) + assert 'lwu' in data.columns + assert 'uva_global' in data.columns + assert 'uvb_reflected_std' in data.columns assert 'ghi' not in data.columns - assert 'upward long-wave' in data.columns - assert 'uva_global_mean' in data.columns - assert 'uvb_reflect_std' in data.columns def test_read_bsrn_bad_logical_record(): @@ -60,6 +60,18 @@ def test_read_bsrn_bad_logical_record(): logical_records=['dummy']) +def test_read_bsrn_logical_records_not_found(): + # Test if an empty dataframe is returned if specified LRs are not present + data, metadata = read_bsrn(DATA_DIR / 'bsrn-lr0100-pay0616.dat', + logical_records=['0300', '0500']) + assert_index_equal(pd.DataFrame().index, data.index) + assert 'uva_global' in data.columns + assert 'uvb_reflected_std' in data.columns + assert 'uva_global_max' in data.columns + assert 'dni' not in data.columns + assert 'day' not in data.columns + + @requires_bsrn_credentials @pytest.mark.remote_data @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY) From ac018dd5e8f5a23d0b6432eac509cec09a31d9cf Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Wed, 21 Jul 2021 22:54:25 -0600 Subject: [PATCH 28/32] Formatting of data columns table --- pvlib/iotools/bsrn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index f196ea24ba..9a073c32cd 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -401,13 +401,13 @@ def read_bsrn(filename, logical_records=['0100']): temp_air float Air temperature [°C] relative_humidity float Relative humidity [%] pressure float Atmospheric pressure [hPa] - --------------------------------------------------------------------------- + ----------------------- ------ ------------------------------------------ **Logical record 0300** --------------------------------------------------------------------------- gri† float Mean ground-reflected irradiance [W/m^2] lwu† float Mean lowng-wave upwelling irradiance [W/m^2] net_radiation† float Mean net radiation (net radiometer) [W/m^2] - 
--------------------------------------------------------------------------- + ----------------------- ------ ------------------------------------------ **Logical record 0500** --------------------------------------------------------------------------- uva_global† float Mean UV-A global irradiance [W/m^2] @@ -417,7 +417,7 @@ def read_bsrn(filename, logical_records=['0100']): uvb_reflected† float Mean UV-B reflected irradiance [W/m^2] ======================= ====== ========================================== - † Marked variables have corresponding columns for the standard devaiation + † Marked variables have corresponding columns for the standard deviation (_std), minimum (_min), and maximum (_max) based on the 60 samples made for each minute. @@ -442,7 +442,7 @@ def read_bsrn(filename, logical_records=['0100']): `_ .. [4] `BSRN Data Release Guidelines `_ - """ + """ # noqa: E501 if str(filename).endswith('.gz'): # check if file is a gzipped (.gz) file open_func, mode = gzip.open, 'rt' else: From 6a4e75c0b221513f6577587b2c322a0d0810df6a Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Thu, 22 Jul 2021 13:01:15 -0600 Subject: [PATCH 29/32] Merge read_ and get_bsrn in whatsnew --- docs/sphinx/source/whatsnew/v0.9.0.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.9.0.rst b/docs/sphinx/source/whatsnew/v0.9.0.rst index 686c060a5a..f5c7accc11 100644 --- a/docs/sphinx/source/whatsnew/v0.9.0.rst +++ b/docs/sphinx/source/whatsnew/v0.9.0.rst @@ -103,15 +103,13 @@ Deprecations Enhancements ~~~~~~~~~~~~ -* Add :func:`~pvlib.iotools.get_bsrn` and :func:`~pvlib.iotools.read_bsrn` - for retrieving and reading BSRN solar radiation data files. - (:pull:`1254`, :pull:`1145`, :issue:`1015`) * Added :func:`~pvlib.iotools.read_pvgis_hourly` and :func:`~pvlib.iotools.get_pvgis_hourly` for reading and retrieving hourly solar radiation data and PV power output from PVGIS. (:pull:`1186`, :issue:`849`) -* Add :func:`~pvlib.iotools.read_bsrn` for reading BSRN solar radiation data - files. (:pull:`1145`, :issue:`1015`) +* Add :func:`~pvlib.iotools.get_bsrn` and :func:`~pvlib.iotools.read_bsrn` + for retrieving and reading BSRN solar radiation data files. + (:pull:`1254`, :pull:`1145`, :issue:`1015`) * Add :func:`~pvlib.iotools.get_cams`, :func:`~pvlib.iotools.parse_cams`, and :func:`~pvlib.iotools.read_cams` From f96b39b333d0d9239b38d7e3452f66d2337ec35e Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Thu, 22 Jul 2021 13:34:19 -0600 Subject: [PATCH 30/32] Changes from review by kandersol-nrel --- pvlib/iotools/bsrn.py | 79 +++++++++++++++++--------------- pvlib/tests/iotools/test_bsrn.py | 4 +- 2 files changed, 43 insertions(+), 40 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 9a073c32cd..4b58400862 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -60,8 +60,8 @@ def _empty_dataframe_from_logical_records(logical_records): return pd.DataFrame(columns=columns) -def get_bsrn(start, end, station, username, password, logical_records=['0100'], - local_path=None): +def get_bsrn(start, end, station, username, password, + logical_records=('0100',), local_path=None): """ Retrieve ground measured irradiance data from the BSRN FTP server. 
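The switch from a list default (['0100']) to a tuple default (('0100',)) in this review round follows the usual Python guidance against mutable default arguments; a tiny, purely illustrative demonstration (append_record is not part of pvlib):

    def append_record(records=['0100']):
        records.append('0300')  # mutates the single shared default object
        return records

    print(append_record())  # ['0100', '0300']
    print(append_record())  # ['0100', '0300', '0300'] -- state leaked between calls

    # A tuple default such as ('0100',) cannot be mutated in place, so every
    # call sees exactly the documented default.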
@@ -83,9 +83,9 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], username for accessing the BSRN FTP server password: str password for accessing the BSRN FTP server - logical_records: str or list, default: ['0100'] - List of the logical records (LR) to parse. Options are: '0100', '0300', - and '0500'. + logical_records: list or tuple, default: ('0100',) + List of the logical records (LR) to parse. Options include: '0100', + '0300', and '0500'. local_path: str or path-like, optional If specified, path (abs. or relative) of where to save files @@ -93,8 +93,8 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], ------- data: DataFrame timeseries data from the BSRN archive, see - :func:`pvlib.iotools.read_bsrn` for fields. Empty DataFrame if data is - not available for the specified period. + :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is + returned if no data was found for the time period. metadata: dict metadata for the last available monthly file. @@ -106,19 +106,20 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], Warns ----- UserWarning - If a requested file is missing a UserWarning is returned with the - filename. Also, if no files match the specified station and timeframe. + If one or more requested files are missing a UserWarning is returned + with a list of the filenames missing. If no files match the specified + station and timeframe a seperate UserWarning is given. Notes ----- The username and password for the BSRN FTP server can be obtained for free as described in the BSRN's Data Release Guidelines [3]_. - Currently only parsing of LR0100, LR0300, and LR0500 is supported. Note - not all stations measure LR0300 and LR0500. However, LR0100 is mandatory as - it contains the basic irradiance and auxillary measurements. See - [4]_ for a description of the different logical records. Future updates may - include parsing of additional data and metadata. + Currently only parsing of logical records 0100, 0300 and 0500 is supported. + Note not all stations measure LR0300 and LR0500. However, LR0100 is + mandatory as it contains the basic irradiance and auxillary measurements. + See [4]_ for a description of the different logical records. Future updates + may include parsing of additional data and metadata. 
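A sketch of requesting more than one logical record at a time, using the Payerne test file referenced elsewhere in this series as a stand-in for any locally stored archive; the ghi, lwu and net_radiation column names come from the LR0100 and LR0300 column lists defined earlier:

    import pvlib

    data, metadata = pvlib.iotools.read_bsrn(
        'bsrn-pay0616.dat.gz',  # placeholder path to a local monthly file
        logical_records=('0100', '0300'))

    # Basic irradiance next to the upwelling/net-radiation record; if none of
    # the requested records are present an empty DataFrame is returned.
    print(data[['ghi', 'lwu', 'net_radiation']].head())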
Important --------- @@ -153,7 +154,8 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], station = station.lower() # Generate list files to download based on start/end (SSSMMYY.dat.gz) - filenames = pd.date_range(start, end + pd.DateOffset(months=1), freq='1M')\ + filenames = pd.date_range( + start, end.replace(day=1) + pd.DateOffset(months=1), freq='1M')\ .strftime(f"{station}%m%y.dat.gz").tolist() # Create FTP connection @@ -180,10 +182,10 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], # Create local file with open(os.path.join(local_path, filename), 'wb') as f: f.write(bio.getbuffer()) # Write local file - # Decompress/unzip and decode the binary file - text = gzip.decompress(bio.getvalue()).decode('latin1') - # Convert string to StringIO and parse data - dfi, metadata = parse_bsrn(io.StringIO(text), logical_records) + # Open gzip file and convert to StringIO + gzip_file = io.TextIOWrapper(gzip.GzipFile(fileobj=bio), + encoding='latin1') + dfi, metadata = parse_bsrn(gzip_file, logical_records) dfs.append(dfi) # FTP client raises an error if the file does not exist on server except ftplib.error_perm as e: @@ -209,7 +211,7 @@ def get_bsrn(start, end, station, username, password, logical_records=['0100'], return data, metadata -def parse_bsrn(fbuf, logical_records=['0100']): +def parse_bsrn(fbuf, logical_records=('0100',)): """ Parse a file-like buffer of a BSRN station-to-archive file. @@ -217,15 +219,16 @@ def parse_bsrn(fbuf, logical_records=['0100']): ---------- fbuf: file-like buffer Buffer of a BSRN station-to-archive data file - logical_records: str or list, default: ['0100'] - List of the logical records (LR) to parse. Options are: '0100', '0300', - and '0500'. + logical_records: list or tuple, default: ('0100',) + List of the logical records (LR) to parse. Options include: '0100', + '0300', and '0500'. Returns ------- data: DataFrame timeseries data from the BSRN archive, see - :func:`pvlib.iotools.read_bsrn` for fields. + :func:`pvlib.iotools.read_bsrn` for fields. An empty DataFrame is + returned if the specified logical records were not found. metadata: dict Dictionary containing metadata (primarily from LR0004). @@ -253,7 +256,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): metadata['date when station description changed'] = fbuf.readline().strip() metadata['surface type'] = int(fbuf.readline(3)) metadata['topography type'] = int(fbuf.readline()) - metadata['address'] = fbuf.readline().strip().strip() + metadata['address'] = fbuf.readline().strip() metadata['telephone no. of station'] = fbuf.readline(20).strip() metadata['FAX no. of station'] = fbuf.readline().strip() metadata['TCP/IP no. 
of station'] = fbuf.readline(15).strip() @@ -284,15 +287,15 @@ def parse_bsrn(fbuf, logical_records=['0100']): for num, line in enumerate(fbuf): if line.startswith('*'): # Find start of all logical records if len(lr_startrow) >= 1: - lr_nrows[lr] = num - max(lr_startrow.values())-1 # noqa: F821 + lr_nrows[lr] = num - lr_startrow[lr] - 1 # noqa: F821 lr = line[2:6] # string of 4 digit LR number lr_startrow[lr] = num lr_nrows[lr] = num - lr_startrow[lr] - for lr in list(logical_records): + for lr in logical_records: if lr not in ['0100', '0300', '0500']: raise ValueError(f"Logical record {lr} not in " - f"{['0100', '0300','0500']}.") + "['0100', '0300','0500'].") dfs = [] # Initialize empty list for dataframe # Parse LR0100 - basic measurements including GHI, DNI, DHI and temperature @@ -353,7 +356,7 @@ def parse_bsrn(fbuf, logical_records=['0100']): return data, metadata -def read_bsrn(filename, logical_records=['0100']): +def read_bsrn(filename, logical_records=('0100',)): """ Read a BSRN station-to-archive file into a DataFrame. @@ -372,16 +375,16 @@ def read_bsrn(filename, logical_records=['0100']): ---------- filename: str or path-like Name or path of a BSRN station-to-archive data file - logical_records: str or list, default: ['0100'] - List of the logical records (LR) to parse. Options are: '0100', '0300', - and '0500'. + logical_records: list or tuple, default: ('0100',) + List of the logical records (LR) to parse. Options include: '0100', + '0300', and '0500'. Returns ------- data: DataFrame - A DataFrame with the columns as described below. For more extensive + A DataFrame with the columns as described below. For a more extensive description of the variables, consult [2]_. An empty DataFrame is - returned if the specificed logical records were not found. + returned if the specified logical records were not found. metadata: dict Dictionary containing metadata (primarily from LR0004). @@ -405,7 +408,7 @@ def read_bsrn(filename, logical_records=['0100']): **Logical record 0300** --------------------------------------------------------------------------- gri† float Mean ground-reflected irradiance [W/m^2] - lwu† float Mean lowng-wave upwelling irradiance [W/m^2] + lwu† float Mean long-wave upwelling irradiance [W/m^2] net_radiation† float Mean net radiation (net radiometer) [W/m^2] ----------------------- ------ ------------------------------------------ **Logical record 0500** @@ -418,12 +421,12 @@ def read_bsrn(filename, logical_records=['0100']): ======================= ====== ========================================== † Marked variables have corresponding columns for the standard deviation - (_std), minimum (_min), and maximum (_max) based on the 60 samples made - for each minute. + (_std), minimum (_min), and maximum (_max) calculated from the 60 samples + that are average into each 1-minute measurement. Hint ---- - According to [2]_ ""All time labels in the station-to-archive files denote + According to [2]_ "All time labels in the station-to-archive files denote the start of a time interval." This corresponds to left bin edge labeling. See Also diff --git a/pvlib/tests/iotools/test_bsrn.py b/pvlib/tests/iotools/test_bsrn.py index a2873c37b2..412cbd5e8f 100644 --- a/pvlib/tests/iotools/test_bsrn.py +++ b/pvlib/tests/iotools/test_bsrn.py @@ -14,7 +14,7 @@ def bsrn_credentials(): """Supplies the BSRN FTP credentials for testing purposes. 
- Users should obtain there own credentials as described in the `read_bsrn` + Users should obtain their own credentials as described in the `read_bsrn` documentation.""" bsrn_username = os.environ["BSRN_FTP_USERNAME"] bsrn_password = os.environ["BSRN_FTP_PASSWORD"] @@ -64,7 +64,7 @@ def test_read_bsrn_logical_records_not_found(): # Test if an empty dataframe is returned if specified LRs are not present data, metadata = read_bsrn(DATA_DIR / 'bsrn-lr0100-pay0616.dat', logical_records=['0300', '0500']) - assert_index_equal(pd.DataFrame().index, data.index) + assert data.empty # assert that the dataframe is empty assert 'uva_global' in data.columns assert 'uvb_reflected_std' in data.columns assert 'uva_global_max' in data.columns From 4ca69f62e75fc90497f408f104fdd5238e297a23 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Thu, 22 Jul 2021 13:44:07 -0600 Subject: [PATCH 31/32] Add lat/lon ISO 19115 convention to metadata --- pvlib/iotools/bsrn.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 4b58400862..3ff347d652 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -261,8 +261,10 @@ def parse_bsrn(fbuf, logical_records=('0100',)): metadata['FAX no. of station'] = fbuf.readline().strip() metadata['TCP/IP no. of station'] = fbuf.readline(15).strip() metadata['e-mail address of station'] = fbuf.readline().strip() - metadata['latitude'] = float(fbuf.readline(8)) - metadata['longitude'] = float(fbuf.readline(8)) + metadata['latitude_bsrn'] = float(fbuf.readline(8)) # BSRN convention + metadata['latitude'] = metadata['latitude_bsrn'] - 90 # ISO 19115 + metadata['longitude_bsrn'] = float(fbuf.readline(8)) # BSRN convention + metadata['longitude'] = metadata['longitude_bsrn'] - 180 # ISO 19115 metadata['altitude'] = int(fbuf.readline(5)) metadata['identification of "SYNOP" station'] = fbuf.readline().strip() metadata['date when horizon changed'] = fbuf.readline().strip() @@ -287,7 +289,7 @@ def parse_bsrn(fbuf, logical_records=('0100',)): for num, line in enumerate(fbuf): if line.startswith('*'): # Find start of all logical records if len(lr_startrow) >= 1: - lr_nrows[lr] = num - lr_startrow[lr] - 1 # noqa: F821 + lr_nrows[lr] = num - lr_startrow[lr] - 1 # noqa: F821 lr = line[2:6] # string of 4 digit LR number lr_startrow[lr] = num lr_nrows[lr] = num - lr_startrow[lr] From 2f2cda55e34d167c510144f9cf53e657de658695 Mon Sep 17 00:00:00 2001 From: AdamRJensen Date: Thu, 22 Jul 2021 13:45:23 -0600 Subject: [PATCH 32/32] Add bio.seek(0) to get_bsrn() --- pvlib/iotools/bsrn.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pvlib/iotools/bsrn.py b/pvlib/iotools/bsrn.py index 3ff347d652..c9c3abb93d 100644 --- a/pvlib/iotools/bsrn.py +++ b/pvlib/iotools/bsrn.py @@ -183,6 +183,7 @@ def get_bsrn(start, end, station, username, password, with open(os.path.join(local_path, filename), 'wb') as f: f.write(bio.getbuffer()) # Write local file # Open gzip file and convert to StringIO + bio.seek(0) # reset buffer to start of file gzip_file = io.TextIOWrapper(gzip.GzipFile(fileobj=bio), encoding='latin1') dfi, metadata = parse_bsrn(gzip_file, logical_records)
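The closing fix is easy to miss but matters: ftp.retrbinary() leaves the BytesIO position at the end of the downloaded data, and GzipFile reads from the current position, so without bio.seek(0) the wrapped reader would find no data to decompress (getbuffer(), used for the local copy, is position-independent and therefore unaffected). A self-contained sketch of the same pattern; the payload is an arbitrary stand-in for a downloaded file:

    import gzip
    import io

    bio = io.BytesIO()
    bio.write(gzip.compress(b'*U0001 example payload\n'))  # stand-in for the FTP download
    bio.seek(0)  # rewind; GzipFile would otherwise start reading at EOF
    gzip_file = io.TextIOWrapper(gzip.GzipFile(fileobj=bio), encoding='latin1')
    print(gzip_file.read())  # '*U0001 example payload'

The preceding patch, meanwhile, keeps both coordinate conventions in the metadata: the raw archive values under 'latitude_bsrn'/'longitude_bsrn' and the shifted values (minus 90 and minus 180 degrees, per ISO 19115) under 'latitude'/'longitude'.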