From 5c94c645f4f15a1bf132d9947e4956cf46a20b28 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 5 Oct 2020 10:26:31 -0600 Subject: [PATCH 01/60] refactor detect_clearsky --- pvlib/clearsky.py | 473 +++++++++++++++++++++++++++------------------- 1 file changed, 281 insertions(+), 192 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index a6794e2195..cf812b33a8 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -597,14 +597,263 @@ def _calc_d(aod700, p): return d +def bird(zenith, airmass_relative, aod380, aod500, precipitable_water, + ozone=0.3, pressure=101325., dni_extra=1364., asymmetry=0.85, + albedo=0.2): + """ + Bird Simple Clear Sky Broadband Solar Radiation Model + + Based on NREL Excel implementation by Daryl R. Myers [1, 2]. + + Bird and Hulstrom define the zenith as the "angle between a line to + the sun and the local zenith". There is no distinction in the paper + between solar zenith and apparent (or refracted) zenith, but the + relative airmass is defined using the Kasten 1966 expression, which + requires apparent zenith. Although the formulation for calculated + zenith is never explicitly defined in the report, since the purpose + was to compare existing clear sky models with "rigorous radiative + transfer models" (RTM) it is possible that apparent zenith was + obtained as output from the RTM. However, the implentation presented + in PVLIB is tested against the NREL Excel implementation by Daryl + Myers which uses an analytical expression for solar zenith instead + of apparent zenith. + + Parameters + ---------- + zenith : numeric + Solar or apparent zenith angle in degrees - see note above + airmass_relative : numeric + Relative airmass + aod380 : numeric + Aerosol optical depth [cm] measured at 380[nm] + aod500 : numeric + Aerosol optical depth [cm] measured at 500[nm] + precipitable_water : numeric + Precipitable water [cm] + ozone : numeric + Atmospheric ozone [cm], defaults to 0.3[cm] + pressure : numeric + Ambient pressure [Pa], defaults to 101325[Pa] + dni_extra : numeric + Extraterrestrial radiation [W/m^2], defaults to 1364[W/m^2] + asymmetry : numeric + Asymmetry factor, defaults to 0.85 + albedo : numeric + Albedo, defaults to 0.2 + + Returns + ------- + clearsky : DataFrame (if Series input) or OrderedDict of arrays + DataFrame/OrderedDict contains the columns/keys + ``'dhi', 'dni', 'ghi', 'direct_horizontal'`` in [W/m^2]. + + See also + -------- + pvlib.atmosphere.bird_hulstrom80_aod_bb + pvlib.atmosphere.get_relative_airmass + + References + ---------- + .. [1] R. E. Bird and R. L Hulstrom, "A Simplified Clear Sky model for + Direct and Diffuse Insolation on Horizontal Surfaces" SERI Technical + Report SERI/TR-642-761, Feb 1981. Solar Energy Research Institute, + Golden, CO. + + .. [2] Daryl R. Myers, "Solar Radiation: Practical Modeling for Renewable + Energy Applications", pp. 46-51 CRC Press (2013) + + .. [3] `NREL Bird Clear Sky Model `_ + + .. [4] `SERI/TR-642-761 `_ + + .. [5] `Error Reports `_ + """ + etr = dni_extra # extraradiation + ze_rad = np.deg2rad(zenith) # zenith in radians + airmass = airmass_relative + # Bird clear sky model + am_press = atmosphere.get_absolute_airmass(airmass, pressure) + t_rayleigh = ( + np.exp(-0.0903 * am_press ** 0.84 * ( + 1.0 + am_press - am_press ** 1.01 + )) + ) + am_o3 = ozone*airmass + t_ozone = ( + 1.0 - 0.1611 * am_o3 * (1.0 + 139.48 * am_o3) ** -0.3034 - + 0.002715 * am_o3 / (1.0 + 0.044 * am_o3 + 0.0003 * am_o3 ** 2.0) + ) + t_gases = np.exp(-0.0127 * am_press ** 0.26) + am_h2o = airmass * precipitable_water + t_water = ( + 1.0 - 2.4959 * am_h2o / ( + (1.0 + 79.034 * am_h2o) ** 0.6828 + 6.385 * am_h2o + ) + ) + bird_huldstrom = atmosphere.bird_hulstrom80_aod_bb(aod380, aod500) + t_aerosol = np.exp( + -(bird_huldstrom ** 0.873) * + (1.0 + bird_huldstrom - bird_huldstrom ** 0.7088) * airmass ** 0.9108 + ) + taa = 1.0 - 0.1 * (1.0 - airmass + airmass ** 1.06) * (1.0 - t_aerosol) + rs = 0.0685 + (1.0 - asymmetry) * (1.0 - t_aerosol / taa) + id_ = 0.9662 * etr * t_aerosol * t_water * t_gases * t_ozone * t_rayleigh + ze_cos = np.where(zenith < 90, np.cos(ze_rad), 0.0) + id_nh = id_ * ze_cos + ias = ( + etr * ze_cos * 0.79 * t_ozone * t_gases * t_water * taa * + (0.5 * (1.0 - t_rayleigh) + asymmetry * (1.0 - (t_aerosol / taa))) / ( + 1.0 - airmass + airmass ** 1.02 + ) + ) + gh = (id_nh + ias) / (1.0 - albedo * rs) + diffuse_horiz = gh - id_nh + # TODO: be DRY, use decorator to wrap methods that need to return either + # OrderedDict or DataFrame instead of repeating this boilerplate code + irrads = OrderedDict() + irrads['direct_horizontal'] = id_nh + irrads['ghi'] = gh + irrads['dni'] = id_ + irrads['dhi'] = diffuse_horiz + if isinstance(irrads['dni'], pd.Series): + irrads = pd.DataFrame.from_dict(irrads) + return irrads + + +def _calc_stats(data, samples_per_window, sample_interval, H, align): + """ Calculates statistics for each window, used by Reno-style clear + sky detection functions + + Parameters + ---------- + data : Series + samples_per_window : int + Number of data points in each window + sample_interval : float + Time in minutes in each sample interval + H : np.array + 2D Hankel matrix that arranges data into sliding windows + align : bool, default 'left' + Alignment of labels to data in sliding window. Must be one of + 'left', 'center', 'right'. + + Returns + ------- + data_mean : np.array + mean of data in each window + data_max : np.array + maximum of data in each window + data_line_length : np.array + length of line segments connecting successive data points + data_slope_nstd : np.array + standard deviation of difference between data points in each window + data_slope : np.array + difference between successive data points + """ + + shift = _shift_from_align(align, samples_per_window) + center = align=='center' + data_mean = data.rolling(samples_per_window, + center=center).mean().shift(shift) + data_max = data.rolling(samples_per_window, + center=center).max().shift(shift) + # shift to get forward difference, .diff() is backward difference instead + data_diff = data.diff().shift(-1) + data_slope = data_diff / sample_interval + # because data_slope_nstd is calculated on differenes the window is + # shortened by 1. :gh:issue:`1070` + data_slope_nstd = data.rolling(samples_per_window, center=center).apply(_calc_slope_nstd) + data_slope_nstd = data_slope_nstd.shift(shift) + data_line_length = data.rolling(samples_per_window, center=center).apply(_calc_line_length, args=(sample_interval,)) + data_line_length = data_line_length.shift(shift) + + return (data_mean, data_max, data_line_length, data_slope_nstd, data_slope) + + +def _calc_slope_nstd(data): + return data.diff().std() / data.mean() + + +def _shift_from_align(align, window): + # pandas.Series.rolling uses kwarg center with values of True or False + # default is False which means right aligned labels by default + # calculate shift to align='left' (legacy pvlib behavior) + if align=='left': + shift = 1 - window + else: + shift = 0 + return shift + + +def _count_in_window(ts, window): + # return int type instead of default int64, to avoid error in pandas + return int(ts.resample(window).agg('sum')[0]) + + +def _get_sample_intervals(times, window_length): + """ Calculates timeinterval and samples per window for Reno-style clear + sky detection functions + """ + window = pd.Timedelta(minutes=window_length) + temp = pd.Series(data=1, index=times) + + # determine if we can proceed + if times.inferred_freq and len(temp.unique()) == 1: + sample_interval = times[1] - times[0] + sample_interval = sample_interval.seconds / 60 # interval in minutes + samples_per_window = _count_in_window(temp, window) + return sample_interval, samples_per_window + else: + raise NotImplementedError('algorithm does not yet support unequal ' \ + 'times. consider resampling your data.') + + +def _calc_line_length(data, sample_interval): + """ Calculates line length for Reno-style clear sky detection functions. + """ + return np.sum(np.sqrt(data.diff()**2 + sample_interval**2)) + + +def _calc_c5(meas_slope, clear_slope, window, align, limit): + shift = _shift_from_align(align, window) + center = align=='center' + slope_diff = np.abs(meas_slope - clear_slope) + slope_diff = slope_diff.rolling( + window - 1, center=center).max().shift(shift + 1) + return slope_diff < limit + + +def _clear_sample_index(clear_windows, samples_per_window, align, H): + """ + Returns indices of clear samples in clear windows + """ + if align=='right': + shift = 1 - samples_per_window + elif align=='center': + shift = - (samples_per_window // 2) + else: + shift = 0 + # first window is in first column of H. clear_windows is aligned by 'align' + # shift clear_windows so that first window is in left position, lined up + # with first column of H + idx = clear_windows.shift(shift) + idx.drop(clear_windows.tail(samples_per_window - 1).index, inplace=True) + idx = idx.astype(bool) # shift added nan, changed type to object + clear_samples = np.unique(H[:, idx]) + return clear_samples + + def detect_clearsky(measured, clearsky, times, window_length, mean_diff=75, max_diff=75, lower_line_length=-5, upper_line_length=10, var_diff=0.005, slope_dev=8, max_iterations=20, - return_components=False): + align='left', return_components=False): """ Detects clear sky times according to the algorithm developed by Reno - and Hansen for GHI measurements. The algorithm [1]_ was designed and + and Hansen for GHI measurements [1]. The algorithm was designed and validated for analyzing GHI time series only. Users may attempt to apply it to other types of time series data using different filter settings, but should be skeptical of the results. @@ -653,6 +902,9 @@ def detect_clearsky(measured, clearsky, times, window_length, max_iterations : int, default 20 Maximum number of times to apply a different scaling factor to the clearsky and redetermine clear_samples. Must be 1 or larger. + align : bool, default 'left' + Alignment of labels to data in sliding window. Must be one of + 'left', 'center', 'right'. return_components : bool, default False Controls if additional output should be returned. See below. @@ -673,9 +925,9 @@ def detect_clearsky(measured, clearsky, times, window_length, References ---------- - .. [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear - sky irradiance in time series of GHI measurements" Renewable Energy, - v90, p. 520-531, 2016. + [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear + sky irradiance in time series of GHI measurements" Renewable Energy, + v90, p. 520-531, 2016. Notes ----- @@ -694,54 +946,26 @@ def detect_clearsky(measured, clearsky, times, window_length, parameter """ - # calculate deltas in units of minutes (matches input window_length units) - deltas = np.diff(times.values) / np.timedelta64(1, '60s') - - # determine the unique deltas and if we can proceed - unique_deltas = np.unique(deltas) - if len(unique_deltas) == 1: - sample_interval = unique_deltas[0] - else: - raise NotImplementedError('algorithm does not yet support unequal ' - 'times. consider resampling your data.') - - intervals_per_window = int(window_length / sample_interval) + sample_interval, samples_per_window = _get_sample_intervals(times, + window_length) # generate matrix of integers for creating windows with indexing - from scipy.linalg import hankel - H = hankel(np.arange(intervals_per_window), # noqa: N806 - np.arange(intervals_per_window - 1, len(times))) - - # convert pandas input to numpy array, but save knowledge of input state - # so we can return a series if that's what was originally provided - ispandas = isinstance(measured, pd.Series) - measured = np.asarray(measured) - clearsky = np.asarray(clearsky) + H = hankel(np.arange(samples_per_window), + np.arange(samples_per_window-1, len(times))) # calculate measurement statistics - meas_mean = np.mean(measured[H], axis=0) - meas_max = np.max(measured[H], axis=0) - meas_diff = np.diff(measured[H], n=1, axis=0) - meas_slope = np.diff(measured[H], n=1, axis=0) / sample_interval - # matlab std function normalizes by N-1, so set ddof=1 here - meas_slope_nstd = np.std(meas_slope, axis=0, ddof=1) / meas_mean - meas_line_length = np.sum(np.sqrt( - meas_diff * meas_diff + - sample_interval * sample_interval), axis=0) + meas_mean, meas_max, meas_line_length, meas_slope_nstd, meas_slope \ + = _calc_stats(measured, samples_per_window, sample_interval, H, align) # calculate clear sky statistics - clear_mean = np.mean(clearsky[H], axis=0) - clear_max = np.max(clearsky[H], axis=0) - clear_diff = np.diff(clearsky[H], n=1, axis=0) - clear_slope = np.diff(clearsky[H], n=1, axis=0) / sample_interval - - from scipy.optimize import minimize_scalar + clear_mean, clear_max, _, _, clear_slope \ + = _calc_stats(clearsky, samples_per_window, sample_interval, H, align) alpha = 1 for iteration in range(max_iterations): - clear_line_length = np.sum(np.sqrt( - alpha * alpha * clear_diff * clear_diff + - sample_interval * sample_interval), axis=0) + _, _, clear_line_length, _, _ = _calc_stats( + alpha * clearsky, samples_per_window, sample_interval, H, + align) line_diff = meas_line_length - clear_line_length @@ -750,179 +974,44 @@ def detect_clearsky(measured, clearsky, times, window_length, c2 = np.abs(meas_max - alpha*clear_max) < max_diff c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff - c5 = np.max(np.abs(meas_slope - - alpha * clear_slope), axis=0) < slope_dev + c5 = _calc_c5(meas_slope, alpha * clear_slope, samples_per_window, + align, slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 # create array to return clear_samples = np.full_like(measured, False, dtype='bool') # find the samples contained in any window classified as clear - clear_samples[np.unique(H[:, clear_windows])] = True + idx = _clear_sample_index(clear_windows, samples_per_window, align, H) + clear_samples[idx] = True # find a new alpha previous_alpha = alpha clear_meas = measured[clear_samples] clear_clear = clearsky[clear_samples] - def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) - alpha = minimize_scalar(rmse).x if round(alpha*10000) == round(previous_alpha*10000): break else: import warnings - warnings.warn('failed to converge after %s iterations' + warnings.warn('failed to converge after %s iterations' \ % max_iterations, RuntimeWarning) # be polite about returning the same type as was input - if ispandas: + if isinstance(measured, pd.Series): clear_samples = pd.Series(clear_samples, index=times) if return_components: components = OrderedDict() - components['mean_diff_flag'] = c1 - components['max_diff_flag'] = c2 - components['line_length_flag'] = c3 - components['slope_nstd_flag'] = c4 - components['slope_max_flag'] = c5 - components['mean_nan_flag'] = c6 - components['windows'] = clear_windows - - components['mean_diff'] = np.abs(meas_mean - alpha * clear_mean) - components['max_diff'] = np.abs(meas_max - alpha * clear_max) - components['line_length'] = meas_line_length - clear_line_length - components['slope_nstd'] = meas_slope_nstd - components['slope_max'] = (np.max( - meas_slope - alpha * clear_slope, axis=0)) - + components['mean_diff'] = c1 + components['max_diff'] = c2 + components['line_length'] = c3 + components['slope_nstd'] = c4 + components['slope_max'] = c5 + components['mean_nan'] = c6 +# components['windows'] = clear_windows return clear_samples, components, alpha else: return clear_samples - - -def bird(zenith, airmass_relative, aod380, aod500, precipitable_water, - ozone=0.3, pressure=101325., dni_extra=1364., asymmetry=0.85, - albedo=0.2): - """ - Bird Simple Clear Sky Broadband Solar Radiation Model - - Based on NREL Excel implementation by Daryl R. Myers [1, 2]. - - Bird and Hulstrom define the zenith as the "angle between a line to - the sun and the local zenith". There is no distinction in the paper - between solar zenith and apparent (or refracted) zenith, but the - relative airmass is defined using the Kasten 1966 expression, which - requires apparent zenith. Although the formulation for calculated - zenith is never explicitly defined in the report, since the purpose - was to compare existing clear sky models with "rigorous radiative - transfer models" (RTM) it is possible that apparent zenith was - obtained as output from the RTM. However, the implentation presented - in PVLIB is tested against the NREL Excel implementation by Daryl - Myers which uses an analytical expression for solar zenith instead - of apparent zenith. - - Parameters - ---------- - zenith : numeric - Solar or apparent zenith angle in degrees - see note above - airmass_relative : numeric - Relative airmass - aod380 : numeric - Aerosol optical depth [cm] measured at 380[nm] - aod500 : numeric - Aerosol optical depth [cm] measured at 500[nm] - precipitable_water : numeric - Precipitable water [cm] - ozone : numeric - Atmospheric ozone [cm], defaults to 0.3[cm] - pressure : numeric - Ambient pressure [Pa], defaults to 101325[Pa] - dni_extra : numeric - Extraterrestrial radiation [W/m^2], defaults to 1364[W/m^2] - asymmetry : numeric - Asymmetry factor, defaults to 0.85 - albedo : numeric - Albedo, defaults to 0.2 - - Returns - ------- - clearsky : DataFrame (if Series input) or OrderedDict of arrays - DataFrame/OrderedDict contains the columns/keys - ``'dhi', 'dni', 'ghi', 'direct_horizontal'`` in [W/m^2]. - - See also - -------- - pvlib.atmosphere.bird_hulstrom80_aod_bb - pvlib.atmosphere.get_relative_airmass - - References - ---------- - .. [1] R. E. Bird and R. L Hulstrom, "A Simplified Clear Sky model for - Direct and Diffuse Insolation on Horizontal Surfaces" SERI Technical - Report SERI/TR-642-761, Feb 1981. Solar Energy Research Institute, - Golden, CO. - - .. [2] Daryl R. Myers, "Solar Radiation: Practical Modeling for Renewable - Energy Applications", pp. 46-51 CRC Press (2013) - - .. [3] `NREL Bird Clear Sky Model `_ - - .. [4] `SERI/TR-642-761 `_ - - .. [5] `Error Reports `_ - """ - etr = dni_extra # extraradiation - ze_rad = np.deg2rad(zenith) # zenith in radians - airmass = airmass_relative - # Bird clear sky model - am_press = atmosphere.get_absolute_airmass(airmass, pressure) - t_rayleigh = ( - np.exp(-0.0903 * am_press ** 0.84 * ( - 1.0 + am_press - am_press ** 1.01 - )) - ) - am_o3 = ozone*airmass - t_ozone = ( - 1.0 - 0.1611 * am_o3 * (1.0 + 139.48 * am_o3) ** -0.3034 - - 0.002715 * am_o3 / (1.0 + 0.044 * am_o3 + 0.0003 * am_o3 ** 2.0) - ) - t_gases = np.exp(-0.0127 * am_press ** 0.26) - am_h2o = airmass * precipitable_water - t_water = ( - 1.0 - 2.4959 * am_h2o / ( - (1.0 + 79.034 * am_h2o) ** 0.6828 + 6.385 * am_h2o - ) - ) - bird_huldstrom = atmosphere.bird_hulstrom80_aod_bb(aod380, aod500) - t_aerosol = np.exp( - -(bird_huldstrom ** 0.873) * - (1.0 + bird_huldstrom - bird_huldstrom ** 0.7088) * airmass ** 0.9108 - ) - taa = 1.0 - 0.1 * (1.0 - airmass + airmass ** 1.06) * (1.0 - t_aerosol) - rs = 0.0685 + (1.0 - asymmetry) * (1.0 - t_aerosol / taa) - id_ = 0.9662 * etr * t_aerosol * t_water * t_gases * t_ozone * t_rayleigh - ze_cos = np.where(zenith < 90, np.cos(ze_rad), 0.0) - id_nh = id_ * ze_cos - ias = ( - etr * ze_cos * 0.79 * t_ozone * t_gases * t_water * taa * - (0.5 * (1.0 - t_rayleigh) + asymmetry * (1.0 - (t_aerosol / taa))) / ( - 1.0 - airmass + airmass ** 1.02 - ) - ) - gh = (id_nh + ias) / (1.0 - albedo * rs) - diffuse_horiz = gh - id_nh - # TODO: be DRY, use decorator to wrap methods that need to return either - # OrderedDict or DataFrame instead of repeating this boilerplate code - irrads = OrderedDict() - irrads['direct_horizontal'] = id_nh - irrads['ghi'] = gh - irrads['dni'] = id_ - irrads['dhi'] = diffuse_horiz - if isinstance(irrads['dni'], pd.Series): - irrads = pd.DataFrame.from_dict(irrads) - return irrads From af9a314806d9f792880d69d2cd130d7dd1fefbac Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 5 Oct 2020 10:38:58 -0600 Subject: [PATCH 02/60] formatting --- pvlib/clearsky.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index cf812b33a8..3954b4f0c8 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -9,6 +9,8 @@ import numpy as np import pandas as pd +from scipy.optimize import minimize_scalar +from scipy.linalg import hankel from pvlib import atmosphere, tools @@ -853,7 +855,7 @@ def detect_clearsky(measured, clearsky, times, window_length, align='left', return_components=False): """ Detects clear sky times according to the algorithm developed by Reno - and Hansen for GHI measurements [1]. The algorithm was designed and + and Hansen for GHI measurements. The algorithm [1]_ was designed and validated for analyzing GHI time series only. Users may attempt to apply it to other types of time series data using different filter settings, but should be skeptical of the results. @@ -925,9 +927,9 @@ def detect_clearsky(measured, clearsky, times, window_length, References ---------- - [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear - sky irradiance in time series of GHI measurements" Renewable Energy, - v90, p. 520-531, 2016. + .. [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear + sky irradiance in time series of GHI measurements" Renewable Energy, + v90, p. 520-531, 2016. Notes ----- @@ -996,7 +998,7 @@ def rmse(alpha): break else: import warnings - warnings.warn('failed to converge after %s iterations' \ + warnings.warn('failed to converge after %s iterations' % max_iterations, RuntimeWarning) # be polite about returning the same type as was input From ea5021fe65df13b72e4a6ae50689d0182d850c5b Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 5 Oct 2020 10:54:19 -0600 Subject: [PATCH 03/60] make array input work --- pvlib/clearsky.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 3954b4f0c8..5f5ee21046 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -948,6 +948,15 @@ def detect_clearsky(measured, clearsky, times, window_length, parameter """ + # be polite about returning the same type as was input + ispandas = isinstance(measured, pd.Series) + + # for internal use, need a Series + if not ispandas: + meas = pd.Series(measured, index=times) + else: + meas = measured + sample_interval, samples_per_window = _get_sample_intervals(times, window_length) @@ -957,7 +966,7 @@ def detect_clearsky(measured, clearsky, times, window_length, # calculate measurement statistics meas_mean, meas_max, meas_line_length, meas_slope_nstd, meas_slope \ - = _calc_stats(measured, samples_per_window, sample_interval, H, align) + = _calc_stats(meas, samples_per_window, sample_interval, H, align) # calculate clear sky statistics clear_mean, clear_max, _, _, clear_slope \ @@ -982,14 +991,14 @@ def detect_clearsky(measured, clearsky, times, window_length, clear_windows = c1 & c2 & c3 & c4 & c5 & c6 # create array to return - clear_samples = np.full_like(measured, False, dtype='bool') + clear_samples = np.full_like(meas, False, dtype='bool') # find the samples contained in any window classified as clear idx = _clear_sample_index(clear_windows, samples_per_window, align, H) clear_samples[idx] = True # find a new alpha previous_alpha = alpha - clear_meas = measured[clear_samples] + clear_meas = meas[clear_samples] clear_clear = clearsky[clear_samples] def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) @@ -1002,7 +1011,7 @@ def rmse(alpha): % max_iterations, RuntimeWarning) # be polite about returning the same type as was input - if isinstance(measured, pd.Series): + if ispandas: clear_samples = pd.Series(clear_samples, index=times) if return_components: From 2a5cdeb0efbad560760fb2b0c9c4e59ac00472f8 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 5 Oct 2020 11:01:13 -0600 Subject: [PATCH 04/60] lint, make clearsky a Series --- pvlib/clearsky.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 5f5ee21046..39aed1ae85 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -757,7 +757,7 @@ def _calc_stats(data, samples_per_window, sample_interval, H, align): """ shift = _shift_from_align(align, samples_per_window) - center = align=='center' + center = align == 'center' data_mean = data.rolling(samples_per_window, center=center).mean().shift(shift) data_max = data.rolling(samples_per_window, @@ -767,9 +767,11 @@ def _calc_stats(data, samples_per_window, sample_interval, H, align): data_slope = data_diff / sample_interval # because data_slope_nstd is calculated on differenes the window is # shortened by 1. :gh:issue:`1070` - data_slope_nstd = data.rolling(samples_per_window, center=center).apply(_calc_slope_nstd) + data_slope_nstd = data.rolling(samples_per_window, center=center).apply( + _calc_slope_nstd) data_slope_nstd = data_slope_nstd.shift(shift) - data_line_length = data.rolling(samples_per_window, center=center).apply(_calc_line_length, args=(sample_interval,)) + data_line_length = data.rolling(samples_per_window, center=center).apply( + _calc_line_length, args=(sample_interval,)) data_line_length = data_line_length.shift(shift) return (data_mean, data_max, data_line_length, data_slope_nstd, data_slope) @@ -783,7 +785,7 @@ def _shift_from_align(align, window): # pandas.Series.rolling uses kwarg center with values of True or False # default is False which means right aligned labels by default # calculate shift to align='left' (legacy pvlib behavior) - if align=='left': + if align == 'left': shift = 1 - window else: shift = 0 @@ -809,7 +811,7 @@ def _get_sample_intervals(times, window_length): samples_per_window = _count_in_window(temp, window) return sample_interval, samples_per_window else: - raise NotImplementedError('algorithm does not yet support unequal ' \ + raise NotImplementedError('algorithm does not yet support unequal ' 'times. consider resampling your data.') @@ -821,7 +823,7 @@ def _calc_line_length(data, sample_interval): def _calc_c5(meas_slope, clear_slope, window, align, limit): shift = _shift_from_align(align, window) - center = align=='center' + center = align == 'center' slope_diff = np.abs(meas_slope - clear_slope) slope_diff = slope_diff.rolling( window - 1, center=center).max().shift(shift + 1) @@ -832,9 +834,9 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): """ Returns indices of clear samples in clear windows """ - if align=='right': + if align == 'right': shift = 1 - samples_per_window - elif align=='center': + elif align == 'center': shift = - (samples_per_window // 2) else: shift = 0 @@ -957,6 +959,11 @@ def detect_clearsky(measured, clearsky, times, window_length, else: meas = measured + if not isinstance(clearsky, pd.Series): + clear = pd.Series(clearsky, index=times) + else: + clear = clearsky + sample_interval, samples_per_window = _get_sample_intervals(times, window_length) @@ -970,13 +977,12 @@ def detect_clearsky(measured, clearsky, times, window_length, # calculate clear sky statistics clear_mean, clear_max, _, _, clear_slope \ - = _calc_stats(clearsky, samples_per_window, sample_interval, H, align) + = _calc_stats(clear, samples_per_window, sample_interval, H, align) alpha = 1 for iteration in range(max_iterations): _, _, clear_line_length, _, _ = _calc_stats( - alpha * clearsky, samples_per_window, sample_interval, H, - align) + alpha * clear, samples_per_window, sample_interval, H, align) line_diff = meas_line_length - clear_line_length @@ -999,7 +1005,7 @@ def detect_clearsky(measured, clearsky, times, window_length, # find a new alpha previous_alpha = alpha clear_meas = meas[clear_samples] - clear_clear = clearsky[clear_samples] + clear_clear = clear[clear_samples] def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) alpha = minimize_scalar(rmse).x From 6f39e6bd4b4bfc2a3010b249c22eee6db62472a0 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 5 Oct 2020 13:07:35 -0600 Subject: [PATCH 05/60] make work for py36min --- pvlib/clearsky.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 39aed1ae85..efd38cbe51 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -725,7 +725,7 @@ def bird(zenith, airmass_relative, aod380, aod500, precipitable_water, return irrads -def _calc_stats(data, samples_per_window, sample_interval, H, align): +def _calc_stats(data, samples_per_window, sample_interval, align): """ Calculates statistics for each window, used by Reno-style clear sky detection functions @@ -736,8 +736,6 @@ def _calc_stats(data, samples_per_window, sample_interval, H, align): Number of data points in each window sample_interval : float Time in minutes in each sample interval - H : np.array - 2D Hankel matrix that arranges data into sliding windows align : bool, default 'left' Alignment of labels to data in sliding window. Must be one of 'left', 'center', 'right'. @@ -778,7 +776,7 @@ def _calc_stats(data, samples_per_window, sample_interval, H, align): def _calc_slope_nstd(data): - return data.diff().std() / data.mean() + return np.diff(data).std() / data.mean() def _shift_from_align(align, window): From 153c7fdcd2fca67057fb8542affe374dfe6266f9 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 5 Oct 2020 13:16:29 -0600 Subject: [PATCH 06/60] update arguments in call --- pvlib/clearsky.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index efd38cbe51..2a0bcc3fab 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -971,16 +971,16 @@ def detect_clearsky(measured, clearsky, times, window_length, # calculate measurement statistics meas_mean, meas_max, meas_line_length, meas_slope_nstd, meas_slope \ - = _calc_stats(meas, samples_per_window, sample_interval, H, align) + = _calc_stats(meas, samples_per_window, sample_interval, align) # calculate clear sky statistics clear_mean, clear_max, _, _, clear_slope \ - = _calc_stats(clear, samples_per_window, sample_interval, H, align) + = _calc_stats(clear, samples_per_window, sample_interval, align) alpha = 1 for iteration in range(max_iterations): _, _, clear_line_length, _, _ = _calc_stats( - alpha * clear, samples_per_window, sample_interval, H, align) + alpha * clear, samples_per_window, sample_interval, align) line_diff = meas_line_length - clear_line_length From 8e719ae4a85cf3963b04535e5c4d03b7ebb6152e Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 5 Oct 2020 13:37:50 -0600 Subject: [PATCH 07/60] fit another diff --- pvlib/clearsky.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 2a0bcc3fab..a7f4f5b2ac 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -816,7 +816,7 @@ def _get_sample_intervals(times, window_length): def _calc_line_length(data, sample_interval): """ Calculates line length for Reno-style clear sky detection functions. """ - return np.sum(np.sqrt(data.diff()**2 + sample_interval**2)) + return np.sum(np.sqrt(np.diff(data)**2 + sample_interval**2)) def _calc_c5(meas_slope, clear_slope, window, align, limit): From d680f77103b09f3854bcd1051c9a95b415d21b4d Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 6 Oct 2020 13:10:28 -0600 Subject: [PATCH 08/60] fix ddof, add test for _calc_stats with window=3 --- pvlib/clearsky.py | 12 ++++++------ pvlib/tests/test_clearsky.py | 6 ++++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index a7f4f5b2ac..1ed78544ff 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -742,15 +742,15 @@ def _calc_stats(data, samples_per_window, sample_interval, align): Returns ------- - data_mean : np.array + data_mean : Series mean of data in each window - data_max : np.array + data_max : Series maximum of data in each window - data_line_length : np.array + data_line_length : Series length of line segments connecting successive data points - data_slope_nstd : np.array + data_slope_nstd : Series standard deviation of difference between data points in each window - data_slope : np.array + data_slope : Series difference between successive data points """ @@ -776,7 +776,7 @@ def _calc_stats(data, samples_per_window, sample_interval, align): def _calc_slope_nstd(data): - return np.diff(data).std() / data.mean() + return np.diff(data).std(ddof=1) / data.mean() def _shift_from_align(align, window): diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 02613fc4fb..9127bde1a3 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -611,9 +611,11 @@ def test_detect_clearsky_irregular_times(detect_clearsky_data): times[0] += 10**9 times = pd.DatetimeIndex(times) with pytest.raises(NotImplementedError): - clear_samples = clearsky.detect_clearsky( - expected['GHI'].values, cs['ghi'].values, times, 10) + clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values, + times, 10) +def test__calc_stats(data, samples_per_window, sample_interval, align): + def test_bird(): """Test Bird/Hulstrom Clearsky Model""" From c5a15b96abfa4b49397f7d44e5d2663b0f14c76d Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 6 Oct 2020 13:10:58 -0600 Subject: [PATCH 09/60] actually add test for _calc_stats with window=3 --- pvlib/tests/test_clearsky.py | 40 +++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 9127bde1a3..c7a8721f92 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -614,8 +614,46 @@ def test_detect_clearsky_irregular_times(detect_clearsky_data): clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values, times, 10) -def test__calc_stats(data, samples_per_window, sample_interval, align): + +def test__calc_stats(): + # assumes window=3 + alignments = ['left', 'center', 'right'] + shift = {k: s for k, s in zip(alignments, [-2, -1, 0])} + x = pd.Series(np.arange(0, 7)**2.) + # all assume right align + # line length between adjacent points + sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) + mean_x = pd.Series(np.array([np.nan, np.nan, 5, 14, 29, 50, 77]) / 3.) + max_x = pd.Series(np.array([np.nan, np.nan, 4, 9, 16, 25, 36])) + diff_std = np.array([np.nan, np.nan, np.std([1, 3], ddof=1), + np.std([3, 5], ddof=1), np.std([5, 7], ddof=1), + np.std([7, 9], ddof=1), np.std([9, 11], ddof=1)]) + slope_nstd = diff_std / mean_x + slope = x.diff().shift(-1) + expected = {} + for align in alignments: + expected[align] = {} + s = shift[align] + expected[align]['mean'] = mean_x.shift(s) + expected[align]['max'] = max_x.shift(s) + # slope between adjacent points + expected[align]['slope'] = slope + line_length = sqt + sqt.shift(-1) + expected[align]['line_length'] = line_length.shift(s + 1) + expected[align]['slope_nstd'] = slope_nstd.shift(s) + expected[align]['data'] = x + for align in expected: + data = expected[align]['data'] + result = clearsky._calc_stats(data=data, window=3, sample_interval=1, + align=align) + res_mean, res_max, res_line_length, res_slope_nstd, res_slope = result + assert_series_equal(res_mean, expected[align]['mean']) + assert_series_equal(res_max, expected[align]['max']) + assert_series_equal(res_line_length, expected[align]['line_length']) + assert_series_equal(res_slope_nstd, expected[align]['slope_nstd']) + assert_series_equal(res_slope, expected[align]['slope']) + def test_bird(): """Test Bird/Hulstrom Clearsky Model""" From c16eed590f6172c06c513951d0604dbefef03f06 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 6 Oct 2020 14:05:36 -0600 Subject: [PATCH 10/60] add test__calc_c5, simplify _calc_c5 --- pvlib/clearsky.py | 6 +++--- pvlib/tests/test_clearsky.py | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 1ed78544ff..3bef666e66 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -823,8 +823,7 @@ def _calc_c5(meas_slope, clear_slope, window, align, limit): shift = _shift_from_align(align, window) center = align == 'center' slope_diff = np.abs(meas_slope - clear_slope) - slope_diff = slope_diff.rolling( - window - 1, center=center).max().shift(shift + 1) + slope_diff = slope_diff.rolling(window, center=center).max() return slope_diff < limit @@ -989,7 +988,8 @@ def detect_clearsky(measured, clearsky, times, window_length, c2 = np.abs(meas_max - alpha*clear_max) < max_diff c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff - c5 = _calc_c5(meas_slope, alpha * clear_slope, samples_per_window, + # need to reduce window by 1 since slope is differenced already + c5 = _calc_c5(meas_slope, alpha * clear_slope, samples_per_window - 1, align, slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index c7a8721f92..b91295530b 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -625,9 +625,8 @@ def test__calc_stats(): sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) mean_x = pd.Series(np.array([np.nan, np.nan, 5, 14, 29, 50, 77]) / 3.) max_x = pd.Series(np.array([np.nan, np.nan, 4, 9, 16, 25, 36])) - diff_std = np.array([np.nan, np.nan, np.std([1, 3], ddof=1), - np.std([3, 5], ddof=1), np.std([5, 7], ddof=1), - np.std([7, 9], ddof=1), np.std([9, 11], ddof=1)]) + diff_std = np.array([np.nan, np.nan, np.sqrt(2), np.sqrt(2), np.sqrt(2), + np.sqrt(2), np.sqrt(2)]) slope_nstd = diff_std / mean_x slope = x.diff().shift(-1) From a14eb2c9e42d7748571591e5f2b9a003a663b3b7 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 6 Oct 2020 14:07:41 -0600 Subject: [PATCH 11/60] really add test__calc_c5 this time --- pvlib/tests/test_clearsky.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index b91295530b..5e5d087957 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -629,7 +629,7 @@ def test__calc_stats(): np.sqrt(2), np.sqrt(2)]) slope_nstd = diff_std / mean_x slope = x.diff().shift(-1) - + expected = {} for align in alignments: expected[align] = {} @@ -652,7 +652,22 @@ def test__calc_stats(): assert_series_equal(res_line_length, expected[align]['line_length']) assert_series_equal(res_slope_nstd, expected[align]['slope_nstd']) assert_series_equal(res_slope, expected[align]['slope']) - + + +def test_calc_c5(): + alignments = ['left', 'center', 'right'] + ms = pd.Series(np.array([1., 0., 2., 5., -10.])) + cs = pd.Series(np.array([0., 0., 1., 1., 0.])) + limit = 2. + expected = {} + expected['left'] = pd.Series([1., 4., 10., np.nan, np.nan]) < limit + expected['center'] = pd.Series([np.nan, 1., 4., 10., np.nan]) < limit + expected['right'] = pd.Series([np.nan, np.nan, 1., 4., 10.]) < limit + # here test for window=3 + for align in alignments: + result = clearsky._calc_c5(ms, cs, window=3, align=align, limit=limit) + assert_series_equal(result, expected[align]) + def test_bird(): """Test Bird/Hulstrom Clearsky Model""" From 84cb165250ea20301b501870bcd2aabf785d624f Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 8 Oct 2020 11:51:42 -0600 Subject: [PATCH 12/60] fix _calc_c5, use shift --- pvlib/clearsky.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 3bef666e66..fe749f8728 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -823,7 +823,7 @@ def _calc_c5(meas_slope, clear_slope, window, align, limit): shift = _shift_from_align(align, window) center = align == 'center' slope_diff = np.abs(meas_slope - clear_slope) - slope_diff = slope_diff.rolling(window, center=center).max() + slope_diff = slope_diff.rolling(window, center=center).max().shift(shift) return slope_diff < limit From e4cf350a64acf31a8ef9845ea1e6ccfeb98a65cb Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 8 Oct 2020 12:32:36 -0600 Subject: [PATCH 13/60] fix test__calc_stats --- pvlib/tests/test_clearsky.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 5e5d087957..cb299a592d 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -644,8 +644,8 @@ def test__calc_stats(): expected[align]['data'] = x for align in expected: data = expected[align]['data'] - result = clearsky._calc_stats(data=data, window=3, sample_interval=1, - align=align) + result = clearsky._calc_stats(data=data, samples_per_window=3, + sample_interval=1, align=align) res_mean, res_max, res_line_length, res_slope_nstd, res_slope = result assert_series_equal(res_mean, expected[align]['mean']) assert_series_equal(res_max, expected[align]['max']) From f1ae2d4415c1186799cd426d7e68933296a2537d Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 8 Oct 2020 12:53:42 -0600 Subject: [PATCH 14/60] remove align from public function kwargs --- pvlib/clearsky.py | 17 ++++++++--------- pvlib/tests/test_clearsky.py | 6 +++--- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index fe749f8728..d4a191d55e 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -851,7 +851,7 @@ def detect_clearsky(measured, clearsky, times, window_length, mean_diff=75, max_diff=75, lower_line_length=-5, upper_line_length=10, var_diff=0.005, slope_dev=8, max_iterations=20, - align='left', return_components=False): + return_components=False): """ Detects clear sky times according to the algorithm developed by Reno and Hansen for GHI measurements. The algorithm [1]_ was designed and @@ -903,9 +903,6 @@ def detect_clearsky(measured, clearsky, times, window_length, max_iterations : int, default 20 Maximum number of times to apply a different scaling factor to the clearsky and redetermine clear_samples. Must be 1 or larger. - align : bool, default 'left' - Alignment of labels to data in sliding window. Must be one of - 'left', 'center', 'right'. return_components : bool, default False Controls if additional output should be returned. See below. @@ -945,6 +942,7 @@ def detect_clearsky(measured, clearsky, times, window_length, * parameters are controllable via keyword arguments * option to return individual test components and clearsky scaling parameter + * uses centered windows (Matlab function uses left-aligned windows) """ # be polite about returning the same type as was input @@ -970,16 +968,16 @@ def detect_clearsky(measured, clearsky, times, window_length, # calculate measurement statistics meas_mean, meas_max, meas_line_length, meas_slope_nstd, meas_slope \ - = _calc_stats(meas, samples_per_window, sample_interval, align) + = _calc_stats(meas, samples_per_window, sample_interval, 'center') # calculate clear sky statistics clear_mean, clear_max, _, _, clear_slope \ - = _calc_stats(clear, samples_per_window, sample_interval, align) + = _calc_stats(clear, samples_per_window, sample_interval, 'center') alpha = 1 for iteration in range(max_iterations): _, _, clear_line_length, _, _ = _calc_stats( - alpha * clear, samples_per_window, sample_interval, align) + alpha * clear, samples_per_window, sample_interval, 'center') line_diff = meas_line_length - clear_line_length @@ -990,14 +988,15 @@ def detect_clearsky(measured, clearsky, times, window_length, c4 = meas_slope_nstd < var_diff # need to reduce window by 1 since slope is differenced already c5 = _calc_c5(meas_slope, alpha * clear_slope, samples_per_window - 1, - align, slope_dev) + 'center', slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 # create array to return clear_samples = np.full_like(meas, False, dtype='bool') # find the samples contained in any window classified as clear - idx = _clear_sample_index(clear_windows, samples_per_window, align, H) + idx = _clear_sample_index(clear_windows, samples_per_window, 'center', + H) clear_samples[idx] = True # find a new alpha diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index cb299a592d..6be2145e31 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -617,8 +617,8 @@ def test_detect_clearsky_irregular_times(detect_clearsky_data): def test__calc_stats(): # assumes window=3 - alignments = ['left', 'center', 'right'] - shift = {k: s for k, s in zip(alignments, [-2, -1, 0])} + alignments = ['center'] # 'left' and 'right' could be added in the future + shift = {'center': -1} # 'left': -2, 'right': 0 x = pd.Series(np.arange(0, 7)**2.) # all assume right align # line length between adjacent points @@ -655,7 +655,7 @@ def test__calc_stats(): def test_calc_c5(): - alignments = ['left', 'center', 'right'] + alignments = ['center'] # 'left' and 'right' could be added in the future ms = pd.Series(np.array([1., 0., 2., 5., -10.])) cs = pd.Series(np.array([0., 0., 1., 1., 0.])) limit = 2. From 1e390c9f7e6d6448dfdf968453d1ae00ff79272c Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 8 Oct 2020 15:15:09 -0600 Subject: [PATCH 15/60] fiddly window and shifting --- pvlib/clearsky.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index d4a191d55e..628b440ad9 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -821,9 +821,12 @@ def _calc_line_length(data, sample_interval): def _calc_c5(meas_slope, clear_slope, window, align, limit): shift = _shift_from_align(align, window) + # because input is differenced already (slopes), subtract 1 from window and + # add 1 to shift center = align == 'center' slope_diff = np.abs(meas_slope - clear_slope) - slope_diff = slope_diff.rolling(window, center=center).max().shift(shift) + slope_diff = slope_diff.rolling(window - 1, center=center).max().shift( + shift + 1) return slope_diff < limit @@ -986,8 +989,7 @@ def detect_clearsky(measured, clearsky, times, window_length, c2 = np.abs(meas_max - alpha*clear_max) < max_diff c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff - # need to reduce window by 1 since slope is differenced already - c5 = _calc_c5(meas_slope, alpha * clear_slope, samples_per_window - 1, + c5 = _calc_c5(meas_slope, alpha * clear_slope, samples_per_window, 'center', slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 From d39218a852373b730435444e4c151ef411b8c3ee Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 12 Oct 2020 15:11:46 -0600 Subject: [PATCH 16/60] fix _calc_c5 --- pvlib/clearsky.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 628b440ad9..70f23d73d8 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -763,8 +763,6 @@ def _calc_stats(data, samples_per_window, sample_interval, align): # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) data_slope = data_diff / sample_interval - # because data_slope_nstd is calculated on differenes the window is - # shortened by 1. :gh:issue:`1070` data_slope_nstd = data.rolling(samples_per_window, center=center).apply( _calc_slope_nstd) data_slope_nstd = data_slope_nstd.shift(shift) @@ -780,13 +778,18 @@ def _calc_slope_nstd(data): def _shift_from_align(align, window): + # number of places to shift to line up pandas.Series.rolling with + # align value. # pandas.Series.rolling uses kwarg center with values of True or False # default is False which means right aligned labels by default - # calculate shift to align='left' (legacy pvlib behavior) - if align == 'left': - shift = 1 - window - else: - shift = 0 + # center=True aligns to center + # Here, calculate shift to align='left' (legacy pvlib behavior) + # code here for future capability if align='left', 'right' are of interest + # commented out to avoid decreasing test coverage +# if align == 'left': +# else: +# shift = 0 + shift = 0 return shift @@ -819,14 +822,14 @@ def _calc_line_length(data, sample_interval): return np.sum(np.sqrt(np.diff(data)**2 + sample_interval**2)) -def _calc_c5(meas_slope, clear_slope, window, align, limit): - shift = _shift_from_align(align, window) - # because input is differenced already (slopes), subtract 1 from window and - # add 1 to shift +def _maxdiff(s): + return np.abs(np.diff(s)).max() + + +def _calc_c5(meas, clear, window, align, limit): center = align == 'center' - slope_diff = np.abs(meas_slope - clear_slope) - slope_diff = slope_diff.rolling(window - 1, center=center).max().shift( - shift + 1) + irrad_diff = meas - clear + slope_diff = irrad_diff.rolling(window, center=center).apply(_maxdiff) return slope_diff < limit @@ -834,16 +837,17 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): """ Returns indices of clear samples in clear windows """ + # first window is in first column of H. clear_windows is aligned by 'align' + # shift clear_windows.index so that first window is in left position, lined + # up with column of H containing the indices of the first window if align == 'right': shift = 1 - samples_per_window elif align == 'center': shift = - (samples_per_window // 2) else: shift = 0 - # first window is in first column of H. clear_windows is aligned by 'align' - # shift clear_windows so that first window is in left position, lined up - # with first column of H idx = clear_windows.shift(shift) + # drop rows at the end corresponding to windows past the end of data idx.drop(clear_windows.tail(samples_per_window - 1).index, inplace=True) idx = idx.astype(bool) # shift added nan, changed type to object clear_samples = np.unique(H[:, idx]) @@ -989,7 +993,8 @@ def detect_clearsky(measured, clearsky, times, window_length, c2 = np.abs(meas_max - alpha*clear_max) < max_diff c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff - c5 = _calc_c5(meas_slope, alpha * clear_slope, samples_per_window, + # need to reduce window by 1 since slope is differenced already + c5 = _calc_c5(meas, alpha * clear, samples_per_window, 'center', slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 From 86c4e79ce9c6ce442ce4af9ddec8bdffacb827f6 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 12 Oct 2020 15:12:18 -0600 Subject: [PATCH 17/60] can now label points at the end of the time series --- pvlib/data/detect_clearsky_data.csv | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pvlib/data/detect_clearsky_data.csv b/pvlib/data/detect_clearsky_data.csv index 7b3a3b58ad..4d1c91e11f 100644 --- a/pvlib/data/detect_clearsky_data.csv +++ b/pvlib/data/detect_clearsky_data.csv @@ -1,7 +1,7 @@ -# latitude:35.04 -# longitude:-106.62 -# elevation:1619 -# window_length:10 +# latitude:35.04,, +# longitude:-106.62,, +# elevation:1619,, +# window_length:10,, Time (UTC),GHI,Clear or not 4/1/2012 17:30,862.0935268,1 4/1/2012 17:31,863.9298884,1 @@ -30,6 +30,6 @@ Time (UTC),GHI,Clear or not 4/1/2012 17:54,901.3604574,1 4/1/2012 17:55,902.7755677,1 4/1/2012 17:56,950,0 -4/1/2012 17:57,905.5519049,0 -4/1/2012 17:58,906.9130747,0 -4/1/2012 17:59,908.256208,0 +4/1/2012 17:57,905.5519049,1 +4/1/2012 17:58,906.9130747,1 +4/1/2012 17:59,908.256208,1 From ace81cff42944e6a9a1be866a38ca902794f29ca Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 12 Oct 2020 15:24:06 -0600 Subject: [PATCH 18/60] fix formatting --- pvlib/clearsky.py | 7 ++++--- pvlib/data/detect_clearsky_data.csv | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 70f23d73d8..38916ef70d 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -786,9 +786,10 @@ def _shift_from_align(align, window): # Here, calculate shift to align='left' (legacy pvlib behavior) # code here for future capability if align='left', 'right' are of interest # commented out to avoid decreasing test coverage -# if align == 'left': -# else: -# shift = 0 + # if align == 'left': + # shift = 1 - window + # else: + # shift = 0 shift = 0 return shift diff --git a/pvlib/data/detect_clearsky_data.csv b/pvlib/data/detect_clearsky_data.csv index 4d1c91e11f..edb3b33391 100644 --- a/pvlib/data/detect_clearsky_data.csv +++ b/pvlib/data/detect_clearsky_data.csv @@ -1,7 +1,7 @@ -# latitude:35.04,, -# longitude:-106.62,, -# elevation:1619,, -# window_length:10,, +# latitude:35.04 +# longitude:-106.62 +# elevation:1619 +# window_length:10 Time (UTC),GHI,Clear or not 4/1/2012 17:30,862.0935268,1 4/1/2012 17:31,863.9298884,1 From ffc7fac8cc7f788e62b6deac8a82b2d3b9cfea66 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 12 Oct 2020 15:49:19 -0600 Subject: [PATCH 19/60] only labels end of data if window is short enough --- pvlib/data/detect_clearsky_data.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pvlib/data/detect_clearsky_data.csv b/pvlib/data/detect_clearsky_data.csv index edb3b33391..7b3a3b58ad 100644 --- a/pvlib/data/detect_clearsky_data.csv +++ b/pvlib/data/detect_clearsky_data.csv @@ -30,6 +30,6 @@ Time (UTC),GHI,Clear or not 4/1/2012 17:54,901.3604574,1 4/1/2012 17:55,902.7755677,1 4/1/2012 17:56,950,0 -4/1/2012 17:57,905.5519049,1 -4/1/2012 17:58,906.9130747,1 -4/1/2012 17:59,908.256208,1 +4/1/2012 17:57,905.5519049,0 +4/1/2012 17:58,906.9130747,0 +4/1/2012 17:59,908.256208,0 From 25c9c8b5e67dad0502c1952aa9483bae8e64e162 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 12 Oct 2020 16:15:53 -0600 Subject: [PATCH 20/60] improve coverage --- pvlib/clearsky.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 38916ef70d..d2f33321dd 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -841,12 +841,14 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): # first window is in first column of H. clear_windows is aligned by 'align' # shift clear_windows.index so that first window is in left position, lined # up with column of H containing the indices of the first window - if align == 'right': - shift = 1 - samples_per_window - elif align == 'center': - shift = - (samples_per_window // 2) - else: - shift = 0 + # commented if/else block for future align='left', 'right' capability + # if align == 'right': + # shift = 1 - samples_per_window + # elif align == 'center': + # shift = - (samples_per_window // 2) + # else: + # shift = 0 + shift = -(samples_per_window // 2) idx = clear_windows.shift(shift) # drop rows at the end corresponding to windows past the end of data idx.drop(clear_windows.tail(samples_per_window - 1).index, inplace=True) From ce4abbb2953bd24aabcfe857616047087404aa01 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Fri, 16 Oct 2020 13:52:30 -0600 Subject: [PATCH 21/60] add to whatsnew --- docs/sphinx/source/whatsnew/v0.8.1.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sphinx/source/whatsnew/v0.8.1.rst b/docs/sphinx/source/whatsnew/v0.8.1.rst index 2f39720627..57b9d03f8d 100644 --- a/docs/sphinx/source/whatsnew/v0.8.1.rst +++ b/docs/sphinx/source/whatsnew/v0.8.1.rst @@ -17,6 +17,8 @@ Enhancements option to :py:class:`~pvlib.modelchain.ModelChain`. (:pull:`1042`) (:issue:`1073`) * Added :py:func:`pvlib.temperature.ross` for cell temperature modeling using only NOCT. (:pull:`1045`) +* :py:func:`pvlib.clearsky.detect_clearsky` now uses centered rolling windows + instead of left-aligned rolling windows. (:pull:`1074`) Bug fixes From a071471c48c9fb563b5311267ec030ae00ba0fab Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Wed, 28 Oct 2020 12:25:01 -0600 Subject: [PATCH 22/60] edits from partial review --- pvlib/clearsky.py | 28 ++++++++++++---------------- pvlib/tests/test_clearsky.py | 2 +- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index d2f33321dd..1196db37a2 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -736,7 +736,7 @@ def _calc_stats(data, samples_per_window, sample_interval, align): Number of data points in each window sample_interval : float Time in minutes in each sample interval - align : bool, default 'left' + align : str Alignment of labels to data in sliding window. Must be one of 'left', 'center', 'right'. @@ -756,18 +756,15 @@ def _calc_stats(data, samples_per_window, sample_interval, align): shift = _shift_from_align(align, samples_per_window) center = align == 'center' - data_mean = data.rolling(samples_per_window, - center=center).mean().shift(shift) - data_max = data.rolling(samples_per_window, - center=center).max().shift(shift) + roller = data.rolling(samples_per_window, center=center) + data_mean = roller.mean().shift(shift) + data_max = roller.max().shift(shift) # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) data_slope = data_diff / sample_interval - data_slope_nstd = data.rolling(samples_per_window, center=center).apply( - _calc_slope_nstd) + data_slope_nstd = roller.apply(_calc_slope_nstd) data_slope_nstd = data_slope_nstd.shift(shift) - data_line_length = data.rolling(samples_per_window, center=center).apply( - _calc_line_length, args=(sample_interval,)) + data_line_length = roller.apply(_calc_line_length, args=(sample_interval,)) data_line_length = data_line_length.shift(shift) return (data_mean, data_max, data_line_length, data_slope_nstd, data_slope) @@ -799,18 +796,17 @@ def _count_in_window(ts, window): return int(ts.resample(window).agg('sum')[0]) -def _get_sample_intervals(times, window_length): - """ Calculates timeinterval and samples per window for Reno-style clear +def _get_sample_intervals(times, win_length): + """ Calculates time interval and samples per window for Reno-style clear sky detection functions """ - window = pd.Timedelta(minutes=window_length) - temp = pd.Series(data=1, index=times) + deltas = np.diff(times.values) / np.timedelta64(1, '60s') # determine if we can proceed - if times.inferred_freq and len(temp.unique()) == 1: + if times.inferred_freq and len(np.unique(deltas)) == 1: sample_interval = times[1] - times[0] sample_interval = sample_interval.seconds / 60 # interval in minutes - samples_per_window = _count_in_window(temp, window) + samples_per_window = int(win_length / sample_interval) return sample_interval, samples_per_window else: raise NotImplementedError('algorithm does not yet support unequal ' @@ -1035,7 +1031,7 @@ def rmse(alpha): components['slope_nstd'] = c4 components['slope_max'] = c5 components['mean_nan'] = c6 -# components['windows'] = clear_windows + components['windows'] = clear_windows return clear_samples, components, alpha else: return clear_samples diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 6be2145e31..2f3a08fa86 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -654,7 +654,7 @@ def test__calc_stats(): assert_series_equal(res_slope, expected[align]['slope']) -def test_calc_c5(): +def test__calc_c5(): alignments = ['center'] # 'left' and 'right' could be added in the future ms = pd.Series(np.array([1., 0., 2., 5., -10.])) cs = pd.Series(np.array([0., 0., 1., 1., 0.])) From 4e4eeb5c19dc345c82a7d40c256d0070b8a28120 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 29 Oct 2020 07:46:42 -0600 Subject: [PATCH 23/60] remove unused function --- pvlib/clearsky.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 1196db37a2..9780822278 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -791,11 +791,6 @@ def _shift_from_align(align, window): return shift -def _count_in_window(ts, window): - # return int type instead of default int64, to avoid error in pandas - return int(ts.resample(window).agg('sum')[0]) - - def _get_sample_intervals(times, win_length): """ Calculates time interval and samples per window for Reno-style clear sky detection functions From 6e6af32f071bd91ebe91f7e7b51c62d5001d79ff Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Fri, 4 Dec 2020 09:02:03 -0700 Subject: [PATCH 24/60] put test for equal time steps back --- pvlib/clearsky.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 9780822278..eb9f3ec7b5 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -945,6 +945,14 @@ def detect_clearsky(measured, clearsky, times, window_length, parameter * uses centered windows (Matlab function uses left-aligned windows) """ + # determine the unique deltas and if we can proceed + deltas = np.diff(times.values) / np.timedelta64(1, '60s') + unique_deltas = np.unique(deltas) + if len(unique_deltas) == 1: + sample_interval = unique_deltas[0] + else: + raise NotImplementedError('algorithm does not yet support unequal ' + 'times. consider resampling your data.') # be polite about returning the same type as was input ispandas = isinstance(measured, pd.Series) From a592d2595adb0f9287f0655f0332cd7ae2eacfca Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Fri, 4 Dec 2020 09:16:36 -0700 Subject: [PATCH 25/60] make times optional --- pvlib/clearsky.py | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index eb9f3ec7b5..2eab4d906e 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -848,7 +848,7 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): return clear_samples -def detect_clearsky(measured, clearsky, times, window_length, +def detect_clearsky(measured, clearsky, times=None, window_length=10, mean_diff=75, max_diff=75, lower_line_length=-5, upper_line_length=10, var_diff=0.005, slope_dev=8, max_iterations=20, @@ -874,24 +874,25 @@ def detect_clearsky(measured, clearsky, times, window_length, Parameters ---------- measured : array or Series - Time series of measured values. + Time series of measured GHI. [W/m2] clearsky : array or Series - Time series of the expected clearsky values. - times : DatetimeIndex - Times of measured and clearsky values. - window_length : int + Time series of the expected clearsky GHI. [W/m2] + times : DatetimeIndex or None, default None. + Times of measured and clearsky values. If None the index of measured + will be used. + window_length : int, default 10 Length of sliding time window in minutes. Must be greater than 2 periods. mean_diff : float, default 75 Threshold value for agreement between mean values of measured - and clearsky in each interval, see Eq. 6 in [1]. + and clearsky in each interval, see Eq. 6 in [1]. [W/m2] max_diff : float, default 75 Threshold value for agreement between maxima of measured and - clearsky values in each interval, see Eq. 7 in [1]. + clearsky values in each interval, see Eq. 7 in [1]. [W/m2] lower_line_length : float, default -5 Lower limit of line length criterion from Eq. 8 in [1]. - Criterion satisfied when - lower_line_length < line length difference < upper_line_length + Criterion satisfied when lower_line_length < line length difference + < upper_line_length. upper_line_length : float, default 10 Upper limit of line length criterion from Eq. 8 in [1]. var_diff : float, default 0.005 @@ -922,6 +923,13 @@ def detect_clearsky(measured, clearsky, times, window_length, detected clear_samples. Only provided if return_components is True. + Raises + ------ + ValueError + If measured is not a Series and times is not provided + NotImplementedError + If timestamps are not equally spaced + References ---------- .. [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear @@ -945,6 +953,13 @@ def detect_clearsky(measured, clearsky, times, window_length, parameter * uses centered windows (Matlab function uses left-aligned windows) """ + + if times is None: + try: + times = measured.index + except AttributeError: + raise ValueError("times is required when measured is not a Series") + # determine the unique deltas and if we can proceed deltas = np.diff(times.values) / np.timedelta64(1, '60s') unique_deltas = np.unique(deltas) From f2c5e8e50129696d7e835e5fd019c018def37981 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Fri, 4 Dec 2020 09:17:25 -0700 Subject: [PATCH 26/60] make times optional --- pvlib/tests/test_clearsky.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 2f3a08fa86..30ffbe9ddb 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -549,7 +549,15 @@ def detect_clearsky_data(): def test_detect_clearsky(detect_clearsky_data): expected, cs = detect_clearsky_data clear_samples = clearsky.detect_clearsky( - expected['GHI'], cs['ghi'], cs.index, 10) + expected['GHI'], cs['ghi'], times=cs.index, window_length=10) + assert_series_equal(expected['Clear or not'], clear_samples, + check_dtype=False, check_names=False) + + +def test_detect_clearsky_defaults(detect_clearsky_data): + expected, cs = detect_clearsky_data + clear_samples = clearsky.detect_clearsky( + expected['GHI'], cs['ghi']) assert_series_equal(expected['Clear or not'], clear_samples, check_dtype=False, check_names=False) @@ -557,7 +565,8 @@ def test_detect_clearsky(detect_clearsky_data): def test_detect_clearsky_components(detect_clearsky_data): expected, cs = detect_clearsky_data clear_samples, components, alpha = clearsky.detect_clearsky( - expected['GHI'], cs['ghi'], cs.index, 10, return_components=True) + expected['GHI'], cs['ghi'], times=cs.index, window_length=10, + return_components=True) assert_series_equal(expected['Clear or not'], clear_samples, check_dtype=False, check_names=False) assert isinstance(components, OrderedDict) @@ -569,11 +578,11 @@ def test_detect_clearsky_iterations(detect_clearsky_data): alpha = 1.0448 with pytest.warns(RuntimeWarning): clear_samples = clearsky.detect_clearsky( - expected['GHI'], cs['ghi']*alpha, cs.index, 10, max_iterations=1) + expected['GHI'], cs['ghi']*alpha, max_iterations=1) assert (clear_samples[:'2012-04-01 10:41:00'] == True).all() assert (clear_samples['2012-04-01 10:42:00':] == False).all() clear_samples = clearsky.detect_clearsky( - expected['GHI'], cs['ghi']*alpha, cs.index, 10, max_iterations=20) + expected['GHI'], cs['ghi']*alpha, max_iterations=20) assert_series_equal(expected['Clear or not'], clear_samples, check_dtype=False, check_names=False) @@ -581,7 +590,7 @@ def test_detect_clearsky_iterations(detect_clearsky_data): def test_detect_clearsky_kwargs(detect_clearsky_data): expected, cs = detect_clearsky_data clear_samples = clearsky.detect_clearsky( - expected['GHI'], cs['ghi'], cs.index, 10, + expected['GHI'], cs['ghi'], times=cs.index, window_length=10, mean_diff=1000, max_diff=1000, lower_line_length=-1000, upper_line_length=1000, var_diff=10, slope_dev=1000) assert clear_samples.all() @@ -590,7 +599,7 @@ def test_detect_clearsky_kwargs(detect_clearsky_data): def test_detect_clearsky_window(detect_clearsky_data): expected, cs = detect_clearsky_data clear_samples = clearsky.detect_clearsky( - expected['GHI'], cs['ghi'], cs.index, 3) + expected['GHI'], cs['ghi'], window_length=3) expected = expected['Clear or not'].copy() expected.iloc[-3:] = True assert_series_equal(expected, clear_samples, @@ -600,7 +609,8 @@ def test_detect_clearsky_window(detect_clearsky_data): def test_detect_clearsky_arrays(detect_clearsky_data): expected, cs = detect_clearsky_data clear_samples = clearsky.detect_clearsky( - expected['GHI'].values, cs['ghi'].values, cs.index, 10) + expected['GHI'].values, cs['ghi'].values, times=cs.index, + window_length=10) assert isinstance(clear_samples, np.ndarray) assert (clear_samples == expected['Clear or not'].values).all() @@ -615,6 +625,12 @@ def test_detect_clearsky_irregular_times(detect_clearsky_data): times, 10) +def test_detect_clearsky_missing_index(detect_clearsky_data): + expected, cs = detect_clearsky_data + with pytest.raises(ValueError): + clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values) + + def test__calc_stats(): # assumes window=3 alignments = ['center'] # 'left' and 'right' could be added in the future From 352212bb76870d06a804f80574304f36437149a4 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Fri, 4 Dec 2020 11:06:34 -0700 Subject: [PATCH 27/60] undo redundant raise, use total_seconds() --- pvlib/clearsky.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 2eab4d906e..c89d258051 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -800,7 +800,7 @@ def _get_sample_intervals(times, win_length): # determine if we can proceed if times.inferred_freq and len(np.unique(deltas)) == 1: sample_interval = times[1] - times[0] - sample_interval = sample_interval.seconds / 60 # interval in minutes + sample_interval = sample_interval.total_seconds() / 60 # interval in minutes samples_per_window = int(win_length / sample_interval) return sample_interval, samples_per_window else: @@ -960,15 +960,6 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, except AttributeError: raise ValueError("times is required when measured is not a Series") - # determine the unique deltas and if we can proceed - deltas = np.diff(times.values) / np.timedelta64(1, '60s') - unique_deltas = np.unique(deltas) - if len(unique_deltas) == 1: - sample_interval = unique_deltas[0] - else: - raise NotImplementedError('algorithm does not yet support unequal ' - 'times. consider resampling your data.') - # be polite about returning the same type as was input ispandas = isinstance(measured, pd.Series) From 74e1e541d99e794028a161c95854d941e7b59635 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 7 Dec 2020 08:55:38 -0700 Subject: [PATCH 28/60] formatting --- pvlib/clearsky.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index c89d258051..eec3afdcd4 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -800,7 +800,7 @@ def _get_sample_intervals(times, win_length): # determine if we can proceed if times.inferred_freq and len(np.unique(deltas)) == 1: sample_interval = times[1] - times[0] - sample_interval = sample_interval.total_seconds() / 60 # interval in minutes + sample_interval = sample_interval.total_seconds() / 60 # in minutes samples_per_window = int(win_length / sample_interval) return sample_interval, samples_per_window else: From 055a7c3007df8a9f6f193602661c08bc1cfe8c0a Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 7 Dec 2020 15:35:50 -0700 Subject: [PATCH 29/60] go back to seconds --- pvlib/clearsky.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index eec3afdcd4..39dfb74f4a 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -800,7 +800,7 @@ def _get_sample_intervals(times, win_length): # determine if we can proceed if times.inferred_freq and len(np.unique(deltas)) == 1: sample_interval = times[1] - times[0] - sample_interval = sample_interval.total_seconds() / 60 # in minutes + sample_interval = sample_interval.seconds / 60 # in minutes samples_per_window = int(win_length / sample_interval) return sample_interval, samples_per_window else: From f7dadbb90e9f5dd3d77722024d0defeceb498123 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 15 Dec 2020 10:13:46 -0700 Subject: [PATCH 30/60] remove array inputs --- pvlib/clearsky.py | 61 ++++++++++-------------------------- pvlib/tests/test_clearsky.py | 13 ++------ 2 files changed, 19 insertions(+), 55 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 39dfb74f4a..0c6cbaec76 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -848,7 +848,7 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): return clear_samples -def detect_clearsky(measured, clearsky, times=None, window_length=10, +def detect_clearsky(measured, clearsky, window_length=10, mean_diff=75, max_diff=75, lower_line_length=-5, upper_line_length=10, var_diff=0.005, slope_dev=8, max_iterations=20, @@ -873,13 +873,10 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, Parameters ---------- - measured : array or Series + measured : Series Time series of measured GHI. [W/m2] - clearsky : array or Series + clearsky : Series Time series of the expected clearsky GHI. [W/m2] - times : DatetimeIndex or None, default None. - Times of measured and clearsky values. If None the index of measured - will be used. window_length : int, default 10 Length of sliding time window in minutes. Must be greater than 2 periods. @@ -910,9 +907,9 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, Returns ------- - clear_samples : array or Series - Boolean array or Series of whether or not the given time is - clear. Return type is the same as the input type. + clear_samples : Series + Series of boolean indicating whether or not the given time is + clear. components : OrderedDict, optional Dict of arrays of whether or not the given time window is clear @@ -925,8 +922,6 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, Raises ------ - ValueError - If measured is not a Series and times is not provided NotImplementedError If timestamps are not equally spaced @@ -954,45 +949,25 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, * uses centered windows (Matlab function uses left-aligned windows) """ - if times is None: - try: - times = measured.index - except AttributeError: - raise ValueError("times is required when measured is not a Series") - - # be polite about returning the same type as was input - ispandas = isinstance(measured, pd.Series) - - # for internal use, need a Series - if not ispandas: - meas = pd.Series(measured, index=times) - else: - meas = measured - - if not isinstance(clearsky, pd.Series): - clear = pd.Series(clearsky, index=times) - else: - clear = clearsky - - sample_interval, samples_per_window = _get_sample_intervals(times, + sample_interval, samples_per_window = _get_sample_intervals(measured.index, window_length) # generate matrix of integers for creating windows with indexing H = hankel(np.arange(samples_per_window), - np.arange(samples_per_window-1, len(times))) + np.arange(samples_per_window-1, len(measured.index))) # calculate measurement statistics meas_mean, meas_max, meas_line_length, meas_slope_nstd, meas_slope \ - = _calc_stats(meas, samples_per_window, sample_interval, 'center') + = _calc_stats(measured, samples_per_window, sample_interval, 'center') # calculate clear sky statistics clear_mean, clear_max, _, _, clear_slope \ - = _calc_stats(clear, samples_per_window, sample_interval, 'center') + = _calc_stats(clearsky, samples_per_window, sample_interval, 'center') alpha = 1 for iteration in range(max_iterations): _, _, clear_line_length, _, _ = _calc_stats( - alpha * clear, samples_per_window, sample_interval, 'center') + alpha * clearsky, samples_per_window, sample_interval, 'center') line_diff = meas_line_length - clear_line_length @@ -1002,13 +977,13 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff # need to reduce window by 1 since slope is differenced already - c5 = _calc_c5(meas, alpha * clear, samples_per_window, + c5 = _calc_c5(measured, alpha * clearsky, samples_per_window, 'center', slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 # create array to return - clear_samples = np.full_like(meas, False, dtype='bool') + clear_samples = np.full_like(measured, False, dtype='bool') # find the samples contained in any window classified as clear idx = _clear_sample_index(clear_windows, samples_per_window, 'center', H) @@ -1016,10 +991,12 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # find a new alpha previous_alpha = alpha - clear_meas = meas[clear_samples] - clear_clear = clear[clear_samples] + clear_meas = measured[clear_samples] + clear_clear = clearsky[clear_samples] + def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) + alpha = minimize_scalar(rmse).x if round(alpha*10000) == round(previous_alpha*10000): break @@ -1028,10 +1005,6 @@ def rmse(alpha): warnings.warn('failed to converge after %s iterations' % max_iterations, RuntimeWarning) - # be polite about returning the same type as was input - if ispandas: - clear_samples = pd.Series(clear_samples, index=times) - if return_components: components = OrderedDict() components['mean_diff'] = c1 diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 30ffbe9ddb..1fe7afaaf1 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -579,8 +579,8 @@ def test_detect_clearsky_iterations(detect_clearsky_data): with pytest.warns(RuntimeWarning): clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=1) - assert (clear_samples[:'2012-04-01 10:41:00'] == True).all() - assert (clear_samples['2012-04-01 10:42:00':] == False).all() + assert (clear_samples[:'2012-04-01 10:41:00'] is True).all() + assert (clear_samples['2012-04-01 10:42:00':] is False).all() clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=20) assert_series_equal(expected['Clear or not'], clear_samples, @@ -606,15 +606,6 @@ def test_detect_clearsky_window(detect_clearsky_data): check_dtype=False, check_names=False) -def test_detect_clearsky_arrays(detect_clearsky_data): - expected, cs = detect_clearsky_data - clear_samples = clearsky.detect_clearsky( - expected['GHI'].values, cs['ghi'].values, times=cs.index, - window_length=10) - assert isinstance(clear_samples, np.ndarray) - assert (clear_samples == expected['Clear or not'].values).all() - - def test_detect_clearsky_irregular_times(detect_clearsky_data): expected, cs = detect_clearsky_data times = cs.index.values.copy() From 282f37476e9f3f86e019573bfd38369bb70264db Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Wed, 16 Dec 2020 08:50:07 -0700 Subject: [PATCH 31/60] Revert "remove array inputs" This reverts commit f7dadbb90e9f5dd3d77722024d0defeceb498123. --- pvlib/clearsky.py | 61 ++++++++++++++++++++++++++---------- pvlib/tests/test_clearsky.py | 13 ++++++-- 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 0c6cbaec76..39dfb74f4a 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -848,7 +848,7 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): return clear_samples -def detect_clearsky(measured, clearsky, window_length=10, +def detect_clearsky(measured, clearsky, times=None, window_length=10, mean_diff=75, max_diff=75, lower_line_length=-5, upper_line_length=10, var_diff=0.005, slope_dev=8, max_iterations=20, @@ -873,10 +873,13 @@ def detect_clearsky(measured, clearsky, window_length=10, Parameters ---------- - measured : Series + measured : array or Series Time series of measured GHI. [W/m2] - clearsky : Series + clearsky : array or Series Time series of the expected clearsky GHI. [W/m2] + times : DatetimeIndex or None, default None. + Times of measured and clearsky values. If None the index of measured + will be used. window_length : int, default 10 Length of sliding time window in minutes. Must be greater than 2 periods. @@ -907,9 +910,9 @@ def detect_clearsky(measured, clearsky, window_length=10, Returns ------- - clear_samples : Series - Series of boolean indicating whether or not the given time is - clear. + clear_samples : array or Series + Boolean array or Series of whether or not the given time is + clear. Return type is the same as the input type. components : OrderedDict, optional Dict of arrays of whether or not the given time window is clear @@ -922,6 +925,8 @@ def detect_clearsky(measured, clearsky, window_length=10, Raises ------ + ValueError + If measured is not a Series and times is not provided NotImplementedError If timestamps are not equally spaced @@ -949,25 +954,45 @@ def detect_clearsky(measured, clearsky, window_length=10, * uses centered windows (Matlab function uses left-aligned windows) """ - sample_interval, samples_per_window = _get_sample_intervals(measured.index, + if times is None: + try: + times = measured.index + except AttributeError: + raise ValueError("times is required when measured is not a Series") + + # be polite about returning the same type as was input + ispandas = isinstance(measured, pd.Series) + + # for internal use, need a Series + if not ispandas: + meas = pd.Series(measured, index=times) + else: + meas = measured + + if not isinstance(clearsky, pd.Series): + clear = pd.Series(clearsky, index=times) + else: + clear = clearsky + + sample_interval, samples_per_window = _get_sample_intervals(times, window_length) # generate matrix of integers for creating windows with indexing H = hankel(np.arange(samples_per_window), - np.arange(samples_per_window-1, len(measured.index))) + np.arange(samples_per_window-1, len(times))) # calculate measurement statistics meas_mean, meas_max, meas_line_length, meas_slope_nstd, meas_slope \ - = _calc_stats(measured, samples_per_window, sample_interval, 'center') + = _calc_stats(meas, samples_per_window, sample_interval, 'center') # calculate clear sky statistics clear_mean, clear_max, _, _, clear_slope \ - = _calc_stats(clearsky, samples_per_window, sample_interval, 'center') + = _calc_stats(clear, samples_per_window, sample_interval, 'center') alpha = 1 for iteration in range(max_iterations): _, _, clear_line_length, _, _ = _calc_stats( - alpha * clearsky, samples_per_window, sample_interval, 'center') + alpha * clear, samples_per_window, sample_interval, 'center') line_diff = meas_line_length - clear_line_length @@ -977,13 +1002,13 @@ def detect_clearsky(measured, clearsky, window_length=10, c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff # need to reduce window by 1 since slope is differenced already - c5 = _calc_c5(measured, alpha * clearsky, samples_per_window, + c5 = _calc_c5(meas, alpha * clear, samples_per_window, 'center', slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 # create array to return - clear_samples = np.full_like(measured, False, dtype='bool') + clear_samples = np.full_like(meas, False, dtype='bool') # find the samples contained in any window classified as clear idx = _clear_sample_index(clear_windows, samples_per_window, 'center', H) @@ -991,12 +1016,10 @@ def detect_clearsky(measured, clearsky, window_length=10, # find a new alpha previous_alpha = alpha - clear_meas = measured[clear_samples] - clear_clear = clearsky[clear_samples] - + clear_meas = meas[clear_samples] + clear_clear = clear[clear_samples] def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) - alpha = minimize_scalar(rmse).x if round(alpha*10000) == round(previous_alpha*10000): break @@ -1005,6 +1028,10 @@ def rmse(alpha): warnings.warn('failed to converge after %s iterations' % max_iterations, RuntimeWarning) + # be polite about returning the same type as was input + if ispandas: + clear_samples = pd.Series(clear_samples, index=times) + if return_components: components = OrderedDict() components['mean_diff'] = c1 diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 1fe7afaaf1..30ffbe9ddb 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -579,8 +579,8 @@ def test_detect_clearsky_iterations(detect_clearsky_data): with pytest.warns(RuntimeWarning): clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=1) - assert (clear_samples[:'2012-04-01 10:41:00'] is True).all() - assert (clear_samples['2012-04-01 10:42:00':] is False).all() + assert (clear_samples[:'2012-04-01 10:41:00'] == True).all() + assert (clear_samples['2012-04-01 10:42:00':] == False).all() clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=20) assert_series_equal(expected['Clear or not'], clear_samples, @@ -606,6 +606,15 @@ def test_detect_clearsky_window(detect_clearsky_data): check_dtype=False, check_names=False) +def test_detect_clearsky_arrays(detect_clearsky_data): + expected, cs = detect_clearsky_data + clear_samples = clearsky.detect_clearsky( + expected['GHI'].values, cs['ghi'].values, times=cs.index, + window_length=10) + assert isinstance(clear_samples, np.ndarray) + assert (clear_samples == expected['Clear or not'].values).all() + + def test_detect_clearsky_irregular_times(detect_clearsky_data): expected, cs = detect_clearsky_data times = cs.index.values.copy() From 56950de39e4d79dd922ffc29f6033117f4704bc6 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 22 Dec 2020 12:17:49 -0700 Subject: [PATCH 32/60] changes from review: drop tail, edit whatsnew, add comments to detect_clearsky --- docs/sphinx/source/whatsnew/v0.8.1.rst | 6 ++---- pvlib/clearsky.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.8.1.rst b/docs/sphinx/source/whatsnew/v0.8.1.rst index 2bf2241c9c..15d69d9b69 100644 --- a/docs/sphinx/source/whatsnew/v0.8.1.rst +++ b/docs/sphinx/source/whatsnew/v0.8.1.rst @@ -5,7 +5,8 @@ v0.8.1 (MONTH DAY YEAR) Breaking changes ~~~~~~~~~~~~~~~~ - +* :py:func:`pvlib.clearsky.detect_clearsky` no longer returns the Boolean areas + Deprecations ~~~~~~~~~~~~ @@ -19,9 +20,6 @@ Enhancements only NOCT. (:pull:`1045`) * :py:func:`pvlib.clearsky.detect_clearsky` now uses centered rolling windows instead of left-aligned rolling windows. (:pull:`1074`) - -* Added :py:func:`pvlib.inverter.sandia_multi` for modeling inverters with - multiple MPPTs (:issue:`457`, :pull:`1085`) * Added :py:func:`pvlib.inverter.sandia_multi` and :py:func:`pvlib.inverter.pvwatts_multi` for modeling inverters with multiple MPPTs (:issue:`457`, :pull:`1085`, :pull:`1106`) * Added optional ``attributes`` parameter to :py:func:`pvlib.iotools.get_psm3` diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 39dfb74f4a..b34444a7df 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -775,6 +775,8 @@ def _calc_slope_nstd(data): def _shift_from_align(align, window): + # development code left here as comments in case we decide to support + # align='left', 'right' in the future # number of places to shift to line up pandas.Series.rolling with # align value. # pandas.Series.rolling uses kwarg center with values of True or False @@ -842,7 +844,7 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): shift = -(samples_per_window // 2) idx = clear_windows.shift(shift) # drop rows at the end corresponding to windows past the end of data - idx.drop(clear_windows.tail(samples_per_window - 1).index, inplace=True) + idx.drop(clear_windows.index[1 - samples_per_window:], inplace=True) idx = idx.astype(bool) # shift added nan, changed type to object clear_samples = np.unique(H[:, idx]) return clear_samples @@ -989,6 +991,12 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, clear_mean, clear_max, _, _, clear_slope \ = _calc_stats(clear, samples_per_window, sample_interval, 'center') + # find a scaling factor for the clear sky time series that minimizes the + # RMSE between the clear times identified in the measured data and the + # scaled clear sky time series. Optimization to determine the scaling + # factor considers all identified clear times, which is different from [1] + # where the scaling factor was determined from clear times on days with + # at least 50% of the day being identified as clear. alpha = 1 for iteration in range(max_iterations): _, _, clear_line_length, _, _ = _calc_stats( @@ -1001,7 +1009,6 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, c2 = np.abs(meas_max - alpha*clear_max) < max_diff c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff - # need to reduce window by 1 since slope is differenced already c5 = _calc_c5(meas, alpha * clear, samples_per_window, 'center', slope_dev) c6 = (clear_mean != 0) & ~np.isnan(clear_mean) @@ -1018,14 +1025,16 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, previous_alpha = alpha clear_meas = meas[clear_samples] clear_clear = clear[clear_samples] + def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) + alpha = minimize_scalar(rmse).x if round(alpha*10000) == round(previous_alpha*10000): break else: import warnings - warnings.warn('failed to converge after %s iterations' + warnings.warn('rescaling failed to converge after %s iterations' % max_iterations, RuntimeWarning) # be polite about returning the same type as was input From 5425ad3b80e74d9410cfe7c4c28db748a1dcc116 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 22 Dec 2020 14:38:56 -0700 Subject: [PATCH 33/60] revert change to detect_clearsky output --- docs/sphinx/source/whatsnew/v0.8.1.rst | 3 +- pvlib/clearsky.py | 289 +++++++++++++------------ pvlib/tests/test_clearsky.py | 4 +- 3 files changed, 152 insertions(+), 144 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.8.1.rst b/docs/sphinx/source/whatsnew/v0.8.1.rst index 15d69d9b69..c559d2de3f 100644 --- a/docs/sphinx/source/whatsnew/v0.8.1.rst +++ b/docs/sphinx/source/whatsnew/v0.8.1.rst @@ -5,8 +5,6 @@ v0.8.1 (MONTH DAY YEAR) Breaking changes ~~~~~~~~~~~~~~~~ -* :py:func:`pvlib.clearsky.detect_clearsky` no longer returns the Boolean areas - Deprecations ~~~~~~~~~~~~ @@ -20,6 +18,7 @@ Enhancements only NOCT. (:pull:`1045`) * :py:func:`pvlib.clearsky.detect_clearsky` now uses centered rolling windows instead of left-aligned rolling windows. (:pull:`1074`) +* The 'times' parameter is now optional in :py:func:`pvlib.clearsky.detect_clearsky`. (:pull:`1074`) * Added :py:func:`pvlib.inverter.sandia_multi` and :py:func:`pvlib.inverter.pvwatts_multi` for modeling inverters with multiple MPPTs (:issue:`457`, :pull:`1085`, :pull:`1106`) * Added optional ``attributes`` parameter to :py:func:`pvlib.iotools.get_psm3` diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index b34444a7df..4336994377 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -599,132 +599,6 @@ def _calc_d(aod700, p): return d -def bird(zenith, airmass_relative, aod380, aod500, precipitable_water, - ozone=0.3, pressure=101325., dni_extra=1364., asymmetry=0.85, - albedo=0.2): - """ - Bird Simple Clear Sky Broadband Solar Radiation Model - - Based on NREL Excel implementation by Daryl R. Myers [1, 2]. - - Bird and Hulstrom define the zenith as the "angle between a line to - the sun and the local zenith". There is no distinction in the paper - between solar zenith and apparent (or refracted) zenith, but the - relative airmass is defined using the Kasten 1966 expression, which - requires apparent zenith. Although the formulation for calculated - zenith is never explicitly defined in the report, since the purpose - was to compare existing clear sky models with "rigorous radiative - transfer models" (RTM) it is possible that apparent zenith was - obtained as output from the RTM. However, the implentation presented - in PVLIB is tested against the NREL Excel implementation by Daryl - Myers which uses an analytical expression for solar zenith instead - of apparent zenith. - - Parameters - ---------- - zenith : numeric - Solar or apparent zenith angle in degrees - see note above - airmass_relative : numeric - Relative airmass - aod380 : numeric - Aerosol optical depth [cm] measured at 380[nm] - aod500 : numeric - Aerosol optical depth [cm] measured at 500[nm] - precipitable_water : numeric - Precipitable water [cm] - ozone : numeric - Atmospheric ozone [cm], defaults to 0.3[cm] - pressure : numeric - Ambient pressure [Pa], defaults to 101325[Pa] - dni_extra : numeric - Extraterrestrial radiation [W/m^2], defaults to 1364[W/m^2] - asymmetry : numeric - Asymmetry factor, defaults to 0.85 - albedo : numeric - Albedo, defaults to 0.2 - - Returns - ------- - clearsky : DataFrame (if Series input) or OrderedDict of arrays - DataFrame/OrderedDict contains the columns/keys - ``'dhi', 'dni', 'ghi', 'direct_horizontal'`` in [W/m^2]. - - See also - -------- - pvlib.atmosphere.bird_hulstrom80_aod_bb - pvlib.atmosphere.get_relative_airmass - - References - ---------- - .. [1] R. E. Bird and R. L Hulstrom, "A Simplified Clear Sky model for - Direct and Diffuse Insolation on Horizontal Surfaces" SERI Technical - Report SERI/TR-642-761, Feb 1981. Solar Energy Research Institute, - Golden, CO. - - .. [2] Daryl R. Myers, "Solar Radiation: Practical Modeling for Renewable - Energy Applications", pp. 46-51 CRC Press (2013) - - .. [3] `NREL Bird Clear Sky Model `_ - - .. [4] `SERI/TR-642-761 `_ - - .. [5] `Error Reports `_ - """ - etr = dni_extra # extraradiation - ze_rad = np.deg2rad(zenith) # zenith in radians - airmass = airmass_relative - # Bird clear sky model - am_press = atmosphere.get_absolute_airmass(airmass, pressure) - t_rayleigh = ( - np.exp(-0.0903 * am_press ** 0.84 * ( - 1.0 + am_press - am_press ** 1.01 - )) - ) - am_o3 = ozone*airmass - t_ozone = ( - 1.0 - 0.1611 * am_o3 * (1.0 + 139.48 * am_o3) ** -0.3034 - - 0.002715 * am_o3 / (1.0 + 0.044 * am_o3 + 0.0003 * am_o3 ** 2.0) - ) - t_gases = np.exp(-0.0127 * am_press ** 0.26) - am_h2o = airmass * precipitable_water - t_water = ( - 1.0 - 2.4959 * am_h2o / ( - (1.0 + 79.034 * am_h2o) ** 0.6828 + 6.385 * am_h2o - ) - ) - bird_huldstrom = atmosphere.bird_hulstrom80_aod_bb(aod380, aod500) - t_aerosol = np.exp( - -(bird_huldstrom ** 0.873) * - (1.0 + bird_huldstrom - bird_huldstrom ** 0.7088) * airmass ** 0.9108 - ) - taa = 1.0 - 0.1 * (1.0 - airmass + airmass ** 1.06) * (1.0 - t_aerosol) - rs = 0.0685 + (1.0 - asymmetry) * (1.0 - t_aerosol / taa) - id_ = 0.9662 * etr * t_aerosol * t_water * t_gases * t_ozone * t_rayleigh - ze_cos = np.where(zenith < 90, np.cos(ze_rad), 0.0) - id_nh = id_ * ze_cos - ias = ( - etr * ze_cos * 0.79 * t_ozone * t_gases * t_water * taa * - (0.5 * (1.0 - t_rayleigh) + asymmetry * (1.0 - (t_aerosol / taa))) / ( - 1.0 - airmass + airmass ** 1.02 - ) - ) - gh = (id_nh + ias) / (1.0 - albedo * rs) - diffuse_horiz = gh - id_nh - # TODO: be DRY, use decorator to wrap methods that need to return either - # OrderedDict or DataFrame instead of repeating this boilerplate code - irrads = OrderedDict() - irrads['direct_horizontal'] = id_nh - irrads['ghi'] = gh - irrads['dni'] = id_ - irrads['dhi'] = diffuse_horiz - if isinstance(irrads['dni'], pd.Series): - irrads = pd.DataFrame.from_dict(irrads) - return irrads - - def _calc_stats(data, samples_per_window, sample_interval, align): """ Calculates statistics for each window, used by Reno-style clear sky detection functions @@ -816,15 +690,15 @@ def _calc_line_length(data, sample_interval): return np.sum(np.sqrt(np.diff(data)**2 + sample_interval**2)) -def _maxdiff(s): - return np.abs(np.diff(s)).max() - +def _calc_slope_max_diff(meas, clear, window, align): + """ Calculates maximum difference in slope on rolling windows + """ + def _maxdiff(s): + return np.abs(np.diff(s)).max() -def _calc_c5(meas, clear, window, align, limit): center = align == 'center' irrad_diff = meas - clear - slope_diff = irrad_diff.rolling(window, center=center).apply(_maxdiff) - return slope_diff < limit + return irrad_diff.rolling(window, center=center).apply(_maxdiff) def _clear_sample_index(clear_windows, samples_per_window, align, H): @@ -1009,8 +883,8 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, c2 = np.abs(meas_max - alpha*clear_max) < max_diff c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff - c5 = _calc_c5(meas, alpha * clear, samples_per_window, - 'center', slope_dev) + c5 = _calc_slope_max_diff( + meas, alpha * clear, samples_per_window, 'center') < slope_dev c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 @@ -1043,13 +917,148 @@ def rmse(alpha): if return_components: components = OrderedDict() - components['mean_diff'] = c1 - components['max_diff'] = c2 - components['line_length'] = c3 - components['slope_nstd'] = c4 - components['slope_max'] = c5 - components['mean_nan'] = c6 + components = OrderedDict() + components['mean_diff_flag'] = c1 + components['max_diff_flag'] = c2 + components['line_length_flag'] = c3 + components['slope_nstd_flag'] = c4 + components['slope_max_flag'] = c5 + components['mean_nan_flag'] = c6 components['windows'] = clear_windows + + components['mean_diff'] = np.abs(meas_mean - alpha * clear_mean) + components['max_diff'] = np.abs(meas_max - alpha * clear_max) + components['line_length'] = meas_line_length - clear_line_length + components['slope_nstd'] = meas_slope_nstd + components['slope_max'] = _calc_slope_max_diff( + meas, alpha * clear, samples_per_window, 'center') + return clear_samples, components, alpha else: return clear_samples + + +def bird(zenith, airmass_relative, aod380, aod500, precipitable_water, + ozone=0.3, pressure=101325., dni_extra=1364., asymmetry=0.85, + albedo=0.2): + """ + Bird Simple Clear Sky Broadband Solar Radiation Model + + Based on NREL Excel implementation by Daryl R. Myers [1, 2]. + + Bird and Hulstrom define the zenith as the "angle between a line to + the sun and the local zenith". There is no distinction in the paper + between solar zenith and apparent (or refracted) zenith, but the + relative airmass is defined using the Kasten 1966 expression, which + requires apparent zenith. Although the formulation for calculated + zenith is never explicitly defined in the report, since the purpose + was to compare existing clear sky models with "rigorous radiative + transfer models" (RTM) it is possible that apparent zenith was + obtained as output from the RTM. However, the implentation presented + in PVLIB is tested against the NREL Excel implementation by Daryl + Myers which uses an analytical expression for solar zenith instead + of apparent zenith. + + Parameters + ---------- + zenith : numeric + Solar or apparent zenith angle in degrees - see note above + airmass_relative : numeric + Relative airmass + aod380 : numeric + Aerosol optical depth [cm] measured at 380[nm] + aod500 : numeric + Aerosol optical depth [cm] measured at 500[nm] + precipitable_water : numeric + Precipitable water [cm] + ozone : numeric + Atmospheric ozone [cm], defaults to 0.3[cm] + pressure : numeric + Ambient pressure [Pa], defaults to 101325[Pa] + dni_extra : numeric + Extraterrestrial radiation [W/m^2], defaults to 1364[W/m^2] + asymmetry : numeric + Asymmetry factor, defaults to 0.85 + albedo : numeric + Albedo, defaults to 0.2 + + Returns + ------- + clearsky : DataFrame (if Series input) or OrderedDict of arrays + DataFrame/OrderedDict contains the columns/keys + ``'dhi', 'dni', 'ghi', 'direct_horizontal'`` in [W/m^2]. + + See also + -------- + pvlib.atmosphere.bird_hulstrom80_aod_bb + pvlib.atmosphere.get_relative_airmass + + References + ---------- + .. [1] R. E. Bird and R. L Hulstrom, "A Simplified Clear Sky model for + Direct and Diffuse Insolation on Horizontal Surfaces" SERI Technical + Report SERI/TR-642-761, Feb 1981. Solar Energy Research Institute, + Golden, CO. + + .. [2] Daryl R. Myers, "Solar Radiation: Practical Modeling for Renewable + Energy Applications", pp. 46-51 CRC Press (2013) + + .. [3] `NREL Bird Clear Sky Model `_ + + .. [4] `SERI/TR-642-761 `_ + + .. [5] `Error Reports `_ + """ + etr = dni_extra # extraradiation + ze_rad = np.deg2rad(zenith) # zenith in radians + airmass = airmass_relative + # Bird clear sky model + am_press = atmosphere.get_absolute_airmass(airmass, pressure) + t_rayleigh = ( + np.exp(-0.0903 * am_press ** 0.84 * ( + 1.0 + am_press - am_press ** 1.01 + )) + ) + am_o3 = ozone*airmass + t_ozone = ( + 1.0 - 0.1611 * am_o3 * (1.0 + 139.48 * am_o3) ** -0.3034 - + 0.002715 * am_o3 / (1.0 + 0.044 * am_o3 + 0.0003 * am_o3 ** 2.0) + ) + t_gases = np.exp(-0.0127 * am_press ** 0.26) + am_h2o = airmass * precipitable_water + t_water = ( + 1.0 - 2.4959 * am_h2o / ( + (1.0 + 79.034 * am_h2o) ** 0.6828 + 6.385 * am_h2o + ) + ) + bird_huldstrom = atmosphere.bird_hulstrom80_aod_bb(aod380, aod500) + t_aerosol = np.exp( + -(bird_huldstrom ** 0.873) * + (1.0 + bird_huldstrom - bird_huldstrom ** 0.7088) * airmass ** 0.9108 + ) + taa = 1.0 - 0.1 * (1.0 - airmass + airmass ** 1.06) * (1.0 - t_aerosol) + rs = 0.0685 + (1.0 - asymmetry) * (1.0 - t_aerosol / taa) + id_ = 0.9662 * etr * t_aerosol * t_water * t_gases * t_ozone * t_rayleigh + ze_cos = np.where(zenith < 90, np.cos(ze_rad), 0.0) + id_nh = id_ * ze_cos + ias = ( + etr * ze_cos * 0.79 * t_ozone * t_gases * t_water * taa * + (0.5 * (1.0 - t_rayleigh) + asymmetry * (1.0 - (t_aerosol / taa))) / ( + 1.0 - airmass + airmass ** 1.02 + ) + ) + gh = (id_nh + ias) / (1.0 - albedo * rs) + diffuse_horiz = gh - id_nh + # TODO: be DRY, use decorator to wrap methods that need to return either + # OrderedDict or DataFrame instead of repeating this boilerplate code + irrads = OrderedDict() + irrads['direct_horizontal'] = id_nh + irrads['ghi'] = gh + irrads['dni'] = id_ + irrads['dhi'] = diffuse_horiz + if isinstance(irrads['dni'], pd.Series): + irrads = pd.DataFrame.from_dict(irrads) + return irrads diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 30ffbe9ddb..b1bde6f06a 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -579,8 +579,8 @@ def test_detect_clearsky_iterations(detect_clearsky_data): with pytest.warns(RuntimeWarning): clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=1) - assert (clear_samples[:'2012-04-01 10:41:00'] == True).all() - assert (clear_samples['2012-04-01 10:42:00':] == False).all() + assert (clear_samples[:'2012-04-01 10:41:00'] is True).all() + assert (clear_samples['2012-04-01 10:42:00':] is False).all() clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=20) assert_series_equal(expected['Clear or not'], clear_samples, From 098edebc7520f047c97ed785b814111998cc42b1 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 22 Dec 2020 14:45:14 -0700 Subject: [PATCH 34/60] update function name --- pvlib/tests/test_clearsky.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index b1bde6f06a..9b63eed230 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -579,8 +579,8 @@ def test_detect_clearsky_iterations(detect_clearsky_data): with pytest.warns(RuntimeWarning): clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=1) - assert (clear_samples[:'2012-04-01 10:41:00'] is True).all() - assert (clear_samples['2012-04-01 10:42:00':] is False).all() + assert clear_samples[:'2012-04-01 10:41:00'].all() + assert not clear_samples['2012-04-01 10:42:00':].all() # expected False clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=20) assert_series_equal(expected['Clear or not'], clear_samples, @@ -670,7 +670,7 @@ def test__calc_stats(): assert_series_equal(res_slope, expected[align]['slope']) -def test__calc_c5(): +def test__calc_slope_max_diff(): alignments = ['center'] # 'left' and 'right' could be added in the future ms = pd.Series(np.array([1., 0., 2., 5., -10.])) cs = pd.Series(np.array([0., 0., 1., 1., 0.])) @@ -681,7 +681,8 @@ def test__calc_c5(): expected['right'] = pd.Series([np.nan, np.nan, 1., 4., 10.]) < limit # here test for window=3 for align in alignments: - result = clearsky._calc_c5(ms, cs, window=3, align=align, limit=limit) + result = clearsky._calc_slope_max_diff( + ms, cs, window=3, align=align) < limit assert_series_equal(result, expected[align]) From 47ccf0e811908c7137691510b763fd1cfabf09c4 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 22 Dec 2020 17:09:25 -0700 Subject: [PATCH 35/60] factor line length into helper for performance --- docs/sphinx/source/whatsnew/v0.8.1.rst | 8 +++- pvlib/clearsky.py | 66 +++++++++++++++++++------- pvlib/tests/test_clearsky.py | 28 +++++++---- 3 files changed, 76 insertions(+), 26 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.8.1.rst b/docs/sphinx/source/whatsnew/v0.8.1.rst index c559d2de3f..91f93b2716 100644 --- a/docs/sphinx/source/whatsnew/v0.8.1.rst +++ b/docs/sphinx/source/whatsnew/v0.8.1.rst @@ -8,7 +8,8 @@ Breaking changes Deprecations ~~~~~~~~~~~~ -* ``pvlib.irradiance.liujordan`` is deprecated. +* ``pvlib.irradiance.liujordan`` is deprecated. :py:func:`pvlib.irradiance.campbell_norman` + replaces ``pvlib.irradiance.liujordan``. Enhancements ~~~~~~~~~~~~ @@ -18,7 +19,10 @@ Enhancements only NOCT. (:pull:`1045`) * :py:func:`pvlib.clearsky.detect_clearsky` now uses centered rolling windows instead of left-aligned rolling windows. (:pull:`1074`) -* The 'times' parameter is now optional in :py:func:`pvlib.clearsky.detect_clearsky`. (:pull:`1074`) +* The 'times' and 'window_length` parameters are now optional kwargs in + :py:func:`pvlib.clearsky.detect_clearsky`. If omitted, 'times' is set + equal to the index of parameter 'measured', and 'window_length' is set to + 10 minutes. (:pull:`1074`) * Added :py:func:`pvlib.inverter.sandia_multi` and :py:func:`pvlib.inverter.pvwatts_multi` for modeling inverters with multiple MPPTs (:issue:`457`, :pull:`1085`, :pull:`1106`) * Added optional ``attributes`` parameter to :py:func:`pvlib.iotools.get_psm3` diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 4336994377..a0dc37e2e5 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -601,7 +601,8 @@ def _calc_d(aod700, p): def _calc_stats(data, samples_per_window, sample_interval, align): """ Calculates statistics for each window, used by Reno-style clear - sky detection functions + sky detection functions. Does not return the line length statistic + which is provided by _calc_line_length_windowed Parameters ---------- @@ -620,8 +621,6 @@ def _calc_stats(data, samples_per_window, sample_interval, align): mean of data in each window data_max : Series maximum of data in each window - data_line_length : Series - length of line segments connecting successive data points data_slope_nstd : Series standard deviation of difference between data points in each window data_slope : Series @@ -638,10 +637,8 @@ def _calc_stats(data, samples_per_window, sample_interval, align): data_slope = data_diff / sample_interval data_slope_nstd = roller.apply(_calc_slope_nstd) data_slope_nstd = data_slope_nstd.shift(shift) - data_line_length = roller.apply(_calc_line_length, args=(sample_interval,)) - data_line_length = data_line_length.shift(shift) - return (data_mean, data_max, data_line_length, data_slope_nstd, data_slope) + return data_mean, data_max, data_slope_nstd, data_slope def _calc_slope_nstd(data): @@ -684,6 +681,36 @@ def _get_sample_intervals(times, win_length): 'times. consider resampling your data.') +def _calc_line_length_windowed(data, samples_per_window, sample_interval, + align): + """ Calculates line-length for each window in data + + Parameters + ---------- + data : Series + samples_per_window : int + Number of data points in each window + sample_interval : float + Time in minutes in each sample interval + align : str + Alignment of labels to data in sliding window. Must be one of + 'left', 'center', 'right'. + + Returns + ------- + data_line_length : Series + length of line segments connecting successive data points + """ + + shift = _shift_from_align(align, samples_per_window) + center = align == 'center' + roller = data.rolling(samples_per_window, center=center) + data_line_length = roller.apply(_calc_line_length, args=(sample_interval,)) + data_line_length = data_line_length.shift(shift) + + return data_line_length + + def _calc_line_length(data, sample_interval): """ Calculates line length for Reno-style clear sky detection functions. """ @@ -705,9 +732,13 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): """ Returns indices of clear samples in clear windows """ - # first window is in first column of H. clear_windows is aligned by 'align' - # shift clear_windows.index so that first window is in left position, lined - # up with column of H containing the indices of the first window + # H contains indices for each window, e.g. indices for the first window + # are in first column of H. + # clear_windows contains one boolean for each window and is aligned + # by 'align', default to center + # shift clear_windows.index to be aligned left (e.g. first value in the + # left-most position) to line up with the first column of H. + # commented if/else block for future align='left', 'right' capability # if align == 'right': # shift = 1 - samples_per_window @@ -718,8 +749,8 @@ def _clear_sample_index(clear_windows, samples_per_window, align, H): shift = -(samples_per_window // 2) idx = clear_windows.shift(shift) # drop rows at the end corresponding to windows past the end of data - idx.drop(clear_windows.index[1 - samples_per_window:], inplace=True) - idx = idx.astype(bool) # shift added nan, changed type to object + idx = idx.drop(clear_windows.index[1 - samples_per_window:]) + idx = idx.astype(bool) # shift changed type to object clear_samples = np.unique(H[:, idx]) return clear_samples @@ -858,11 +889,13 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, np.arange(samples_per_window-1, len(times))) # calculate measurement statistics - meas_mean, meas_max, meas_line_length, meas_slope_nstd, meas_slope \ + meas_mean, meas_max, meas_slope_nstd, meas_slope \ = _calc_stats(meas, samples_per_window, sample_interval, 'center') + meas_line_length = _calc_line_length_windowed( + meas, samples_per_window, sample_interval, 'center') # calculate clear sky statistics - clear_mean, clear_max, _, _, clear_slope \ + clear_mean, clear_max, _, clear_slope \ = _calc_stats(clear, samples_per_window, sample_interval, 'center') # find a scaling factor for the clear sky time series that minimizes the @@ -873,8 +906,9 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # at least 50% of the day being identified as clear. alpha = 1 for iteration in range(max_iterations): - _, _, clear_line_length, _, _ = _calc_stats( - alpha * clear, samples_per_window, sample_interval, 'center') + scaled_clear = alpha * clear + clear_line_length = _calc_line_length_windowed( + scaled_clear, samples_per_window, sample_interval, 'center') line_diff = meas_line_length - clear_line_length @@ -884,7 +918,7 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff c5 = _calc_slope_max_diff( - meas, alpha * clear, samples_per_window, 'center') < slope_dev + meas, scaled_clear, samples_per_window, 'center') < slope_dev c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 9b63eed230..d27c62bce4 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -631,21 +631,36 @@ def test_detect_clearsky_missing_index(detect_clearsky_data): clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values) -def test__calc_stats(): +def test__calc_line_length_windowed(): # assumes window=3 alignments = ['center'] # 'left' and 'right' could be added in the future shift = {'center': -1} # 'left': -2, 'right': 0 - x = pd.Series(np.arange(0, 7)**2.) - # all assume right align # line length between adjacent points sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) + expected = {} + for align in alignments: + expected[align] = {} + s = shift[align] + line_length = sqt + sqt.shift(-1) + expected[align] = line_length.shift(s + 1) + for align in expected: + data = expected[align]['data'] + result = clearsky._calc_line_length_windowed( + data=data, samples_per_window=3, sample_interval=1, align=align) + assert_series_equal(result, expected[align]) + + +def test__calc_stats(): + # assumes window=3 + alignments = ['center'] # 'left' and 'right' could be added in the future + shift = {'center': -1} # 'left': -2, 'right': 0 + x = pd.Series(np.arange(0, 7)**2.) mean_x = pd.Series(np.array([np.nan, np.nan, 5, 14, 29, 50, 77]) / 3.) max_x = pd.Series(np.array([np.nan, np.nan, 4, 9, 16, 25, 36])) diff_std = np.array([np.nan, np.nan, np.sqrt(2), np.sqrt(2), np.sqrt(2), np.sqrt(2), np.sqrt(2)]) slope_nstd = diff_std / mean_x slope = x.diff().shift(-1) - expected = {} for align in alignments: expected[align] = {} @@ -654,18 +669,15 @@ def test__calc_stats(): expected[align]['max'] = max_x.shift(s) # slope between adjacent points expected[align]['slope'] = slope - line_length = sqt + sqt.shift(-1) - expected[align]['line_length'] = line_length.shift(s + 1) expected[align]['slope_nstd'] = slope_nstd.shift(s) expected[align]['data'] = x for align in expected: data = expected[align]['data'] result = clearsky._calc_stats(data=data, samples_per_window=3, sample_interval=1, align=align) - res_mean, res_max, res_line_length, res_slope_nstd, res_slope = result + res_mean, res_max, res_slope_nstd, res_slope = result assert_series_equal(res_mean, expected[align]['mean']) assert_series_equal(res_max, expected[align]['max']) - assert_series_equal(res_line_length, expected[align]['line_length']) assert_series_equal(res_slope_nstd, expected[align]['slope_nstd']) assert_series_equal(res_slope, expected[align]['slope']) From 5927d738946d7423737fa8edd1256faeebfba7b3 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 22 Dec 2020 18:22:07 -0700 Subject: [PATCH 36/60] fix test__calc_line_length_windowed --- pvlib/tests/test_clearsky.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index d27c62bce4..2b1eff47b9 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -635,6 +635,7 @@ def test__calc_line_length_windowed(): # assumes window=3 alignments = ['center'] # 'left' and 'right' could be added in the future shift = {'center': -1} # 'left': -2, 'right': 0 + x = pd.Series(np.arange(0, 7)**2.) # line length between adjacent points sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) expected = {} @@ -642,12 +643,13 @@ def test__calc_line_length_windowed(): expected[align] = {} s = shift[align] line_length = sqt + sqt.shift(-1) - expected[align] = line_length.shift(s + 1) + expected[align]['line_length'] = line_length.shift(s + 1) + expected[align]['data'] = x for align in expected: data = expected[align]['data'] result = clearsky._calc_line_length_windowed( data=data, samples_per_window=3, sample_interval=1, align=align) - assert_series_equal(result, expected[align]) + assert_series_equal(result, expected[align]['line_length']) def test__calc_stats(): From 55453d773c1ff76633d1ce7486258b2f7e4c2cb8 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 22 Dec 2020 19:44:58 -0700 Subject: [PATCH 37/60] add benchmarks/detect_clearsky.py --- benchmarks/benchmarks/detect_clearsky.py | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 benchmarks/benchmarks/detect_clearsky.py diff --git a/benchmarks/benchmarks/detect_clearsky.py b/benchmarks/benchmarks/detect_clearsky.py new file mode 100644 index 0000000000..94f7f9406a --- /dev/null +++ b/benchmarks/benchmarks/detect_clearsky.py @@ -0,0 +1,30 @@ +""" +ASV benchmarks for detect clear sky function. +""" + +import pandas as pd +from pvlib import clearsky, solarposition +import numpy as np + + +class DetectClear: + + def setup(self): + self.times = pd.date_range(start='20180601', freq='1min', + periods=14400) + self.lat = 35.1 + self.lon = -106.6 + self.solar_position = solarposition.get_solarposition( + self.times, self.lat, self.lon) + clearsky_df = clearsky.simplified_solis( + self.solar_position['apparent_elevation']) + self.clearsky = self.clearsky_df['ghi'] + measured_dni = clearsky_df['dni'].where(self.times.hour % 2, 0) + cos_zen = np.cos(np.deg2rad(self.solar_position['apparent_zenith'])) + self.measured = measured_dni * cos_zen + clearsky_df['dhi'] + self.measured *= 0.98 + + def detect_clearsky(self): + clearsky.detect_clearsky( + self.measured, self.clearsky, self.times, self.window_length + ) From 1d16a310f14cb61bbe93f38d10bbba724d9ae001 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 22 Dec 2020 20:05:14 -0700 Subject: [PATCH 38/60] fix mistakes --- benchmarks/benchmarks/detect_clearsky.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmarks/detect_clearsky.py b/benchmarks/benchmarks/detect_clearsky.py index 94f7f9406a..d42e38a692 100644 --- a/benchmarks/benchmarks/detect_clearsky.py +++ b/benchmarks/benchmarks/detect_clearsky.py @@ -18,13 +18,15 @@ def setup(self): self.times, self.lat, self.lon) clearsky_df = clearsky.simplified_solis( self.solar_position['apparent_elevation']) - self.clearsky = self.clearsky_df['ghi'] - measured_dni = clearsky_df['dni'].where(self.times.hour % 2, 0) + self.clearsky = clearsky_df['ghi'] + measured_dni = clearsky_df['dni'].where( + (self.times.hour % 2).astype(bool), 0) cos_zen = np.cos(np.deg2rad(self.solar_position['apparent_zenith'])) self.measured = measured_dni * cos_zen + clearsky_df['dhi'] self.measured *= 0.98 + self.window_length = 10 - def detect_clearsky(self): + def time_detect_clearsky(self): clearsky.detect_clearsky( self.measured, self.clearsky, self.times, self.window_length ) From f5ffda39d3eb37088dae73fa24dc58699c3f703e Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Wed, 23 Dec 2020 11:44:41 -0700 Subject: [PATCH 39/60] work around pd.rolling.apply, drop align options --- pvlib/clearsky.py | 128 +++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 82 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index a0dc37e2e5..3eaab82311 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -599,10 +599,10 @@ def _calc_d(aod700, p): return d -def _calc_stats(data, samples_per_window, sample_interval, align): +def _calc_stats(data, samples_per_window, sample_interval, H): """ Calculates statistics for each window, used by Reno-style clear sky detection functions. Does not return the line length statistic - which is provided by _calc_line_length_windowed + which is provided by _calc_windowed_stat and _line_length Parameters ---------- @@ -611,9 +611,8 @@ def _calc_stats(data, samples_per_window, sample_interval, align): Number of data points in each window sample_interval : float Time in minutes in each sample interval - align : str - Alignment of labels to data in sliding window. Must be one of - 'left', 'center', 'right'. + H : 2D ndarray + Hankel matrix defining the indices for each window. Returns ------- @@ -627,41 +626,34 @@ def _calc_stats(data, samples_per_window, sample_interval, align): difference between successive data points """ - shift = _shift_from_align(align, samples_per_window) - center = align == 'center' - roller = data.rolling(samples_per_window, center=center) - data_mean = roller.mean().shift(shift) - data_max = roller.max().shift(shift) + roller = data.rolling(samples_per_window, center=True) + data_mean = roller.mean() + data_max = roller.max() # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) data_slope = data_diff / sample_interval - data_slope_nstd = roller.apply(_calc_slope_nstd) - data_slope_nstd = data_slope_nstd.shift(shift) + data_slope_nstd = _calc_windowed_stat( + data.values, _slope_nstd, samples_per_window, H) + data_slope_nstd = data_slope_nstd return data_mean, data_max, data_slope_nstd, data_slope -def _calc_slope_nstd(data): - return np.diff(data).std(ddof=1) / data.mean() +def _slope_nstd(data): + with np.errstate(divide='ignore', invalid='ignore'): + return np.diff(data).std(ddof=1) / data.mean() -def _shift_from_align(align, window): - # development code left here as comments in case we decide to support - # align='left', 'right' in the future - # number of places to shift to line up pandas.Series.rolling with - # align value. - # pandas.Series.rolling uses kwarg center with values of True or False - # default is False which means right aligned labels by default - # center=True aligns to center - # Here, calculate shift to align='left' (legacy pvlib behavior) - # code here for future capability if align='left', 'right' are of interest - # commented out to avoid decreasing test coverage - # if align == 'left': - # shift = 1 - window - # else: - # shift = 0 - shift = 0 - return shift +def _line_length(data, sample_interval): + """ + Calculates line length where sample_interval is the time step + between data points. + """ + return np.sum(np.sqrt(np.diff(data)**2 + sample_interval**2)) + + +def _max_diff(data): + return np.abs(np.diff(data)).max() def _get_sample_intervals(times, win_length): @@ -681,51 +673,23 @@ def _get_sample_intervals(times, win_length): 'times. consider resampling your data.') -def _calc_line_length_windowed(data, samples_per_window, sample_interval, - align): - """ Calculates line-length for each window in data +def _to_centered_series(vals, idx, H, samples_per_window): + vals = np.pad(vals, ((0, len(idx) - H.shape[1]),), constant_values=np.nan) + shift = samples_per_window // 2 # align = 'center' only + return pd.Series(index=idx, data=vals).shift(shift) - Parameters - ---------- - data : Series - samples_per_window : int - Number of data points in each window - sample_interval : float - Time in minutes in each sample interval - align : str - Alignment of labels to data in sliding window. Must be one of - 'left', 'center', 'right'. - - Returns - ------- - data_line_length : Series - length of line segments connecting successive data points - """ - - shift = _shift_from_align(align, samples_per_window) - center = align == 'center' - roller = data.rolling(samples_per_window, center=center) - data_line_length = roller.apply(_calc_line_length, args=(sample_interval,)) - data_line_length = data_line_length.shift(shift) - return data_line_length - - -def _calc_line_length(data, sample_interval): - """ Calculates line length for Reno-style clear sky detection functions. +def _calc_windowed_stat(data, func, samples_per_window, H, args=()): """ - return np.sum(np.sqrt(np.diff(data)**2 + sample_interval**2)) + Applies func to each rolling window in data. data must be Series. + func accepts a 1d array and returns a float. args are passed to func. + H is a Hankel matrix defining the indices for each window. - -def _calc_slope_max_diff(meas, clear, window, align): - """ Calculates maximum difference in slope on rolling windows + Returns a Series of the output of func in each window, aligned to center + of each window. """ - def _maxdiff(s): - return np.abs(np.diff(s)).max() - - center = align == 'center' - irrad_diff = meas - clear - return irrad_diff.rolling(window, center=center).apply(_maxdiff) + vals = np.apply_along_axis(func, 0, data.values[H], *args) + return _to_centered_series(vals, data.index, H, samples_per_window) def _clear_sample_index(clear_windows, samples_per_window, align, H): @@ -890,13 +854,13 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # calculate measurement statistics meas_mean, meas_max, meas_slope_nstd, meas_slope \ - = _calc_stats(meas, samples_per_window, sample_interval, 'center') - meas_line_length = _calc_line_length_windowed( - meas, samples_per_window, sample_interval, 'center') + = _calc_stats(meas, samples_per_window, sample_interval, H) + meas_line_length = _calc_windowed_stat( + meas, _line_length, samples_per_window, H, args=(sample_interval,)) # calculate clear sky statistics clear_mean, clear_max, _, clear_slope \ - = _calc_stats(clear, samples_per_window, sample_interval, 'center') + = _calc_stats(clear, samples_per_window, sample_interval, H) # find a scaling factor for the clear sky time series that minimizes the # RMSE between the clear times identified in the measured data and the @@ -907,18 +871,19 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, alpha = 1 for iteration in range(max_iterations): scaled_clear = alpha * clear - clear_line_length = _calc_line_length_windowed( - scaled_clear, samples_per_window, sample_interval, 'center') + clear_line_length = _calc_windowed_stat( + scaled_clear, _line_length, samples_per_window, H, + args=(sample_interval,)) line_diff = meas_line_length - clear_line_length - + slope_max_diff = _calc_windowed_stat( + meas - scaled_clear, _max_diff, samples_per_window, H) # evaluate comparison criteria c1 = np.abs(meas_mean - alpha*clear_mean) < mean_diff c2 = np.abs(meas_max - alpha*clear_max) < max_diff c3 = (line_diff > lower_line_length) & (line_diff < upper_line_length) c4 = meas_slope_nstd < var_diff - c5 = _calc_slope_max_diff( - meas, scaled_clear, samples_per_window, 'center') < slope_dev + c5 = slope_max_diff < slope_dev c6 = (clear_mean != 0) & ~np.isnan(clear_mean) clear_windows = c1 & c2 & c3 & c4 & c5 & c6 @@ -964,8 +929,7 @@ def rmse(alpha): components['max_diff'] = np.abs(meas_max - alpha * clear_max) components['line_length'] = meas_line_length - clear_line_length components['slope_nstd'] = meas_slope_nstd - components['slope_max'] = _calc_slope_max_diff( - meas, alpha * clear, samples_per_window, 'center') + components['slope_max'] = slope_max_diff return clear_samples, components, alpha else: From 8702a1c2e95df03ea0ec2f0869be00f67e369c26 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Wed, 23 Dec 2020 11:52:47 -0700 Subject: [PATCH 40/60] remove mistaken .values --- pvlib/clearsky.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 3eaab82311..3308a71dd0 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -633,7 +633,7 @@ def _calc_stats(data, samples_per_window, sample_interval, H): data_diff = data.diff().shift(-1) data_slope = data_diff / sample_interval data_slope_nstd = _calc_windowed_stat( - data.values, _slope_nstd, samples_per_window, H) + data, _slope_nstd, samples_per_window, H) data_slope_nstd = data_slope_nstd return data_mean, data_max, data_slope_nstd, data_slope From 608c1d4b3cf2618c7d5b0105752ee279b3c1f626 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Wed, 23 Dec 2020 12:03:55 -0700 Subject: [PATCH 41/60] add test for new _windowed_stat function --- pvlib/tests/test_clearsky.py | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 2b1eff47b9..221a5af4cc 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -4,6 +4,7 @@ from numpy import nan import pandas as pd import pytz +from scipy.linalg import hankel import pytest from numpy.testing import assert_allclose @@ -631,13 +632,16 @@ def test_detect_clearsky_missing_index(detect_clearsky_data): clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values) -def test__calc_line_length_windowed(): - # assumes window=3 +def test__calc_windowed_stat(): + # sqt is hand-calculated assuming window=3 + samples_per_window = 3 alignments = ['center'] # 'left' and 'right' could be added in the future shift = {'center': -1} # 'left': -2, 'right': 0 x = pd.Series(np.arange(0, 7)**2.) # line length between adjacent points sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) + H = hankel(np.arange(samples_per_window), + np.arange(samples_per_window-1, len(sqt))) expected = {} for align in alignments: expected[align] = {} @@ -647,8 +651,10 @@ def test__calc_line_length_windowed(): expected[align]['data'] = x for align in expected: data = expected[align]['data'] - result = clearsky._calc_line_length_windowed( - data=data, samples_per_window=3, sample_interval=1, align=align) + sample_interval = 1 + result = clearsky._calc_windowed_stat( + data, clearsky._line_length, samples_per_window, H, + args=(sample_interval,)) assert_series_equal(result, expected[align]['line_length']) @@ -676,7 +682,7 @@ def test__calc_stats(): for align in expected: data = expected[align]['data'] result = clearsky._calc_stats(data=data, samples_per_window=3, - sample_interval=1, align=align) + sample_interval=1) res_mean, res_max, res_slope_nstd, res_slope = result assert_series_equal(res_mean, expected[align]['mean']) assert_series_equal(res_max, expected[align]['max']) @@ -684,22 +690,6 @@ def test__calc_stats(): assert_series_equal(res_slope, expected[align]['slope']) -def test__calc_slope_max_diff(): - alignments = ['center'] # 'left' and 'right' could be added in the future - ms = pd.Series(np.array([1., 0., 2., 5., -10.])) - cs = pd.Series(np.array([0., 0., 1., 1., 0.])) - limit = 2. - expected = {} - expected['left'] = pd.Series([1., 4., 10., np.nan, np.nan]) < limit - expected['center'] = pd.Series([np.nan, 1., 4., 10., np.nan]) < limit - expected['right'] = pd.Series([np.nan, np.nan, 1., 4., 10.]) < limit - # here test for window=3 - for align in alignments: - result = clearsky._calc_slope_max_diff( - ms, cs, window=3, align=align) < limit - assert_series_equal(result, expected[align]) - - def test_bird(): """Test Bird/Hulstrom Clearsky Model""" times = pd.date_range(start='1/1/2015 0:00', end='12/31/2015 23:00', From 9387067ece0b9399315b49ab97039a2061029c1f Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Wed, 23 Dec 2020 12:10:23 -0700 Subject: [PATCH 42/60] patch up test__calc_stats --- pvlib/tests/test_clearsky.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 221a5af4cc..ce0979ff40 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -583,7 +583,7 @@ def test_detect_clearsky_iterations(detect_clearsky_data): assert clear_samples[:'2012-04-01 10:41:00'].all() assert not clear_samples['2012-04-01 10:42:00':].all() # expected False clear_samples = clearsky.detect_clearsky( - expected['GHI'], cs['ghi']*alpha, max_iterations=20) + expected['GHI'], cs['ghi']*alpha, max_iterations=20) assert_series_equal(expected['Clear or not'], clear_samples, check_dtype=False, check_names=False) @@ -659,7 +659,9 @@ def test__calc_windowed_stat(): def test__calc_stats(): - # assumes window=3 + # stats are hand-computed assuming window = 3 and sample_interval = 1 + samples_per_window = 3 + sample_interval = 1 alignments = ['center'] # 'left' and 'right' could be added in the future shift = {'center': -1} # 'left': -2, 'right': 0 x = pd.Series(np.arange(0, 7)**2.) @@ -669,6 +671,8 @@ def test__calc_stats(): np.sqrt(2), np.sqrt(2)]) slope_nstd = diff_std / mean_x slope = x.diff().shift(-1) + H = hankel(np.arange(samples_per_window), + np.arange(samples_per_window-1, len(x))) expected = {} for align in alignments: expected[align] = {} @@ -681,8 +685,8 @@ def test__calc_stats(): expected[align]['data'] = x for align in expected: data = expected[align]['data'] - result = clearsky._calc_stats(data=data, samples_per_window=3, - sample_interval=1) + result = clearsky._calc_stats(data, samples_per_window, + sample_interval, H) res_mean, res_max, res_slope_nstd, res_slope = result assert_series_equal(res_mean, expected[align]['mean']) assert_series_equal(res_max, expected[align]['max']) From c3c1f9e3425f6e56939217fcbf3c7309546688f6 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Wed, 23 Dec 2020 12:54:52 -0700 Subject: [PATCH 43/60] add mode kwarg for np.pad --- pvlib/clearsky.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 3308a71dd0..a0c25e778e 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -674,7 +674,8 @@ def _get_sample_intervals(times, win_length): def _to_centered_series(vals, idx, H, samples_per_window): - vals = np.pad(vals, ((0, len(idx) - H.shape[1]),), constant_values=np.nan) + vals = np.pad(vals, ((0, len(idx) - H.shape[1]),), mode='constant', + constant_values=np.nan) shift = samples_per_window // 2 # align = 'center' only return pd.Series(index=idx, data=vals).shift(shift) From e0411716654ed7cb6255d66aa3d24e8880c68807 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 24 Dec 2020 09:58:40 -0700 Subject: [PATCH 44/60] replace windowed stats calculations --- pvlib/clearsky.py | 66 +++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index a0c25e778e..e1fcc58228 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -632,28 +632,37 @@ def _calc_stats(data, samples_per_window, sample_interval, H): # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) data_slope = data_diff / sample_interval - data_slope_nstd = _calc_windowed_stat( - data, _slope_nstd, samples_per_window, H) + data_slope_nstd = _slope_nstd_windowed(data, H, samples_per_window) data_slope_nstd = data_slope_nstd return data_mean, data_max, data_slope_nstd, data_slope -def _slope_nstd(data): +def _slope_nstd_windowed(data, H, samples_per_window): with np.errstate(divide='ignore', invalid='ignore'): - return np.diff(data).std(ddof=1) / data.mean() + raw = np.diff(data) + raw = raw[H[:-1, ]].std(ddof=1, axis=0) / data[H].mean(axis=0) + return _to_centered_series(raw, data.index, samples_per_window) -def _line_length(data, sample_interval): - """ - Calculates line length where sample_interval is the time step - between data points. - """ - return np.sum(np.sqrt(np.diff(data)**2 + sample_interval**2)) +def _max_diff_windowed(data, H, samples_per_window): + raw = np.diff(data) + raw = np.abs(raw[H[:-1, ]]).max(axis=0) + return _to_centered_series(raw, data.index, samples_per_window) -def _max_diff(data): - return np.abs(np.diff(data)).max() +def _line_length_windowed(data, H, samples_per_window, + sample_interval): + raw = np.sqrt(np.diff(data)**2. + sample_interval**2.) + raw = np.sum(raw[H[:-1, ]], axis=0) + return _to_centered_series(raw, data.index, samples_per_window) + + +def _to_centered_series(vals, idx, samples_per_window): + vals = np.pad(vals, ((0, len(idx) - len(vals)),), mode='constant', + constant_values=np.nan) + shift = samples_per_window // 2 # align = 'center' only + return pd.Series(index=idx, data=vals).shift(shift) def _get_sample_intervals(times, win_length): @@ -673,26 +682,6 @@ def _get_sample_intervals(times, win_length): 'times. consider resampling your data.') -def _to_centered_series(vals, idx, H, samples_per_window): - vals = np.pad(vals, ((0, len(idx) - H.shape[1]),), mode='constant', - constant_values=np.nan) - shift = samples_per_window // 2 # align = 'center' only - return pd.Series(index=idx, data=vals).shift(shift) - - -def _calc_windowed_stat(data, func, samples_per_window, H, args=()): - """ - Applies func to each rolling window in data. data must be Series. - func accepts a 1d array and returns a float. args are passed to func. - H is a Hankel matrix defining the indices for each window. - - Returns a Series of the output of func in each window, aligned to center - of each window. - """ - vals = np.apply_along_axis(func, 0, data.values[H], *args) - return _to_centered_series(vals, data.index, H, samples_per_window) - - def _clear_sample_index(clear_windows, samples_per_window, align, H): """ Returns indices of clear samples in clear windows @@ -856,8 +845,8 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # calculate measurement statistics meas_mean, meas_max, meas_slope_nstd, meas_slope \ = _calc_stats(meas, samples_per_window, sample_interval, H) - meas_line_length = _calc_windowed_stat( - meas, _line_length, samples_per_window, H, args=(sample_interval,)) + meas_line_length = _line_length_windowed( + meas, H, samples_per_window, sample_interval) # calculate clear sky statistics clear_mean, clear_max, _, clear_slope \ @@ -872,13 +861,12 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, alpha = 1 for iteration in range(max_iterations): scaled_clear = alpha * clear - clear_line_length = _calc_windowed_stat( - scaled_clear, _line_length, samples_per_window, H, - args=(sample_interval,)) + clear_line_length = _line_length_windowed( + scaled_clear, H, samples_per_window, sample_interval) line_diff = meas_line_length - clear_line_length - slope_max_diff = _calc_windowed_stat( - meas - scaled_clear, _max_diff, samples_per_window, H) + slope_max_diff = _max_diff_windowed( + meas - scaled_clear, H, samples_per_window) # evaluate comparison criteria c1 = np.abs(meas_mean - alpha*clear_mean) < mean_diff c2 = np.abs(meas_max - alpha*clear_max) < max_diff From 7c7b4b2e0bab98e66152cab62d6d73776e7ef335 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 24 Dec 2020 10:12:57 -0700 Subject: [PATCH 45/60] add tests for new windowed functions --- pvlib/tests/test_clearsky.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index ce0979ff40..5b982da9d7 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -632,7 +632,7 @@ def test_detect_clearsky_missing_index(detect_clearsky_data): clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values) -def test__calc_windowed_stat(): +def test__line_length_windowed(): # sqt is hand-calculated assuming window=3 samples_per_window = 3 alignments = ['center'] # 'left' and 'right' could be added in the future @@ -652,12 +652,25 @@ def test__calc_windowed_stat(): for align in expected: data = expected[align]['data'] sample_interval = 1 - result = clearsky._calc_windowed_stat( - data, clearsky._line_length, samples_per_window, H, - args=(sample_interval,)) + result = clearsky._line_length_windowed( + data, H, samples_per_window, sample_interval) assert_series_equal(result, expected[align]['line_length']) +def test__max_diff_windowed(): + samples_per_window = 3 + sample_interval = 1 + x = pd.Series(np.arange(0, 7)**2.) + H = hankel(np.arange(samples_per_window), + np.arange(samples_per_window-1, len(x))) + expected = {} + expected['max_diff'] = pd.Series( + data=[np.nan, 3., 5., 7., 9., 11., np.nan], index=x.index) + result = clearsky._line_length_windowed( + x, H, samples_per_window, sample_interval) + assert_series_equal(result, expected['max_diff']) + + def test__calc_stats(): # stats are hand-computed assuming window = 3 and sample_interval = 1 samples_per_window = 3 From 20130d8e752fa7510316351f675452d29a1e62a3 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 24 Dec 2020 10:17:18 -0700 Subject: [PATCH 46/60] uncomplicate tests, remove align options --- pvlib/tests/test_clearsky.py | 51 ++++++++++++------------------------ 1 file changed, 17 insertions(+), 34 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 5b982da9d7..36db8456f4 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -635,26 +635,17 @@ def test_detect_clearsky_missing_index(detect_clearsky_data): def test__line_length_windowed(): # sqt is hand-calculated assuming window=3 samples_per_window = 3 - alignments = ['center'] # 'left' and 'right' could be added in the future - shift = {'center': -1} # 'left': -2, 'right': 0 + sample_interval = 1 x = pd.Series(np.arange(0, 7)**2.) # line length between adjacent points sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) H = hankel(np.arange(samples_per_window), np.arange(samples_per_window-1, len(sqt))) expected = {} - for align in alignments: - expected[align] = {} - s = shift[align] - line_length = sqt + sqt.shift(-1) - expected[align]['line_length'] = line_length.shift(s + 1) - expected[align]['data'] = x - for align in expected: - data = expected[align]['data'] - sample_interval = 1 - result = clearsky._line_length_windowed( - data, H, samples_per_window, sample_interval) - assert_series_equal(result, expected[align]['line_length']) + expected['line_length'] = sqt + sqt.shift(-1) + result = clearsky._line_length_windowed( + x, H, samples_per_window, sample_interval) + assert_series_equal(result, expected['line_length']) def test__max_diff_windowed(): @@ -675,8 +666,6 @@ def test__calc_stats(): # stats are hand-computed assuming window = 3 and sample_interval = 1 samples_per_window = 3 sample_interval = 1 - alignments = ['center'] # 'left' and 'right' could be added in the future - shift = {'center': -1} # 'left': -2, 'right': 0 x = pd.Series(np.arange(0, 7)**2.) mean_x = pd.Series(np.array([np.nan, np.nan, 5, 14, 29, 50, 77]) / 3.) max_x = pd.Series(np.array([np.nan, np.nan, 4, 9, 16, 25, 36])) @@ -687,24 +676,18 @@ def test__calc_stats(): H = hankel(np.arange(samples_per_window), np.arange(samples_per_window-1, len(x))) expected = {} - for align in alignments: - expected[align] = {} - s = shift[align] - expected[align]['mean'] = mean_x.shift(s) - expected[align]['max'] = max_x.shift(s) - # slope between adjacent points - expected[align]['slope'] = slope - expected[align]['slope_nstd'] = slope_nstd.shift(s) - expected[align]['data'] = x - for align in expected: - data = expected[align]['data'] - result = clearsky._calc_stats(data, samples_per_window, - sample_interval, H) - res_mean, res_max, res_slope_nstd, res_slope = result - assert_series_equal(res_mean, expected[align]['mean']) - assert_series_equal(res_max, expected[align]['max']) - assert_series_equal(res_slope_nstd, expected[align]['slope_nstd']) - assert_series_equal(res_slope, expected[align]['slope']) + expected['mean'] = mean_x + expected['max'] = max_x + # slope between adjacent points + expected['slope'] = slope + expected['slope_nstd'] = slope_nstd + result = clearsky._calc_stats( + x, samples_per_window, sample_interval, H) + res_mean, res_max, res_slope_nstd, res_slope = result + assert_series_equal(res_mean, expected['mean']) + assert_series_equal(res_max, expected['max']) + assert_series_equal(res_slope_nstd, expected['slope_nstd']) + assert_series_equal(res_slope, expected['slope']) def test_bird(): From 9d64abea520a21e2fceef6cda5e7f5aea057d3da Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 24 Dec 2020 10:21:13 -0700 Subject: [PATCH 47/60] move common data to fixture --- pvlib/tests/test_clearsky.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 36db8456f4..d9de59b02e 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -632,8 +632,8 @@ def test_detect_clearsky_missing_index(detect_clearsky_data): clearsky.detect_clearsky(expected['GHI'].values, cs['ghi'].values) -def test__line_length_windowed(): - # sqt is hand-calculated assuming window=3 +@pytest.fixture +def detect_clearsky_helper_data(): samples_per_window = 3 sample_interval = 1 x = pd.Series(np.arange(0, 7)**2.) @@ -641,6 +641,14 @@ def test__line_length_windowed(): sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) H = hankel(np.arange(samples_per_window), np.arange(samples_per_window-1, len(sqt))) + return x, samples_per_window, sample_interval, H + + +def test__line_length_windowed(detect_clearsky_helper_data): + x, samples_per_window, sample_interval, H = detect_clearsky_helper_data + # sqt is hand-calculated assuming window=3 + # line length between adjacent points + sqt = pd.Series(np.sqrt(np.array([np.nan, 2., 10., 26., 50., 82, 122.]))) expected = {} expected['line_length'] = sqt + sqt.shift(-1) result = clearsky._line_length_windowed( @@ -648,12 +656,8 @@ def test__line_length_windowed(): assert_series_equal(result, expected['line_length']) -def test__max_diff_windowed(): - samples_per_window = 3 - sample_interval = 1 - x = pd.Series(np.arange(0, 7)**2.) - H = hankel(np.arange(samples_per_window), - np.arange(samples_per_window-1, len(x))) +def test__max_diff_windowed(detect_clearsky_helper_data): + x, samples_per_window, sample_interval, H = detect_clearsky_helper_data expected = {} expected['max_diff'] = pd.Series( data=[np.nan, 3., 5., 7., 9., 11., np.nan], index=x.index) @@ -662,19 +666,15 @@ def test__max_diff_windowed(): assert_series_equal(result, expected['max_diff']) -def test__calc_stats(): +def test__calc_stats(detect_clearsky_helper_data): + x, samples_per_window, sample_interval, H = detect_clearsky_helper_data # stats are hand-computed assuming window = 3 and sample_interval = 1 - samples_per_window = 3 - sample_interval = 1 - x = pd.Series(np.arange(0, 7)**2.) mean_x = pd.Series(np.array([np.nan, np.nan, 5, 14, 29, 50, 77]) / 3.) max_x = pd.Series(np.array([np.nan, np.nan, 4, 9, 16, 25, 36])) diff_std = np.array([np.nan, np.nan, np.sqrt(2), np.sqrt(2), np.sqrt(2), np.sqrt(2), np.sqrt(2)]) slope_nstd = diff_std / mean_x slope = x.diff().shift(-1) - H = hankel(np.arange(samples_per_window), - np.arange(samples_per_window-1, len(x))) expected = {} expected['mean'] = mean_x expected['max'] = max_x From 0eda545434b42bddca698151da4afeeb37156870 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 24 Dec 2020 13:04:32 -0700 Subject: [PATCH 48/60] index correctly --- pvlib/clearsky.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index e1fcc58228..8b0c8d1e34 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -641,7 +641,7 @@ def _calc_stats(data, samples_per_window, sample_interval, H): def _slope_nstd_windowed(data, H, samples_per_window): with np.errstate(divide='ignore', invalid='ignore'): raw = np.diff(data) - raw = raw[H[:-1, ]].std(ddof=1, axis=0) / data[H].mean(axis=0) + raw = raw[H[:-1, ]].std(ddof=1, axis=0) / data.values[H].mean(axis=0) return _to_centered_series(raw, data.index, samples_per_window) From 4eb70e1ac26f5e37cd96b25d31dc4f8703f5c03d Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 24 Dec 2020 13:16:16 -0700 Subject: [PATCH 49/60] correct cut/paste/delete errors --- pvlib/tests/test_clearsky.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index d9de59b02e..49ea873271 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -661,14 +661,15 @@ def test__max_diff_windowed(detect_clearsky_helper_data): expected = {} expected['max_diff'] = pd.Series( data=[np.nan, 3., 5., 7., 9., 11., np.nan], index=x.index) - result = clearsky._line_length_windowed( + result = clearsky._max_diff_windowed( x, H, samples_per_window, sample_interval) assert_series_equal(result, expected['max_diff']) def test__calc_stats(detect_clearsky_helper_data): x, samples_per_window, sample_interval, H = detect_clearsky_helper_data - # stats are hand-computed assuming window = 3 and sample_interval = 1 + # stats are hand-computed assuming window = 3, sample_interval = 1, + # and right-aligned labels mean_x = pd.Series(np.array([np.nan, np.nan, 5, 14, 29, 50, 77]) / 3.) max_x = pd.Series(np.array([np.nan, np.nan, 4, 9, 16, 25, 36])) diff_std = np.array([np.nan, np.nan, np.sqrt(2), np.sqrt(2), np.sqrt(2), @@ -676,11 +677,11 @@ def test__calc_stats(detect_clearsky_helper_data): slope_nstd = diff_std / mean_x slope = x.diff().shift(-1) expected = {} - expected['mean'] = mean_x - expected['max'] = max_x + expected['mean'] = mean_x.shift(-1) # shift to align to center + expected['max'] = max_x.shift(-1) # slope between adjacent points expected['slope'] = slope - expected['slope_nstd'] = slope_nstd + expected['slope_nstd'] = slope_nstd.shift(-1) result = clearsky._calc_stats( x, samples_per_window, sample_interval, H) res_mean, res_max, res_slope_nstd, res_slope = result From 983bd6f867607b8c09d14dba3ebea3c3ab2ffd08 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Thu, 24 Dec 2020 14:43:12 -0700 Subject: [PATCH 50/60] proofread it next time --- pvlib/tests/test_clearsky.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 49ea873271..9bebd239cb 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -661,8 +661,7 @@ def test__max_diff_windowed(detect_clearsky_helper_data): expected = {} expected['max_diff'] = pd.Series( data=[np.nan, 3., 5., 7., 9., 11., np.nan], index=x.index) - result = clearsky._max_diff_windowed( - x, H, samples_per_window, sample_interval) + result = clearsky._max_diff_windowed(x, H, samples_per_window) assert_series_equal(result, expected['max_diff']) From bdee2ce211f24c4eb8093313b615fdddd772bb1e Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 28 Dec 2020 08:57:43 -0700 Subject: [PATCH 51/60] replace roller with numpy --- pvlib/clearsky.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 8b0c8d1e34..389478cee7 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -626,9 +626,10 @@ def _calc_stats(data, samples_per_window, sample_interval, H): difference between successive data points """ - roller = data.rolling(samples_per_window, center=True) - data_mean = roller.mean() - data_max = roller.max() + data_mean = np.mean(data[H], axis=0) + data_mean = _to_centered_series(data_mean, data.index, samples_per_window) + data_max = np.max(data[H], axis=0) + data_max = _to_centered_series(data_max, data.index, samples_per_window) # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) data_slope = data_diff / sample_interval From 26e36a21676f28821b65db8f59f8a8b676391546 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 28 Dec 2020 09:10:19 -0700 Subject: [PATCH 52/60] try this dimensioning --- pvlib/clearsky.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 389478cee7..91d82a134e 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -626,9 +626,9 @@ def _calc_stats(data, samples_per_window, sample_interval, H): difference between successive data points """ - data_mean = np.mean(data[H], axis=0) + data_mean = data[H].mean(axis=0) data_mean = _to_centered_series(data_mean, data.index, samples_per_window) - data_max = np.max(data[H], axis=0) + data_max = data[H].max(axis=0) data_max = _to_centered_series(data_max, data.index, samples_per_window) # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) From 05d8bc1e9c8796a0a6adda9ee71ea2581ee78e7d Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 28 Dec 2020 09:20:23 -0700 Subject: [PATCH 53/60] try numpy arrays instead --- pvlib/clearsky.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 91d82a134e..57a9162468 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -626,9 +626,9 @@ def _calc_stats(data, samples_per_window, sample_interval, H): difference between successive data points """ - data_mean = data[H].mean(axis=0) + data_mean = data.values[H].mean(axis=0) data_mean = _to_centered_series(data_mean, data.index, samples_per_window) - data_max = data[H].max(axis=0) + data_max = data.values[H].max(axis=0) data_max = _to_centered_series(data_max, data.index, samples_per_window) # shift to get forward difference, .diff() is backward difference instead data_diff = data.diff().shift(-1) From 5b4e441a338f8bd7a8a90c2627ba3a2a044d78a7 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 28 Dec 2020 09:26:55 -0700 Subject: [PATCH 54/60] remove unneeded conversion to Series --- pvlib/clearsky.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 57a9162468..896a2a2ce3 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -825,17 +825,6 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # be polite about returning the same type as was input ispandas = isinstance(measured, pd.Series) - # for internal use, need a Series - if not ispandas: - meas = pd.Series(measured, index=times) - else: - meas = measured - - if not isinstance(clearsky, pd.Series): - clear = pd.Series(clearsky, index=times) - else: - clear = clearsky - sample_interval, samples_per_window = _get_sample_intervals(times, window_length) @@ -845,13 +834,13 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # calculate measurement statistics meas_mean, meas_max, meas_slope_nstd, meas_slope \ - = _calc_stats(meas, samples_per_window, sample_interval, H) + = _calc_stats(measured, samples_per_window, sample_interval, H) meas_line_length = _line_length_windowed( - meas, H, samples_per_window, sample_interval) + measured, H, samples_per_window, sample_interval) # calculate clear sky statistics clear_mean, clear_max, _, clear_slope \ - = _calc_stats(clear, samples_per_window, sample_interval, H) + = _calc_stats(clearsky, samples_per_window, sample_interval, H) # find a scaling factor for the clear sky time series that minimizes the # RMSE between the clear times identified in the measured data and the @@ -861,13 +850,13 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # at least 50% of the day being identified as clear. alpha = 1 for iteration in range(max_iterations): - scaled_clear = alpha * clear + scaled_clear = alpha * clearsky clear_line_length = _line_length_windowed( scaled_clear, H, samples_per_window, sample_interval) line_diff = meas_line_length - clear_line_length slope_max_diff = _max_diff_windowed( - meas - scaled_clear, H, samples_per_window) + measured - scaled_clear, H, samples_per_window) # evaluate comparison criteria c1 = np.abs(meas_mean - alpha*clear_mean) < mean_diff c2 = np.abs(meas_max - alpha*clear_max) < max_diff @@ -878,7 +867,7 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, clear_windows = c1 & c2 & c3 & c4 & c5 & c6 # create array to return - clear_samples = np.full_like(meas, False, dtype='bool') + clear_samples = np.full_like(measured, False, dtype='bool') # find the samples contained in any window classified as clear idx = _clear_sample_index(clear_windows, samples_per_window, 'center', H) @@ -886,8 +875,8 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # find a new alpha previous_alpha = alpha - clear_meas = meas[clear_samples] - clear_clear = clear[clear_samples] + clear_meas = measured[clear_samples] + clear_clear = clearsky[clear_samples] def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) From e53572d9fbe115a05021cf09ecee4b39efb1c88e Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 28 Dec 2020 09:52:17 -0700 Subject: [PATCH 55/60] Revert "remove unneeded conversion to Series" This reverts commit 5b4e441a338f8bd7a8a90c2627ba3a2a044d78a7. --- pvlib/clearsky.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 896a2a2ce3..57a9162468 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -825,6 +825,17 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # be polite about returning the same type as was input ispandas = isinstance(measured, pd.Series) + # for internal use, need a Series + if not ispandas: + meas = pd.Series(measured, index=times) + else: + meas = measured + + if not isinstance(clearsky, pd.Series): + clear = pd.Series(clearsky, index=times) + else: + clear = clearsky + sample_interval, samples_per_window = _get_sample_intervals(times, window_length) @@ -834,13 +845,13 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # calculate measurement statistics meas_mean, meas_max, meas_slope_nstd, meas_slope \ - = _calc_stats(measured, samples_per_window, sample_interval, H) + = _calc_stats(meas, samples_per_window, sample_interval, H) meas_line_length = _line_length_windowed( - measured, H, samples_per_window, sample_interval) + meas, H, samples_per_window, sample_interval) # calculate clear sky statistics clear_mean, clear_max, _, clear_slope \ - = _calc_stats(clearsky, samples_per_window, sample_interval, H) + = _calc_stats(clear, samples_per_window, sample_interval, H) # find a scaling factor for the clear sky time series that minimizes the # RMSE between the clear times identified in the measured data and the @@ -850,13 +861,13 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # at least 50% of the day being identified as clear. alpha = 1 for iteration in range(max_iterations): - scaled_clear = alpha * clearsky + scaled_clear = alpha * clear clear_line_length = _line_length_windowed( scaled_clear, H, samples_per_window, sample_interval) line_diff = meas_line_length - clear_line_length slope_max_diff = _max_diff_windowed( - measured - scaled_clear, H, samples_per_window) + meas - scaled_clear, H, samples_per_window) # evaluate comparison criteria c1 = np.abs(meas_mean - alpha*clear_mean) < mean_diff c2 = np.abs(meas_max - alpha*clear_max) < max_diff @@ -867,7 +878,7 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, clear_windows = c1 & c2 & c3 & c4 & c5 & c6 # create array to return - clear_samples = np.full_like(measured, False, dtype='bool') + clear_samples = np.full_like(meas, False, dtype='bool') # find the samples contained in any window classified as clear idx = _clear_sample_index(clear_windows, samples_per_window, 'center', H) @@ -875,8 +886,8 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, # find a new alpha previous_alpha = alpha - clear_meas = measured[clear_samples] - clear_clear = clearsky[clear_samples] + clear_meas = meas[clear_samples] + clear_clear = clear[clear_samples] def rmse(alpha): return np.sqrt(np.mean((clear_meas - alpha*clear_clear)**2)) From ea8234885dfdfd72cd2e0e344b3be2deda591248 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Mon, 28 Dec 2020 13:42:32 -0700 Subject: [PATCH 56/60] param ndays in benchmark --- benchmarks/benchmarks/detect_clearsky.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmarks/detect_clearsky.py b/benchmarks/benchmarks/detect_clearsky.py index d42e38a692..7f2039f13c 100644 --- a/benchmarks/benchmarks/detect_clearsky.py +++ b/benchmarks/benchmarks/detect_clearsky.py @@ -8,10 +8,12 @@ class DetectClear: + params = [1, 10, 100] # number of days + param_names = ['ndays'] - def setup(self): + def setup(self, ndays): self.times = pd.date_range(start='20180601', freq='1min', - periods=14400) + periods=1440*ndays) self.lat = 35.1 self.lon = -106.6 self.solar_position = solarposition.get_solarposition( @@ -26,7 +28,7 @@ def setup(self): self.measured *= 0.98 self.window_length = 10 - def time_detect_clearsky(self): + def time_detect_clearsky(self, ndays): clearsky.detect_clearsky( self.measured, self.clearsky, self.times, self.window_length ) From 970227dc8b42c5ff8c68a8081d465431429f6d94 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 29 Dec 2020 09:50:45 -0700 Subject: [PATCH 57/60] Line formatting Co-authored-by: Will Holmgren --- pvlib/clearsky.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 57a9162468..9b92ce73db 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -844,8 +844,8 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, np.arange(samples_per_window-1, len(times))) # calculate measurement statistics - meas_mean, meas_max, meas_slope_nstd, meas_slope \ - = _calc_stats(meas, samples_per_window, sample_interval, H) + meas_mean, meas_max, meas_slope_nstd, meas_slope = _calc_stats( + meas, samples_per_window, sample_interval, H) meas_line_length = _line_length_windowed( meas, H, samples_per_window, sample_interval) From 79a329c35988ffaf7bca1ea0cd41e7bd47547dad Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 29 Dec 2020 09:51:00 -0700 Subject: [PATCH 58/60] More line formatting Co-authored-by: Will Holmgren --- pvlib/clearsky.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 9b92ce73db..010b139226 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -850,8 +850,8 @@ def detect_clearsky(measured, clearsky, times=None, window_length=10, meas, H, samples_per_window, sample_interval) # calculate clear sky statistics - clear_mean, clear_max, _, clear_slope \ - = _calc_stats(clear, samples_per_window, sample_interval, H) + clear_mean, clear_max, _, clear_slope = _calc_stats( + clear, samples_per_window, sample_interval, H) # find a scaling factor for the clear sky time series that minimizes the # RMSE between the clear times identified in the measured data and the From 7af5f55ac027d714d1d9e63c5959776c1c43ca25 Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Tue, 29 Dec 2020 09:51:18 -0700 Subject: [PATCH 59/60] Remove unneeded declaration Co-authored-by: Will Holmgren --- pvlib/clearsky.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pvlib/clearsky.py b/pvlib/clearsky.py index 010b139226..857c5dd202 100644 --- a/pvlib/clearsky.py +++ b/pvlib/clearsky.py @@ -905,7 +905,6 @@ def rmse(alpha): clear_samples = pd.Series(clear_samples, index=times) if return_components: - components = OrderedDict() components = OrderedDict() components['mean_diff_flag'] = c1 components['max_diff_flag'] = c2 From be1829b3face8007b79ed098e10f21287785a33a Mon Sep 17 00:00:00 2001 From: Cliff Hansen Date: Mon, 4 Jan 2021 14:45:55 -0700 Subject: [PATCH 60/60] fix test error, .all -> .any --- pvlib/tests/test_clearsky.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/tests/test_clearsky.py b/pvlib/tests/test_clearsky.py index 9bebd239cb..fd886f224c 100644 --- a/pvlib/tests/test_clearsky.py +++ b/pvlib/tests/test_clearsky.py @@ -581,7 +581,7 @@ def test_detect_clearsky_iterations(detect_clearsky_data): clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=1) assert clear_samples[:'2012-04-01 10:41:00'].all() - assert not clear_samples['2012-04-01 10:42:00':].all() # expected False + assert not clear_samples['2012-04-01 10:42:00':].any() # expected False clear_samples = clearsky.detect_clearsky( expected['GHI'], cs['ghi']*alpha, max_iterations=20) assert_series_equal(expected['Clear or not'], clear_samples,