pvlib · cwhanse · Feb 25, 2019 · Feb 19, 2019 · Feb 19, 2019 · Feb 19, 2019
diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst
@@ -337,6 +337,7 @@ relevant to solar energy modeling.
    iotools.read_midc_raw_data_from_nrel
    iotools.read_ecmwf_macc
    iotools.get_ecmwf_macc
+   iotools.read_crn
 
 A :py:class:`~pvlib.location.Location` object may be created from metadata
 in some files.

diff --git a/docs/sphinx/source/whatsnew/v0.6.2.rst b/docs/sphinx/source/whatsnew/v0.6.2.rst
@@ -16,7 +16,7 @@ API Changes
 
 Enhancements
 ~~~~~~~~~~~~
-
+* Add US CRN data reader to `pvlib.iotools`.
 
 Bug fixes
 ~~~~~~~~~

diff --git a/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt b/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt
@@ -0,0 +1,3 @@
+53131 20190101 0005 20181231 1705      3 -111.17   32.24    10.4     0.0     10 0     9.0 C 0    52 0 -99.000 -9999.0  1144 0   2.20 0
+53131 20190101 0010 20181231 1710      3 -111.17   32.24    10.5     0.0 -99.00 0     9.0 C 0    52 0 -99.000 -9999.0    19 0   2.95 0
+53131 20190101 0015 20181231 1715      3 -111.17   32.24    -9999.0  0.0      9 0     8.9 C 0    52 0 -99.000 -9999.0    19 0   3.25 0
diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
@@ -7,3 +7,4 @@
 from pvlib.iotools.midc import read_midc_raw_data_from_nrel  # noqa: F401
 from pvlib.iotools.ecmwf_macc import read_ecmwf_macc  # noqa: F401
 from pvlib.iotools.ecmwf_macc import get_ecmwf_macc  # noqa: F401
+from pvlib.iotools.crn import read_crn  # noqa: F401
diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py
@@ -0,0 +1,82 @@
+"""Functions to read data from the US Climate Reference Network (CRN).
+"""
+
+import pandas as pd
+import numpy as np
+
+
+# Space-separated column names, in the order they are handed to
+# pandas.read_fwf as ``names`` by read_crn.
+HEADERS = (
+    'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE '
+    'AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG '
+    'SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG '
+    'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG'
+)
+
+# CRN column name -> name used in the returned DataFrame. Columns that
+# are not listed here keep their CRN name.
+VARIABLE_MAP = {
+    # site coordinates
+    'LONGITUDE': 'longitude',
+    'LATITUDE': 'latitude',
+    # measurements and their flags
+    'AIR_TEMPERATURE': 'temp_air',
+    'SOLAR_RADIATION': 'ghi',
+    'SR_FLAG': 'ghi_flag',
+    'RELATIVE_HUMIDITY': 'relative_humidity',
+    'RH_FLAG': 'relative_humidity_flag',
+    'WIND_1_5': 'wind_speed',
+    'WIND_FLAG': 'wind_speed_flag'
+}
+
+# specify dtypes for potentially problematic values: these columns are
+# always parsed as float64
+DTYPES = dict.fromkeys(
+    ('AIR_TEMPERATURE', 'SOLAR_RADIATION', 'RELATIVE_HUMIDITY', 'WIND_1_5'),
+    np.float64)
+
+
+def read_crn(filename):
+    """
+    Read a NOAA USCRN [1]_, [2]_ fixed-width file into a pandas DataFrame.
+
+    Parameters
+    ----------
+    filename: str
+        Filepath or url of the fixed-width file to read.
+
+    Returns
+    -------
+    data: DataFrame
+        A DataFrame indexed by UTC timestamps built from the UTC_DATE
+        and UTC_TIME columns. It holds every column named in the
+        module-level `HEADERS`; the columns listed in the module-level
+        `VARIABLE_MAP` dict are renamed to the names given there
+        (e.g. ``SOLAR_RADIATION`` becomes ``ghi``), along with their
+        associated quality control flags.
+
+    Notes
+    -----
+    CRN files contain 5 minute averages labeled by the interval ending
+    time. Here, missing data is flagged as NaN, rather than the lowest
+    possible integer for a field (e.g. -999 or -99). The replacement is
+    applied to every column: any value equal to -99, -999 or -9999
+    becomes NaN.
+    Air temperature in deg C.
+    Wind speed in m/s at a height of 1.5 m above ground level.
+
+    References
+    ----------
+    .. [1] U.S. Climate Reference Network
+       `https://www.ncdc.noaa.gov/crn/qcdatasets.html <https://www.ncdc.noaa.gov/crn/qcdatasets.html>`_
+    .. [2] Diamond, H. J. et. al., 2013: U.S. Climate Reference Network
+       after one decade of operations: status and assessment. Bull. Amer.
+       Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1`
+    """
+
+    # read in data; no colspecs/widths are given, so pandas.read_fwf
+    # falls back to its default handling of column boundaries
+    data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '),
+                       dtype=DTYPES)
+
+    # set index
+    # UTC_TIME does not have leading 0s, so must zfill(4) to comply
+    # with %H%M format
+    dts = data[['UTC_DATE', 'UTC_TIME']].astype(str)
+    dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4),
+                             format='%Y%m%d%H%M', utc=True)
+    data = data.set_index(dtindex)
+
+    # set nans: one mask covering all missing-data sentinels instead of
+    # one full-frame pass per sentinel. Masked cells become NaN, which
+    # is the default replacement value of DataFrame.where.
+    missing = data.isin([-99, -999, -9999])
+    data = data.where(~missing)
+
+    data = data.rename(columns=VARIABLE_MAP)
+
+    return data
diff --git a/pvlib/test/test_crn.py b/pvlib/test/test_crn.py
@@ -0,0 +1,48 @@
+import inspect
+import os
+
+import pandas as pd
+from pandas.util.testing import assert_frame_equal
+import numpy as np
+from numpy import dtype, nan
+
+from pvlib.iotools import crn
+
+
+# directory that holds this test module, resolved to an absolute path
+test_dir = os.path.abspath(inspect.getfile(inspect.currentframe()))
+test_dir = os.path.dirname(test_dir)
+# sample CRN file, addressed relative to the test directory
+testfile = os.path.join(
+    test_dir, '../data/CRNS0101-05-2019-AZ_Tucson_11_W.txt')
+
+
+def test_read_crn():
+    """Check read_crn output for `testfile` against a hand-built frame."""
+    i8, f8, obj = dtype('int64'), dtype('float64'), dtype('O')
+    # (column name, expected dtype) pairs, in column order
+    schema = [
+        ('WBANNO', i8), ('UTC_DATE', i8), ('UTC_TIME', i8),
+        ('LST_DATE', i8), ('LST_TIME', i8), ('CRX_VN', i8),
+        ('longitude', f8), ('latitude', f8), ('temp_air', f8),
+        ('PRECIPITATION', f8), ('ghi', f8), ('ghi_flag', i8),
+        ('SURFACE_TEMPERATURE', f8), ('ST_TYPE', obj), ('ST_FLAG', i8),
+        ('relative_humidity', f8), ('relative_humidity_flag', i8),
+        ('SOIL_MOISTURE_5', f8), ('SOIL_TEMPERATURE_5', f8),
+        ('WETNESS', i8), ('WET_FLAG', i8), ('wind_speed', f8),
+        ('wind_speed_flag', i8)]
+    names = [name for name, _ in schema]
+
+    timestamps = pd.DatetimeIndex(
+        ['2019-01-01 00:05:00+00:00',
+         '2019-01-01 00:10:00+00:00',
+         '2019-01-01 00:15:00+00:00'],
+        dtype='datetime64[ns, UTC]', freq=None)
+
+    # one list per record; nan marks the values expected to be missing
+    rows = [
+        [53131, 20190101, 5, 20181231, 1705, 3, -111.17, 32.24, 10.4, 0.0,
+         10.0, 0, 9.0, 'C', 0, 52.0, 0, nan, nan, 1144, 0, 2.2, 0],
+        [53131, 20190101, 10, 20181231, 1710, 3, -111.17, 32.24, 10.5,
+         0.0, nan, 0, 9.0, 'C', 0, 52.0, 0, nan, nan, 19, 0, 2.95, 0],
+        [53131, 20190101, 15, 20181231, 1715, 3, -111.17, 32.24, nan, 0.0,
+         9.0, 0, 8.9, 'C', 0, 52.0, 0, nan, nan, 19, 0, 3.25, 0]]
+
+    expected = pd.DataFrame(np.array(rows), columns=names, index=timestamps)
+    # the mixed-type array carries no per-column dtypes, so cast each
+    # column to the dtype the reader is expected to produce
+    for name, kind in schema:
+        expected[name] = expected[name].astype(kind)
+
+    assert_frame_equal(crn.read_crn(testfile), expected)