pvlib · wholmgren · Oct 29, 2018 · Oct 16, 2018 · Oct 16, 2018 · Oct 16, 2018
diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst
@@ -322,6 +322,8 @@ relevant to solar energy modeling.
    iotools.read_srml
    iotools.read_srml_month_from_solardat
    iotools.read_surfrad
+   iotools.read_midc
+   iotools.read_midc_raw_data_from_nrel
 
 A :py:class:`~pvlib.location.Location` object may be created from metadata
 in some files.

diff --git a/docs/sphinx/source/whatsnew/v0.6.1.rst b/docs/sphinx/source/whatsnew/v0.6.1.rst
@@ -36,6 +36,8 @@ Enhancements
   :py:func:`pvlib.iotools.read_srml_month_from_solardat` to read University of
   Oregon Solar Radiation Monitoring Laboratory data. (:issue:`589`)
 * Created :py:func:`pvlib.iotools.read_surfrad` to read NOAA SURFRAD data. (:issue:`590`)
+* Created :py:func:`pvlib.iotools.read_midc` and :py:func:`pvlib.iotools.read_midc_raw_data_from_nrel`
+  to read NREL MIDC data. (:issue:`601`)
 
 Bug fixes
 ~~~~~~~~~

diff --git a/pvlib/data/midc_20181014.txt b/pvlib/data/midc_20181014.txt
diff --git a/pvlib/data/midc_raw_20181018.txt b/pvlib/data/midc_raw_20181018.txt
diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
@@ -3,3 +3,5 @@
 from pvlib.iotools.srml import read_srml  # noqa: F401
 from pvlib.iotools.srml import read_srml_month_from_solardat  # noqa: F401
 from pvlib.iotools.surfrad import read_surfrad  # noqa: F401
+from pvlib.iotools.midc import read_midc  # noqa: F401
+from pvlib.iotools.midc import read_midc_raw_data_from_nrel  # noqa: F401
diff --git a/pvlib/iotools/midc.py b/pvlib/iotools/midc.py
@@ -0,0 +1,196 @@
+"""Functions to read NREL MIDC data.
+"""
+from functools import partial
+import pandas as pd
+
+# VARIABLE_MAP is a dictionary mapping partial MIDC field names to their
+# pvlib names. Each key is the leading part of a MIDC column label;
+# map_midc_to_pvlib picks the first key the label starts with and appends the
+# text that follows the key (up to the bracketed units) to the pvlib name.
+# See docstring of read_midc for description.
+
+VARIABLE_MAP = {
+    'Direct': 'dni',
+    'Global': 'ghi',
+    'Diffuse': 'dhi',
+    'Airmass': 'airmass',
+    'Azimuth Angle': 'solar_azimuth',
+    'Zenith Angle': 'solar_zenith',
+    # both spellings of the temperature label map to the same pvlib name
+    'Air Temperature': 'temp_air',
+    'Temperature': 'temp_air',
+    'Dew Point Temp': 'temp_dew',
+    'Relative Humidity': 'relative_humidity',
+}
+
+# Maps problematic timezones to 'Etc/GMT' for parsing. Keys are the timezone
+# abbreviations found as the label of the time column; format_index and
+# format_index_raw look the label up here and fall back to the label itself
+# when it is not listed.
+# NOTE(review): 'Etc/GMT+N' zones use the inverted (POSIX-style) sign, so
+# 'Etc/GMT+8' should be 8 hours behind UTC -- confirm against the tz database.
+
+TZ_MAP = {
+    'PST': 'Etc/GMT+8',
+    'CST': 'Etc/GMT+6',
+}
+
+
+def map_midc_to_pvlib(variable_map, field_name):
+    """Rename a MIDC column label to its pvlib counterpart.
+
+    Parameters
+    ----------
+    variable_map: Dictionary
+        A dictionary for mapping MIDC field name to pvlib name. See
+        VARIABLE_MAP for default value and description of how to construct
+        this argument.
+    field_name: string
+        The Column to map.
+
+    Returns
+    -------
+    label: string
+        The pvlib variable name associated with the MIDC field or the input if
+        a mapping does not exist.
+
+    Notes
+    -----
+    The first key of `variable_map` (in iteration order) that `field_name`
+    starts with is used. The text between that key and the first ``'['`` is
+    taken as the instrument description: trailing whitespace is removed and
+    the remaining spaces become underscores before it is appended to the
+    pvlib name. A field name without a bracketed units section is accepted;
+    the whole remainder is then used as the instrument description.
+
+    """
+    new_field_name = field_name
+    for midc_name, pvlib_name in variable_map.items():
+        if field_name.startswith(midc_name):
+            # Separate the instrument from the units. partition() returns the
+            # full remainder when there is no '[', whereas slicing with the -1
+            # from str.find would silently drop trailing characters of the
+            # instrument name.
+            remainder = field_name[len(midc_name):]
+            instrument = remainder.partition('[')[0].rstrip()
+            new_field_name = pvlib_name + instrument.replace(' ', '_')
+            break
+    return new_field_name
+
+
+def format_index(data):
+    """Index a MIDC Dataframe by its localized date and time columns.
+
+    The date is read from the 'DATE (MM/DD/YYYY)' column and the time from
+    the second column, whose label names the timezone of the data.
+
+    Parameters
+    ----------
+    data: Dataframe
+        Must contain 'DATE (MM/DD/YYYY)' column, second column must be labeled
+        with the timezone and contain times in 'HH:MM' format.
+
+    Returns
+    -------
+    data: Dataframe
+        Dataframe with DatetimeIndex localized to the provided timezone.
+    """
+    time_label = data.columns[1]
+    # labels listed in TZ_MAP are swapped for their replacement zone name
+    zone = TZ_MAP.get(time_label, time_label)
+    stamps = pd.to_datetime(data['DATE (MM/DD/YYYY)'] + data[time_label],
+                            format='%m/%d/%Y%H:%M')
+    return data.set_index(stamps).tz_localize(zone)
+
+
+def format_index_raw(data):
+    """Create a localized DatetimeIndex for a MIDC raw data Dataframe.
+
+    The timestamp is assembled from the 'Year' and 'DOY' columns and the time
+    column, which is the fourth column (``data.columns[3]``). The label of
+    that column names the timezone of the data.
+
+    Parameters
+    ----------
+    data: Dataframe
+        Must contain columns 'Year' and 'DOY'. Timezone must be found as the
+        label of the fourth (time) column, which holds integer times that
+        read as 'HHMM' once zero-padded to four digits.
+
+    Returns
+    -------
+    data: Dataframe
+        The data with a Datetime index localized to the provided timezone.
+    """
+    tz_raw = data.columns[3]
+    timezone = TZ_MAP.get(tz_raw, tz_raw)
+    year = data.Year.apply(str)
+    # zero-pad so the concatenated string matches the fixed-width format
+    jday = data.DOY.apply('{:03d}'.format)
+    time = data[tz_raw].apply('{:04d}'.format)
+    index = pd.to_datetime(year + jday + time, format="%Y%j%H%M")
+    data = data.set_index(index)
+    data = data.tz_localize(timezone)
+    return data
+
+
+def read_midc(filename, variable_map=VARIABLE_MAP, raw_data=False):
+    """Read National Renewable Energy Laboratory Measurement and
+    Instrumentation Data Center [1]_ weather data into a Dataframe.
+
+    Parameters
+    ----------
+    filename: string
+        Filename or url of data to read.
+    variable_map: dictionary
+        Dictionary for mapping MIDC field names to pvlib names. See variable
+        `VARIABLE_MAP` for default and Notes section below for a description of
+        its format.
+    raw_data: boolean
+        Set to true to use format_index_raw to correctly format the date/time
+        columns of MIDC raw data files.
+
+    Returns
+    -------
+    data: Dataframe
+        A dataframe with DatetimeIndex localized to the provided timezone.
+
+    Notes
+    -----
+    Each key of the `variable_map` dictionary is the leading part of a MIDC
+    field name, i.e. the part that names the measured variable.
+
+        e.g. 'Global PSP [W/m^2]' is entered as a key of 'Global'
+
+    The instrument part of the field name ('PSP' above) is appended to the
+    mapped pvlib variable name so that several measurements of the same
+    variable stay distinguishable. For a full list of pvlib variable names
+    see the `Variable Style Rules
+    <https://pvlib-python.readthedocs.io/en/latest/variables_style_rules.html>`_.
+
+    Be sure to check the units for the variables you will use on the
+    `MIDC site <https://midcdmz.nrel.gov/>`_.
+
+    References
+    ----------
+    .. [1] NREL: Measurement and Instrumentation Data Center
+        `https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
+    """
+    frame = pd.read_csv(filename)
+    # raw data files keep their date/time in a different column layout
+    frame = format_index_raw(frame) if raw_data else format_index(frame)
+    rename_column = partial(map_midc_to_pvlib, variable_map)
+    return frame.rename(columns=rename_column)
+
+
+def read_midc_raw_data_from_nrel(site, start, end):
+    """Request and read MIDC data directly from the raw data api.
+
+    Parameters
+    ----------
+    site: string
+        The MIDC station id.
+    start: datetime
+        Start date for requested data.
+    end: datetime
+        End date for requested data.
+
+    Returns
+    -------
+    data: Dataframe
+        Dataframe with DatetimeIndex localized to the timezone given as the
+        label of the time column in the returned data.
+
+    Notes
+    -----
+    Only the date part of `start` and `end` is sent to the api, formatted as
+    ``YYYYMMDD``.
+
+    Requests spanning an instrumentation change will yield an error. See the
+    MIDC raw data api page
+    `here <https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist>`_
+    for more details and considerations.
+    """
+    # An ordered sequence (rather than a dict) keeps the query string the
+    # same on every Python version.
+    params = (('site', site),
+              ('begin', start.strftime('%Y%m%d')),
+              ('end', end.strftime('%Y%m%d')))
+    query = '&'.join('{}={}'.format(key, value) for key, value in params)
+    url = 'https://midcdmz.nrel.gov/apps/data_api.pl?' + query
+    return read_midc(url, raw_data=True)
diff --git a/pvlib/test/test_midc.py b/pvlib/test/test_midc.py
@@ -0,0 +1,73 @@
+import inspect
+import os
+
+import pandas as pd
+from pandas.util.testing import network
+import pytest
+import pytz
+
+from pvlib.iotools import midc
+
+
+# Directory containing this test module; the data files below are located
+# relative to it.
+test_dir = os.path.dirname(
+    os.path.abspath(inspect.getfile(inspect.currentframe())))
+midc_testfile = os.path.join(test_dir, '../data/midc_20181014.txt')
+midc_raw_testfile = os.path.join(test_dir, '../data/midc_raw_20181018.txt')
+# Raw data api request for site UAT covering 2018-10-18 to 2018-10-19.
+# NOTE(review): not referenced by any test visible in this module.
+midc_network_testfile = ('https://midcdmz.nrel.gov/apps/data_api.pl'
+                         '?site=UAT&begin=20181018&end=20181019')
+
+
+@pytest.mark.parametrize('field_name,expected', [
+    ('Temperature @ 2m [deg C]', 'temp_air_@_2m'),
+    ('Global PSP [W/m^2]', 'ghi_PSP'),
+    ('Temperature @ 50m [deg C]', 'temp_air_@_50m'),
+    ('Other Variable [units]', 'Other Variable [units]'),
+])
+def test_read_midc_mapper_function(field_name, expected):
+    """Mapped labels get the pvlib name; unmapped labels pass through."""
+    mapped = midc.map_midc_to_pvlib(midc.VARIABLE_MAP, field_name)
+    assert mapped == expected
+
+
+def test_midc_format_index():
+    """format_index yields a DatetimeIndex spanning 2018-10-14 in MST."""
+    data = pd.read_csv(midc_testfile)
+    data = midc.format_index(data)
+    start = pd.Timestamp("20181014 00:00")
+    start = start.tz_localize("MST")
+    end = pd.Timestamp("20181014 23:59")
+    end = end.tz_localize("MST")
+    # isinstance is the idiomatic type check and also accepts subclasses
+    assert isinstance(data.index, pd.DatetimeIndex)
+    assert data.index[0] == start
+    assert data.index[-1] == end
+
+
+def test_midc_format_index_tz_conversion():
+    """A 'PST' time column label is localized as 'Etc/GMT+8'."""
+    renamed = pd.read_csv(midc_testfile).rename(columns={'MST': 'PST'})
+    indexed = midc.format_index(renamed)
+    first_stamp = indexed.index[0]
+    assert first_stamp.tz == pytz.timezone('Etc/GMT+8')
+
+
+def test_midc_format_index_raw():
+    """format_index_raw yields an index spanning 2018-10-18 in MST."""
+    raw = midc.format_index_raw(pd.read_csv(midc_raw_testfile))
+    expected_first = pd.Timestamp('20181018 00:00').tz_localize('MST')
+    expected_last = pd.Timestamp('20181018 23:59').tz_localize('MST')
+    assert raw.index[0] == expected_first
+    assert raw.index[-1] == expected_last
+
+
+def test_read_midc_var_mapping_as_arg():
+    """read_midc renames columns when the map is passed explicitly."""
+    frame = midc.read_midc(midc_testfile, variable_map=midc.VARIABLE_MAP)
+    columns = frame.columns
+    for name in ('ghi_PSP', 'temp_air_@_2m', 'temp_air_@_50m'):
+        assert name in columns
+
+
+@network
+def test_read_midc_raw_data_from_nrel():
+    """Fetch two days of UAT raw data through the MIDC api."""
+    first_day, last_day = pd.Timestamp('20181018'), pd.Timestamp('20181019')
+    data = midc.read_midc_raw_data_from_nrel('UAT', first_day, last_day)
+    assert 'dni_Normal' in data.columns
+    # 2880 == 2 * 1440; presumably one row per minute over both days
+    assert data.index.size == 2880