Skip to content

Commit 988e4ba

Browse files
authored
Support downloading a DMR file in earthdata-varinfo (#60)
1 parent 4fa8f31 commit 988e4ba

File tree

7 files changed

+248
-10
lines changed

7 files changed

+248
-10
lines changed

CHANGELOG.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
## v3.1.1
2-
### Unreleased
1+
## v3.2.0
2+
### 2025-07-25
33

44
### Changed:
55

@@ -11,6 +11,11 @@
1111
rule. There is no filtering based on the variable path of the requested
1212
variables in the input set to `VarInfoBase.get_required_variables`.
1313

14+
### Added:
15+
16+
* Support for getting an OPeNDAP url with `cmr_search.get_dmr_xml_url` and
17+
a `use_dmr=True` flag to `varinfo.generate_umm_var.generate_collection_umm_var`
18+
1419
## v3.1.0
1520
### 2025-03-25
1621

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,16 @@ umm_var_json = generate_collection_umm_var(<Production collection concept ID>,
120120
umm_var_json = generate_collection_umm_var(<UAT collection concept ID>,
121121
<authorization header>,
122122
publish=True)
123+
124+
# Use a DMR file to generate UMM-Var, defaults to UAT, and not to publish:
125+
umm_var_json = generate_collection_umm_var(<UAT collection concept ID>,
126+
<authorization header>, use_dmr=True)
127+
128+
# To generate and publish records from a DMR file for a UAT collection
129+
# (note the authorization header must contain a LaunchPad token):
130+
umm_var_json = generate_collection_umm_var(<UAT collection concept ID>,
131+
<authorization header>,
132+
publish=True, use_dmr=True)
123133
```
124134

125135
Expected outputs:

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.1.0
1+
3.2.0

tests/unit/test_cmr_search.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from varinfo.cmr_search import (
1212
get_granules,
1313
get_granule_link,
14+
get_dmr_xml_url,
1415
download_granule,
1516
get_edl_token_from_launchpad,
1617
get_edl_token_header,
@@ -519,3 +520,120 @@ def test_get_edl_token_header_with_edl_token(self):
519520
"""
520521
test_bearer_token = get_edl_token_header(self.bearer_token_header, CMR_UAT)
521522
self.assertEqual(test_bearer_token, self.bearer_token_header)
523+
524+
def test_get_dmr_xml_url_raises(self):
525+
"""Check if MissingGranuleDownloadLinks is raised with get_dmr_xml_url,
526+
when the "links" attribute in the "RelatedUrls" of a granule response doesn't contain
527+
the correct fields for an OPeNDAP service url.
528+
"""
529+
530+
# Nested dict does not contain key `links`
531+
granule_response_no_links = [
532+
{'no_links': [{'rel': 'http://esipfed.org/ns/fedsearch/1.1/s3#'}]}
533+
]
534+
535+
# Nested dict contains `links` but `rel` does not end in '/service#'
536+
granule_response_links_no_rel = [
537+
{'links': [{'rel': 'http://cool-science-data.#'}]}
538+
]
539+
540+
# Nested dict contains "inherited" key
541+
granule_response_links_rel_inherit = [
542+
{
543+
'links': [
544+
{
545+
'rel': 'http://cool-science-data/1.1/service#',
546+
'href': 'http://cool-science-data/data/.nc4',
547+
'inherited': True,
548+
}
549+
]
550+
}
551+
]
552+
553+
granule_response_links_empty = [{'links': []}]
554+
555+
granule_response_links_no_opendap = [
556+
{
557+
'links': [
558+
{
559+
'rel': 'http://cool-science-data/1.1/service#',
560+
'title': 'OPeNDAP request URL (GET DATA : OPENDAP DATA)',
561+
'hreflang': 'en-US',
562+
'href': 'https://fake.earthdata.nasa.gov/example.nc4',
563+
}
564+
]
565+
}
566+
]
567+
568+
with self.subTest('Granule has no `links` key'):
569+
with self.assertRaises(MissingGranuleDownloadLinks) as context_manager:
570+
get_dmr_xml_url(granule_response_no_links)
571+
self.assertEqual(
572+
f'No links for granule record: {str(granule_response_no_links)}',
573+
str(context_manager.exception),
574+
)
575+
576+
with self.subTest(
577+
'Granule has `links` but `rel` does not end with `/service#`'
578+
):
579+
with self.assertRaises(MissingGranuleDownloadLinks):
580+
get_dmr_xml_url(granule_response_links_no_rel)
581+
582+
with self.subTest(
583+
'Granule has `links` and `rel` ends in `/service#`, but contains `inherited` field.'
584+
):
585+
with self.assertRaises(MissingGranuleDownloadLinks):
586+
get_dmr_xml_url(granule_response_links_rel_inherit)
587+
588+
with self.subTest('Granule has `links` but it is an empty list'):
589+
with self.assertRaises(MissingGranuleDownloadLinks):
590+
get_dmr_xml_url(granule_response_links_empty)
591+
592+
with self.subTest(
593+
'Granule has `links` and `rel` ends in `/service#`, but opendap is NOT in url.'
594+
):
595+
with self.assertRaises(MissingGranuleDownloadLinks):
596+
get_dmr_xml_url(granule_response_links_no_opendap)
597+
598+
def test_get_dmr_xml_url(self):
599+
"""Assert the correct OPeNDAP url is returned for a good granule response
600+
(i.e. when the "links" attribute in the "RelatedUrls" of a granule response has
601+
the correct fields for an OPeNDAP service url).
602+
"""
603+
granule_response_links_correct_cloud_opendap = [
604+
{
605+
'links': [
606+
{
607+
'rel': 'http://cool-science-data/1.1/service#',
608+
'title': 'OPeNDAP request URL (GET DATA : OPENDAP DATA)',
609+
'hreflang': 'en-US',
610+
'href': 'https://fake.opendap.earthdata.nasa.gov/example.hdf',
611+
}
612+
]
613+
}
614+
]
615+
616+
granule_response_links_correct_onprem_opendap = [
617+
{
618+
'links': [
619+
{
620+
'rel': 'http://cool-science-data/1.1/service#',
621+
'type': 'application/x-hdf',
622+
'title': 'OPeNDAP request URL (GET DATA : OPENDAP DATA)',
623+
'hreflang': 'en-US',
624+
'href': 'https://some.fake.server.onprem.opendap.nasa.gov/example.hdf',
625+
}
626+
]
627+
}
628+
]
629+
with self.subTest('Cloud OPeNDAP Related Urls response'):
630+
self.assertEqual(
631+
get_dmr_xml_url(granule_response_links_correct_cloud_opendap),
632+
'https://fake.opendap.earthdata.nasa.gov/example.hdf.dmr.xml',
633+
)
634+
635+
with self.subTest('Onprem OPeNDAP Related Urls response'):
636+
self.assertEqual(
637+
get_dmr_xml_url(granule_response_links_correct_onprem_opendap),
638+
'https://some.fake.server.onprem.opendap.nasa.gov/example.hdf.dmr.xml',
639+
)

tests/unit/test_generate_umm_var.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ def setUpClass(cls):
2020
cls.launchpad_token_header = 'launchpad-token'
2121
cls.netcdf4_basename = 'f16_ssmis_20210426v7.nc'
2222
cls.netcdf4_url = f'https://example.com/{cls.netcdf4_basename}'
23+
cls.example_dmr_basename = 'foo_HDF_EOS_granule.hdf'
24+
cls.opendap_url = (
25+
f'https://fake.opendap.earthdata.nasa.gov/{cls.example_dmr_basename}'
26+
)
27+
cls.opendap_xml_download_url = f'https://fake.opendap.earthdata.nasa.gov/{cls.example_dmr_basename}.dmr.xml'
2328
cls.query_granule_return = [
2429
{
2530
'links': [
@@ -30,6 +35,18 @@ def setUpClass(cls):
3035
]
3136
}
3237
]
38+
cls.query_granule_return_opendap = [
39+
{
40+
'links': [
41+
{
42+
'rel': 'http://esipfed.org/ns/fedsearch/1.1/service#',
43+
'title': 'OPeNDAP request URL (GET DATA : OPENDAP DATA)',
44+
'hreflang': 'en-US',
45+
'href': cls.opendap_url,
46+
}
47+
]
48+
}
49+
]
3350
cls.rssmif16d_variables = [
3451
'atmosphere_cloud_liquid_water_content',
3552
'atmosphere_water_vapor_content',
@@ -53,6 +70,18 @@ def download_granule_side_effect(granule_link, auth_header, out_directory):
5370
netcdf4_file_path = 'tests/unit/data/f16_ssmis_20210426v7.nc'
5471
return copy(netcdf4_file_path, out_directory)
5572

73+
@staticmethod
74+
def download_dmr_side_effect(granule_link, auth_header, out_directory):
75+
"""A helper method that will copy a test dmr file to the temporary
76+
directory being used for a specific test.
77+
78+
Static methods do not have access to class attributes, so the test
79+
file path is defined in this method as well as setUpClass.
80+
81+
"""
82+
dmr_file_path = 'tests/unit/data/M2I3NPASM_example.dmr'
83+
return copy(dmr_file_path, out_directory)
84+
5685
@patch('varinfo.umm_var.publish_umm_var')
5786
@patch('varinfo.cmr_search.GranuleQuery')
5887
@patch('varinfo.generate_umm_var.download_granule')
@@ -144,6 +173,52 @@ def test_generate_collection_umm_var_with_publication(
144173
# Ensure the output looks as expected
145174
self.assertSetEqual(set(published_umm_var), set(expected_concept_ids))
146175

176+
@patch('varinfo.umm_var.publish_umm_var')
177+
@patch('varinfo.generate_umm_var.get_dmr_xml_url')
178+
@patch('varinfo.cmr_search.GranuleQuery')
179+
@patch('varinfo.generate_umm_var.download_granule')
180+
def test_generate_collection_umm_var_dmr(
181+
self,
182+
mock_download_granule,
183+
mock_granule_query,
184+
mock_get_dmr_xml_url,
185+
mock_publish_umm_var,
186+
):
187+
"""Test an end-to-end request for a DMR file."""
188+
mock_granule_query.return_value.get.return_value = (
189+
self.query_granule_return_opendap
190+
)
191+
192+
# Add side effect that will copy test file to the temporary directory,
193+
# simulating a download.
194+
mock_download_granule.side_effect = self.download_dmr_side_effect
195+
196+
# Set return value for get_dmr_xml_url
197+
mock_get_dmr_xml_url.return_value = self.opendap_xml_download_url
198+
199+
# Check call arguments when use_dmr=True
200+
generate_collection_umm_var(
201+
self.collection_concept_id,
202+
self.bearer_token_header,
203+
use_dmr=True,
204+
)
205+
206+
mock_get_dmr_xml_url.assert_called_once_with(self.query_granule_return_opendap)
207+
# Ensure the granule query used expected query parameters
208+
mock_granule_query.return_value.parameters.assert_called_once_with(
209+
downloadable=True,
210+
sort_key='-start_date',
211+
concept_id=self.collection_concept_id,
212+
)
213+
214+
# Ensure the call to download the granule had correct parameters
215+
mock_download_granule.assert_called_once_with(
216+
self.opendap_xml_download_url, self.bearer_token_header, out_directory=ANY
217+
)
218+
219+
# Check that no attempt was made to publish a UMM-Var record to CMR:
220+
mock_publish_umm_var.assert_not_called()
221+
147222
@patch('varinfo.umm_var.publish_umm_var')
148223
@patch('varinfo.cmr_search.GranuleQuery')
149224
@patch('varinfo.generate_umm_var.download_granule')

varinfo/cmr_search.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,3 +190,22 @@ def get_edl_token_header(auth_header: str, cmr_env: CmrEnvType) -> str:
190190
else:
191191
edl_auth_header = auth_header
192192
return edl_auth_header
193+
194+
195+
def get_dmr_xml_url(granule_response: Sequence) -> str:
196+
"""Gets the OPeNDAP url from a CMR granule response and appends `.dmr.xml`"""
197+
granule_link = next(
198+
(
199+
link['href']
200+
for link in granule_response[0].get('links', [])
201+
if link['rel'].endswith('/service#')
202+
and 'inherited' not in link
203+
and 'opendap' in link['href']
204+
),
205+
None,
206+
)
207+
208+
if granule_link is None:
209+
raise MissingGranuleDownloadLinks(granule_response)
210+
211+
return granule_link + '.dmr.xml'

varinfo/generate_umm_var.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@
1818

1919
from cmr import CMR_UAT
2020

21-
from varinfo import VarInfoFromNetCDF4
21+
from varinfo import VarInfoFromNetCDF4, VarInfoFromDmr
2222
from varinfo.cmr_search import (
2323
CmrEnvType,
2424
download_granule,
2525
get_granule_link,
2626
get_granules,
2727
get_edl_token_header,
28+
get_dmr_xml_url,
2829
)
2930
from varinfo.umm_var import get_all_umm_var, publish_all_umm_var
3031

@@ -40,6 +41,7 @@ def generate_collection_umm_var(
4041
cmr_env: CmrEnvType = CMR_UAT,
4142
publish: bool = False,
4243
config_file: str | None = None,
44+
use_dmr: bool = False,
4345
) -> UmmVarReturnType:
4446
"""Run all the of the functions for downloading and publishing
4547
a UMM-Var entry to CMR given:
@@ -64,20 +66,29 @@ def generate_collection_umm_var(
6466
collection_concept_id, cmr_env=cmr_env, auth_header=auth_header_edl_token
6567
)
6668

67-
# Get the data download URL for the most recent granule (NetCDF-4 file)
68-
granule_link = get_granule_link(granule_response)
69+
if use_dmr:
70+
# Get OPeNDAP data URL with `.dmr.xml` appended
71+
granule_link = get_dmr_xml_url(granule_response)
72+
73+
else:
74+
# Get the data download URL for the most recent granule (NetCDF-4 file)
75+
granule_link = get_granule_link(granule_response)
6976

7077
with TemporaryDirectory() as temp_dir:
7178
# Download file to runtime environment
7279
local_granule = download_granule(
7380
granule_link, auth_header_edl_token, out_directory=temp_dir
7481
)
7582

76-
# Parse the granule with VarInfo to map all variables and relations:
77-
var_info = VarInfoFromNetCDF4(local_granule, config_file=config_file)
83+
if use_dmr:
84+
# Parse the granule with VarInfoFromDmr to map all variables and relations:
85+
var_info = VarInfoFromDmr(local_granule, config_file=config_file)
86+
else:
87+
# Parse the granule with VarInfoFromNetCDF4 to map all variables and relations:
88+
var_info = VarInfoFromNetCDF4(local_granule, config_file=config_file)
7889

79-
# Generate all the UMM-Var records:
80-
all_umm_var_records = get_all_umm_var(var_info)
90+
# Generate all the UMM-Var records:
91+
all_umm_var_records = get_all_umm_var(var_info)
8192

8293
if publish:
8394
# Publish to CMR and construct an output object that is a list of

0 commit comments

Comments
 (0)