Skip to content

Sentinel-5 P data usage in stackstac. RuntimeError: Assets must have exactly 1 band #268

@JPPereira93

Description

@JPPereira93

Hi!

I'm exploring using stackstac to work with Sentinel-5P data from the Planetary Computer. As far as I'm aware, these files are in NetCDF format rather than COG, which is making them a little difficult to work with:

# --- Query parameters -------------------------------------------------------
start_date = "2024-01-01"
end_date = "2024-12-30"
time_range = f"{start_date}/{end_date}"  # NOTE: unused below; the search builds its own range

aoi_path = "aoi_1_4326_part_1.geojson"
aoi = gpd.read_file(aoi_path)
min_time_step = 30  # minimum number of days between two kept scenes

output_directory = r"C:\Users\fuji_\methane-mapping"

# Bounding box of the AOI as a shapely geometry (lon/lat — assumes the
# GeoJSON is in EPSG:4326, as the filename suggests).
bounds = aoi.total_bounds
geometry = box(*bounds)
wkt_aoi = geometry.wkt
geojson_aoi = mapping(geometry)

print(wkt_aoi)

# Search for Sentinel-5p L2 data using the Planetary Computer STAC API

sentinel_search_url = "https://planetarycomputer.microsoft.com/api/stac/v1"
sentinel_stac_client = pystac_client.Client.open(sentinel_search_url)

items = sentinel_stac_client.search(
    bbox=bounds,
    collections=["sentinel-5p-l2-netcdf"],
    datetime=f"{start_date}/{end_date}",
    # NOTE(review): the traceback below shows HCHO products being opened, so
    # this filter does not appear to take effect — verify the property name
    # and value against the collection's queryables before relying on it.
    query={"s5p:processing_mode": {"eq": "OFFL"}, "s5p:product_name": {"eq": "ch4"}},
).item_collection()

print(f"Total number of items found: {len(items)}")

# Guard: an empty result would otherwise crash below on scene_dates[0].
if not items:
    raise SystemExit("No STAC items matched the search; nothing to stack.")

# Thin the time series: keep the first scene, then only scenes at least
# `min_time_step` days after the previously kept one.
scene_dates = sorted(item.datetime for item in items)

selected_scenes = [scene_dates[0]]
for scene_date in scene_dates[1:]:
    if (scene_date - selected_scenes[-1]).days >= min_time_step:
        selected_scenes.append(scene_date)

# Set lookup instead of a list scan per item.
selected_set = set(selected_scenes)
items = [item for item in items if item.datetime in selected_set]

print(f"Number of items after minimum time step filtering: {len(items)}")

print("\nSelected scenes after filtering:")
for item in items:
    print(f"Scene date: {item.datetime}")

# Print available assets in the first item

first_item = items[0]

# Check available assets
print("Available assets:", list(first_item.assets.keys()))

## Must sign to the PC STAC assets to get access

for item in items:
    for asset in item.assets.values():  # keys unused; iterate values only
        asset.href = pc_sign(asset.href)


# NOTE(review): stackstac reads assets through GDAL/rasterio, which opens the
# bare .nc URL as a dataset with 0 bands (netCDF variables live in
# subdatasets) — hence the "Assets must have exactly 1 band" RuntimeError.
# stackstac cannot stack these netCDF assets directly; consider opening the
# signed hrefs with xarray/fsspec instead.
sentinel_stack = stackstac.stack(
    items,
    assets=["ch4"],
    gdal_env=stackstac.DEFAULT_GDAL_ENV.updated({
        'GDAL_HTTP_MAX_RETRY': 3,
        'GDAL_HTTP_RETRY_DELAY': 5,
        # NOTE(review): this flag is for S3; the data lives on Azure Blob
        # storage with SAS-signed URLs, so it is almost certainly inert here.
        'AWS_NO_SIGN_REQUEST': 'YES',
    }),
    epsg=4326,
    resolution=0.0629,
    chunksize=(1, 1, 1000, 1000),
).to_dataset(dim='band')

# This length number represents the number  of assets (bands) that are to be extracted
# len(sentinel_stack)

# Drop every coordinate except time/y/x.
sentinel_stack = sentinel_stack.drop_vars(
    [c for c in sentinel_stack.coords if c not in ('time', 'y', 'x')]
)
sentinel_stack_float32 = sentinel_stack.astype('float32')

crs = "EPSG:4326"
sentinel_stack_float32 = sentinel_stack_float32.rio.write_crs(crs, inplace=True)

# Clip to the AOI polygon(s), then trigger computation with a progress bar.
sentinel_stack_clipped = sentinel_stack_float32.rio.clip(
    aoi.geometry, aoi.crs, all_touched=True, drop=True
)
with ProgressBar():
    sentinel_stack_clipped = sentinel_stack_clipped.persist()

[#                                       ] | 4% Completed | 16.44 sms
[c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:325](file:///C:/Users/fuji_/anaconda3/envs/geo/Lib/site-packages/stackstac/rio_reader.py:325): NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
  ds = SelfCleaningDatasetReader(self.url, sharing=False)
[#                                       ] | 4% Completed | 16.60 s

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[25], line 3
      1 sentinel_stack_clipped = sentinel_stack_float32.rio.clip(aoi.geometry, aoi.crs, all_touched=True, drop=True)
      2 with ProgressBar():
----> 3     sentinel_stack_clipped = sentinel_stack_clipped.persist()

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\xarray\core\dataset.py:1117, in Dataset.persist(self, **kwargs)
   1093 """Trigger computation, keeping data as chunked arrays.
   1094 
   1095 This operation can be used to trigger computation on underlying dask
   (...)
   1114 dask.persist
   1115 """
   1116 new = self.copy(deep=False)
-> 1117 return new._persist_inplace(**kwargs)

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\xarray\core\dataset.py:1085, in Dataset._persist_inplace(self, **kwargs)
   1082 chunkmanager = get_chunked_array_type(*lazy_data.values())
   1084 # evaluate all the dask arrays simultaneously
-> 1085 evaluated_data = chunkmanager.persist(*lazy_data.values(), **kwargs)
   1087 for k, data in zip(lazy_data, evaluated_data, strict=False):
   1088     self.variables[k].data = data

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\xarray\namedarray\daskmanager.py:90, in DaskManager.persist(self, *data, **kwargs)
     87 def persist(self, *data: Any, **kwargs: Any) -> tuple[DaskArray | Any, ...]:
     88     from dask import persist
---> 90     return persist(*data, **kwargs)

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\dask\base.py:1001, in persist(traverse, optimize_graph, scheduler, *args, **kwargs)
    998     postpersists.append((rebuild, a_keys, state))
   1000 with shorten_traceback():
-> 1001     results = schedule(dsk, keys, **kwargs)
   1003 d = dict(zip(keys, results))
   1004 results2 = [r({k: d[k] for k in ks}, *s) for r, ks, s in postpersists]

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\to_dask.py:189, in fetch_raster_window(reader_table, slices, dtype, fill_value)
    182 # Only read if the window we're fetching actually overlaps with the asset
    183 if windows.intersect(current_window, asset_window):
    184     # NOTE: when there are multiple assets, we _could_ parallelize these reads with our own threadpool.
    185     # However, that would probably increase memory usage, since the internal, thread-local GDAL datasets
    186     # would end up copied to even more threads.
    187 
    188     # TODO when the Reader won't be rescaling, support passing `output` to avoid the copy?
--> 189     data = reader.read(current_window)
    191     if all_empty:
    192         # Turn `output` from a broadcast-trick array to a real array, so it's writeable
    193         if (
    194             np.isnan(data)
    195             if np.isnan(fill_value)
    196             else np.equal(data, fill_value)
    197         ).all():
    198             # Unless the data we just read is all empty anyway

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:383, in AutoParallelRioReader.read(self, window, **kwargs)
    382 def read(self, window: Window, **kwargs) -> np.ndarray:
--> 383     reader = self.dataset
    384     try:
    385         result = reader.read(
    386             window=window,
    387             out_dtype=self.dtype,
   (...)
    391             **kwargs,
    392         )

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:379, in AutoParallelRioReader.dataset(self)
    377 with self._dataset_lock:
    378     if self._dataset is None:
--> 379         self._dataset = self._open()
    380     return self._dataset

File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:338, in AutoParallelRioReader._open(self)
    336     nr_of_bands = ds.count
    337     ds.close()
--> 338     raise RuntimeError(
    339         f"Assets must have exactly 1 band, but file {self.url!r} has {nr_of_bands}. "
    340         "We can't currently handle multi-band rasters (each band has to be "
    341         "a separate STAC asset), so you'll need to exclude this asset from your analysis."
    342     )
    344 # Only make a VRT if the dataset doesn't match the spatial spec we want
    345 if self.spec.vrt_params != {
    346     "crs": ds.crs.to_epsg(),
    347     "transform": ds.transform,
    348     "height": ds.height,
    349     "width": ds.width,
    350 }:

RuntimeError: Assets must have exactly 1 band, but file 'https://sentinel5euwest.blob.core.windows.net/sentinel-5p/TROPOMI/L2__HCHO__/2024/03/01/S5P_OFFL_L2__HCHO___20240301T104708_20240301T122838_33072_03_020601_20240303T031309/S5P_OFFL_L2__HCHO___20240301T104708_20240301T122838_33072_03_020601_20240303T031309.nc?st=2025-03-19T17%3A09%3A51Z&se=2025-03-20T17%3A54%3A51Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2025-03-20T00%3A01%3A16Z&ske=2025-03-27T00%3A01%3A16Z&sks=b&skv=2024-05-04&sig=Lffz9KetmMI4xjoy%2BB7hhsD9wfIxOn5PVS/BYm72G4Q%3D' has 0. We can't currently handle multi-band rasters (each band has to be a separate STAC asset), so you'll need to exclude this asset from your analysis.

Any way to solve this? I basically want the methane_mixing_ratio band that should be inside the "ch4" asset, but I can't extract it.

Netcdf file example: https://sentinel5euwest.blob.core.windows.net/sentinel-5p/TROPOMI/L2__CH4___/2024/01/06/S5P_OFFL_L2__CH4____20240106T093259_20240106T111429_32291_03_020600_20240108T014844/S5P_OFFL_L2__CH4____20240106T093259_20240106T111429_32291_03_020600_20240108T014844.nc?st=2025-03-20T16%3A30%3A54Z&se=2025-03-21T17%3A15%3A54Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2025-03-20T00%3A01%3A16Z&ske=2025-03-27T00%3A01%3A16Z&sks=b&skv=2024-05-04&sig=jnGWXNIxsY7BqHZeC8qJO0OB43mtx9GYYJpSeZrbSfo%3D

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions