Hi!
I'm exploring stackstac as a way to work with Sentinel-5P data from the Planetary Computer. As far as I'm aware, these files are NetCDFs rather than COGs, which makes them a little difficult to work with:
```python
import geopandas as gpd
import pystac_client
import rioxarray  # noqa: F401  (registers the .rio accessor)
import stackstac
from dask.diagnostics import ProgressBar
from planetary_computer import sign as pc_sign
from shapely.geometry import box, mapping

start_date = "2024-01-01"
end_date = "2024-12-30"
time_range = f"{start_date}/{end_date}"
aoi_path = "aoi_1_4326_part_1.geojson"
aoi = gpd.read_file(aoi_path)
min_time_step = 30
output_directory = r"C:\Users\fuji_\methane-mapping"

# Build a WKT/GeoJSON AOI from the bounding box of the input file
bounds = aoi.total_bounds
geometry = box(bounds[0], bounds[1], bounds[2], bounds[3])
wkt_aoi = geometry.wkt
geojson_aoi = mapping(geometry)
print(wkt_aoi)

# Search for Sentinel-5P L2 data using the Planetary Computer STAC API
sentinel_search_url = "https://planetarycomputer.microsoft.com/api/stac/v1"
sentinel_stac_client = pystac_client.Client.open(sentinel_search_url)
items = sentinel_stac_client.search(
    bbox=bounds,
    collections=["sentinel-5p-l2-netcdf"],
    datetime=f"{start_date}/{end_date}",
    query={"s5p:processing_mode": {"eq": "OFFL"}, "s5p:product_name": {"eq": "ch4"}},
).item_collection()
print(f"Total number of items found: {len(items)}")

# Keep only scenes that are at least `min_time_step` days apart
scene_dates = sorted([item.datetime for item in items])
selected_scenes = [scene_dates[0]]
for i in range(1, len(scene_dates)):
    if (scene_dates[i] - selected_scenes[-1]).days >= min_time_step:
        selected_scenes.append(scene_dates[i])
items = [item for item in items if item.datetime in selected_scenes]
print(f"Number of items after minimum time step filtering: {len(items)}")

print("\nSelected scenes after filtering:")
for item in items:
    print(f"Scene date: {item.datetime}")

# Check the available assets in the first item
first_item = items[0]
print("Available assets:", list(first_item.assets.keys()))

# Must sign the PC STAC asset HREFs to get access
for item in items:
    for asset_key, asset in item.assets.items():
        asset.href = pc_sign(asset.href)

sentinel_stack = stackstac.stack(
    items,
    assets=["ch4"],
    gdal_env=stackstac.DEFAULT_GDAL_ENV.updated({
        'GDAL_HTTP_MAX_RETRY': 3,
        'GDAL_HTTP_RETRY_DELAY': 5,
        'AWS_NO_SIGN_REQUEST': 'YES',
    }),
    epsg=4326,
    resolution=0.0629,
    chunksize=(1, 1, 1000, 1000),
).to_dataset(dim='band')
# len(sentinel_stack)  # number of assets (bands) to be extracted

# Drop all coordinates that are not time, y or x
sentinel_stack = sentinel_stack.drop_vars([c for c in sentinel_stack.coords if c not in ['time', 'y', 'x']])
sentinel_stack_float32 = sentinel_stack.astype('float32')

# Write the CRS, clip to the AOI, and compute
crs = "EPSG:4326"
crs_number = crs[5:]
sentinel_stack_float32 = sentinel_stack_float32.rio.write_crs(crs, inplace=True)
sentinel_stack_clipped = sentinel_stack_float32.rio.clip(aoi.geometry, aoi.crs, all_touched=True, drop=True)
with ProgressBar():
    sentinel_stack_clipped = sentinel_stack_clipped.persist()
```

When I run this, the progress bar starts and then it fails with this warning and traceback:

```
[# ] | 4% Completed | 16.44 s
c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:325: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
  ds = SelfCleaningDatasetReader(self.url, sharing=False)
[# ] | 4% Completed | 16.60 s
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[25], line 3
1 sentinel_stack_clipped = sentinel_stack_float32.rio.clip(aoi.geometry, aoi.crs, all_touched=True, drop=True)
2 with ProgressBar():
----> 3 sentinel_stack_clipped = sentinel_stack_clipped.persist()
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\xarray\core\dataset.py:1117, in Dataset.persist(self, **kwargs)
1093 """Trigger computation, keeping data as chunked arrays.
1094
1095 This operation can be used to trigger computation on underlying dask
(...)
1114 dask.persist
1115 """
1116 new = self.copy(deep=False)
-> 1117 return new._persist_inplace(**kwargs)
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\xarray\core\dataset.py:1085, in Dataset._persist_inplace(self, **kwargs)
1082 chunkmanager = get_chunked_array_type(*lazy_data.values())
1084 # evaluate all the dask arrays simultaneously
-> 1085 evaluated_data = chunkmanager.persist(*lazy_data.values(), **kwargs)
1087 for k, data in zip(lazy_data, evaluated_data, strict=False):
1088 self.variables[k].data = data
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\xarray\namedarray\daskmanager.py:90, in DaskManager.persist(self, *data, **kwargs)
87 def persist(self, *data: Any, **kwargs: Any) -> tuple[DaskArray | Any, ...]:
88 from dask import persist
---> 90 return persist(*data, **kwargs)
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\dask\base.py:1001, in persist(traverse, optimize_graph, scheduler, *args, **kwargs)
998 postpersists.append((rebuild, a_keys, state))
1000 with shorten_traceback():
-> 1001 results = schedule(dsk, keys, **kwargs)
1003 d = dict(zip(keys, results))
1004 results2 = [r({k: d[k] for k in ks}, *s) for r, ks, s in postpersists]
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\to_dask.py:189, in fetch_raster_window(reader_table, slices, dtype, fill_value)
182 # Only read if the window we're fetching actually overlaps with the asset
183 if windows.intersect(current_window, asset_window):
184 # NOTE: when there are multiple assets, we _could_ parallelize these reads with our own threadpool.
185 # However, that would probably increase memory usage, since the internal, thread-local GDAL datasets
186 # would end up copied to even more threads.
187
188 # TODO when the Reader won't be rescaling, support passing `output` to avoid the copy?
--> 189 data = reader.read(current_window)
191 if all_empty:
192 # Turn `output` from a broadcast-trick array to a real array, so it's writeable
193 if (
194 np.isnan(data)
195 if np.isnan(fill_value)
196 else np.equal(data, fill_value)
197 ).all():
198 # Unless the data we just read is all empty anyway
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:383, in AutoParallelRioReader.read(self, window, **kwargs)
382 def read(self, window: Window, **kwargs) -> np.ndarray:
--> 383 reader = self.dataset
384 try:
385 result = reader.read(
386 window=window,
387 out_dtype=self.dtype,
(...)
391 **kwargs,
392 )
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:379, in AutoParallelRioReader.dataset(self)
377 with self._dataset_lock:
378 if self._dataset is None:
--> 379 self._dataset = self._open()
380 return self._dataset
File c:\Users\fuji_\anaconda3\envs\geo\Lib\site-packages\stackstac\rio_reader.py:338, in AutoParallelRioReader._open(self)
336 nr_of_bands = ds.count
337 ds.close()
--> 338 raise RuntimeError(
339 f"Assets must have exactly 1 band, but file {self.url!r} has {nr_of_bands}. "
340 "We can't currently handle multi-band rasters (each band has to be "
341 "a separate STAC asset), so you'll need to exclude this asset from your analysis."
342 )
344 # Only make a VRT if the dataset doesn't match the spatial spec we want
345 if self.spec.vrt_params != {
346 "crs": ds.crs.to_epsg(),
347 "transform": ds.transform,
348 "height": ds.height,
349 "width": ds.width,
350 }:
RuntimeError: Assets must have exactly 1 band, but file 'https://sentinel5euwest.blob.core.windows.net/sentinel-5p/TROPOMI/L2__HCHO__/2024/03/01/S5P_OFFL_L2__HCHO___20240301T104708_20240301T122838_33072_03_020601_20240303T031309/S5P_OFFL_L2__HCHO___20240301T104708_20240301T122838_33072_03_020601_20240303T031309.nc?st=2025-03-19T17%3A09%3A51Z&se=2025-03-20T17%3A54%3A51Z&sp=rl&sv=2024-05-04&sr=c&skoid=9c8ff44a-6a2c-4dfb-b298-1c9212f64d9a&sktid=72f988bf-86f1-41af-91ab-2d7cd011db47&skt=2025-03-20T00%3A01%3A16Z&ske=2025-03-27T00%3A01%3A16Z&sks=b&skv=2024-05-04&sig=Lffz9KetmMI4xjoy%2BB7hhsD9wfIxOn5PVS/BYm72G4Q%3D' has 0. We can't currently handle multi-band rasters (each band has to be a separate STAC asset), so you'll need to exclude this asset from your analysis.
```

Any way to solve this? I basically want the `methane_mixing_ratio` variable that should be inside the "ch4" asset, but I can't extract it.
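For comparison, I can read a single granule directly with xarray, bypassing stackstac entirely. A minimal sketch, assuming `fsspec` and `h5netcdf` are installed (in the TROPOMI L2 files, the science variables live in the `PRODUCT` group):

```python
import fsspec
import planetary_computer
import xarray as xr

# Sketch: open one signed CH4 NetCDF asset directly with xarray.
# `items` is the filtered STAC item list from the snippet above.
signed_href = planetary_computer.sign(items[0].assets["ch4"].href)
with fsspec.open(signed_href) as f:
    ds = xr.open_dataset(f, group="PRODUCT", engine="h5netcdf")
    ch4 = ds["methane_mixing_ratio"].load()  # load before the file closes
print(ch4)
```

That gets me the values for one granule, but it loses the gridding, stacking, and mosaicking over time that stackstac would give me.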