
Commit 7836a36

Scale offset from item asset (#202)
1 parent: acaa55a

6 files changed: +40 / -20 lines

README.md

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ It's a good idea to use `conda` to handle installing rasterio on Windows. There'
 * Transfer the STAC metadata into [xarray coordinates](http://xarray.pydata.org/en/stable/data-structures.html#coordinates) for easy indexing, filtering, and provenance of metadata.
 * Efficiently generate a Dask graph for loading the data in parallel.
 * Mediate between Dask's parallelism and GDAL's aversion to it, allowing for fast, multi-threaded reads when possible, and at least preventing segfaults when not.
-* Mask nodata and rescale by dataset-level scales/offsets.
+* Mask nodata and rescale by STAC item asset scales/offsets.
 * Display data in interactive maps in a notebook, computed on the fly by Dask.

 ## Limitations:
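
For context, the per-asset scale and offset now come from the STAC raster extension's `raster:bands` field rather than from GDAL's dataset-level metadata. A minimal sketch of what that looks like on an item asset (the href and all values are hypothetical, for illustration only):

```python
# Hypothetical STAC asset dict carrying scale/offset via the `raster:bands` field
# of the STAC raster extension; all values here are made up.
asset = {
    "href": "https://example.com/scene/B04.tif",
    "type": "image/tiff; application=geotiff; profile=cloud-optimized",
    "raster:bands": [
        {"data_type": "uint16", "nodata": 0, "scale": 0.0001, "offset": 0.0},
    ],
}

# The same lookups (and defaults) the new prepare.py code performs:
band = asset["raster:bands"][0]
scale = band.get("scale", 1)    # -> 0.0001
offset = band.get("offset", 0)  # -> 0.0
```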

stackstac/prepare.py

Lines changed: 19 additions & 2 deletions
@@ -23,10 +23,11 @@
 import xarray as xr

 from .raster_spec import IntFloat, Bbox, Resolutions, RasterSpec
+
 from .stac_types import ItemSequence
 from . import accumulate_metadata, geom_utils

-ASSET_TABLE_DT = np.dtype([("url", object), ("bounds", "float64", 4)])
+ASSET_TABLE_DT = np.dtype([("url", object), ("bounds", "float64", 4), ("scale_offset", "float64", 2)])


 class Mimetype(NamedTuple):
@@ -143,6 +144,22 @@ def prepare_items(
             asset_bbox = asset.get("proj:bbox", item_bbox)
             asset_shape = asset.get("proj:shape", item_shape)
             asset_transform = asset.get("proj:transform", item_transform)
+            raster_bands = asset.get('raster:bands')
+
+            if raster_bands is not None:
+                if len(raster_bands) != 1:
+                    raise ValueError(
+                        f"raster:bands has {len(raster_bands)} elements for asset {asset_id!r}. "
+                        "Multi-band rasters are not currently supported.\n"
+                        "If you don't care about this asset, you can skip it by giving a list "
+                        "of asset IDs you *do* want in `assets=`, and leaving this one out."
+                    )
+                asset_scale = raster_bands[0].get('scale', 1)
+                asset_offset = raster_bands[0].get('offset', 0)
+            else:
+                asset_scale = 1
+                asset_offset = 0
+
             asset_affine = None

             # Auto-compute CRS
@@ -322,7 +339,7 @@ def prepare_items(
                 continue

             # Phew, we figured out all the spatial stuff! Now actually store the information we care about.
-            asset_table[item_i, asset_i] = (asset["href"], asset_bbox_proj)
+            asset_table[item_i, asset_i] = (asset["href"], asset_bbox_proj, (asset_scale, asset_offset))
             # ^ NOTE: If `asset_bbox_proj` is None, NumPy automatically converts it to NaNs

     # At this point, everything has been set (or there was as error)
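
A quick sketch of how the widened `ASSET_TABLE_DT` carries the new field. The dtype is copied from the hunk above; the href, bounds, and scale/offset values are hypothetical:

```python
import numpy as np

# Dtype from the diff above: each asset row now also stores a (scale, offset) pair.
ASSET_TABLE_DT = np.dtype(
    [("url", object), ("bounds", "float64", 4), ("scale_offset", "float64", 2)]
)

# One item x one asset, indexed the same way prepare_items() fills it.
asset_table = np.zeros((1, 1), dtype=ASSET_TABLE_DT)
asset_table[0, 0] = (
    "https://example.com/scene/B04.tif",          # hypothetical href
    (399960.0, 4090200.0, 509760.0, 4200000.0),   # hypothetical projected bbox
    (0.0001, 0.0),                                 # (scale, offset); (1, 0) when raster:bands is absent
)

print(asset_table["scale_offset"][0, 0])  # -> [1.e-04 0.e+00]
```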

stackstac/reader_protocol.py

Lines changed: 1 addition & 3 deletions
@@ -34,7 +34,7 @@ def __init__(
         resampling: Resampling,
         dtype: np.dtype,
         fill_value: Union[int, float],
-        rescale: bool,
+        scale_offset: Tuple[Union[int, float], Union[int, float]],
         gdal_env: Optional[LayeredEnv],
         errors_as_nodata: Tuple[Exception, ...] = (),
     ) -> None:
@@ -55,8 +55,6 @@ def __init__(
         fill_value:
             Fill nodata pixels in the output array with this value.
             If None, whatever nodata value is set in the asset will be used.
-        rescale:
-            Rescale the output array according to any scales and offsets set in the asset.
         gdal_env:
             A `~.LayeredEnv` of GDAL configuration options to use while opening
             and reading datasets. If None (default), `~.DEFAULT_GDAL_ENV` is used.
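
To illustrate the signature change from the caller's side, here is a toy reader (not stackstac code) that follows the updated constructor contract: it receives the concrete `(scale, offset)` pair instead of a `rescale` flag and applies it when reading. The class name and its other parameters are made up for the sketch:

```python
from typing import Tuple, Union

import numpy as np


class ConstantReader:
    """Toy stand-in for a reader: serves a constant array, applying scale/offset."""

    def __init__(
        self,
        value: float,
        shape: Tuple[int, int],
        scale_offset: Tuple[Union[int, float], Union[int, float]],
    ) -> None:
        self.value = value
        self.shape = shape
        self.scale_offset = scale_offset

    def read(self, window=None) -> np.ndarray:
        # Apply the pair handed in by the caller; (1, 0) leaves values unchanged.
        scale, offset = self.scale_offset
        return np.full(self.shape, self.value, dtype="float64") * scale + offset


print(ConstantReader(5000, (2, 2), scale_offset=(0.0001, 0.0)).read())
# [[0.5 0.5]
#  [0.5 0.5]]
```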

stackstac/rio_reader.py

Lines changed: 12 additions & 12 deletions
@@ -32,7 +32,6 @@ def _curthread():

 # /TODO

-
 # Default GDAL configuration options
 DEFAULT_GDAL_ENV = LayeredEnv(
     always=dict(
@@ -64,11 +63,12 @@ def _curthread():
 # See `ThreadLocalRioDataset` for more.
 # https://github.com/pangeo-data/pangeo-example-notebooks/issues/21#issuecomment-432457955
 # https://gdal.org/drivers/raster/vrt.html#multi-threading-issues
+
 MULTITHREADED_DRIVER_ALLOWLIST = {"GTiff"}


 class ThreadsafeRioDataset(Protocol):
-    scale_offset: Tuple[float, float]
+    scale_offset: Tuple[Union[int, float], Union[int, float]]

     def read(self, window: Window, **kwargs) -> np.ndarray:
         ...
@@ -280,7 +280,7 @@ class PickleState(TypedDict):
     resampling: Resampling
     dtype: np.dtype
     fill_value: Union[int, float]
-    rescale: bool
+    scale_offset: Tuple[Union[int, float], Union[int, float]]
     gdal_env: Optional[LayeredEnv]
     errors_as_nodata: Tuple[Exception, ...]

@@ -303,16 +303,16 @@ def __init__(
         resampling: Resampling,
         dtype: np.dtype,
         fill_value: Union[int, float],
-        rescale: bool,
+        scale_offset: Tuple[Union[int, float], Union[int, float]],
         gdal_env: Optional[LayeredEnv] = None,
         errors_as_nodata: Tuple[Exception, ...] = (),
     ) -> None:
         self.url = url
         self.spec = spec
         self.resampling = resampling
         self.dtype = dtype
-        self.rescale = rescale
         self.fill_value = fill_value
+        self.scale_offset = scale_offset
         self.gdal_env = gdal_env or DEFAULT_GDAL_ENV
         self.errors_as_nodata = errors_as_nodata

@@ -398,12 +398,12 @@ def read(self, window: Window, **kwargs) -> np.ndarray:

             raise RuntimeError(msg) from e

-        if self.rescale:
-            scale, offset = reader.scale_offset
-            if scale != 1:
-                result *= scale
-            if offset != 0:
-                result += offset
+        scale, offset = self.scale_offset
+
+        if scale != 1:
+            result *= scale
+        if offset != 0:
+            result += offset

         result = np.ma.filled(result, fill_value=self.fill_value)
         assert np.issubdtype(result.dtype, self.dtype), (
@@ -436,7 +436,7 @@ def __getstate__(
             "resampling": self.resampling,
             "dtype": self.dtype,
             "fill_value": self.fill_value,
-            "rescale": self.rescale,
+            "scale_offset": self.scale_offset,
             "gdal_env": self.gdal_env,
             "errors_as_nodata": self.errors_as_nodata,
         }
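
The read path now applies whatever pair it was handed, relying on the `scale != 1` / `offset != 0` checks to make the identity pair free. A minimal standalone sketch of that arithmetic; the sample values are hypothetical, loosely modeled on Landsat Collection 2 surface-reflectance scaling:

```python
import numpy as np


def apply_scale_offset(result: np.ndarray, scale: float, offset: float) -> np.ndarray:
    """Apply the same scale-then-offset rescaling as the diff above
    (out-of-place here, for safety with integer inputs), skipping the no-op cases."""
    if scale != 1:
        result = result * scale
    if offset != 0:
        result = result + offset
    return result


raw = np.array([7273.0, 21700.0, 43636.0])       # hypothetical raw digital numbers
print(apply_scale_offset(raw, 0.0000275, -0.2))  # ~[0.0, 0.397, 1.0]
print(apply_scale_offset(raw, 1, 0))             # unchanged: the identity pair is a no-op
```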

stackstac/stack.py

Lines changed: 2 additions & 1 deletion
@@ -202,7 +202,8 @@ def stack(
         be represented in a smaller data type (like ``uint16``), using a different ``fill_value``
         (like 0) and managing it yourself could save a lot of memory.
     rescale:
-        Whether to rescale pixel values by the scale and offset set on the dataset.
+        Whether to rescale pixel values by the scale and offset present in the ``raster:bands`` metadata
+        for each asset.
         Default: True. Note that this could produce floating-point data when the
         original values are ints, so set ``dtype`` accordingly. You will NOT be warned
         if the cast to ``dtype`` is losing information!
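
A hedged usage sketch of the documented behavior: with `rescale=True` (the default), the returned values are already multiplied by the per-asset `raster:bands` scale and shifted by its offset, so an integer source generally needs a floating-point `dtype`. The `items` variable and the `"B04"` asset ID are assumptions, not part of this commit:

```python
import stackstac

# `items` is assumed to be a sequence of STAC items (e.g. from a pystac-client search)
# whose assets carry `raster:bands` scale/offset metadata.
scaled = stackstac.stack(
    items,
    assets=["B04"],   # hypothetical asset ID
    dtype="float64",  # leave room for scaled values; lossy casts are NOT warned about
    rescale=True,     # the default
)

raw = stackstac.stack(items, assets=["B04"], rescale=False)  # keep raw digital numbers
```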

stackstac/to_dask.py

Lines changed: 5 additions & 1 deletion
@@ -132,6 +132,10 @@ def asset_table_to_reader_and_window(
         if url:
             asset_bounds: Bbox = asset_entry["bounds"]
             asset_window = windows.from_bounds(*asset_bounds, spec.transform)
+            if rescale:
+                asset_scale_offset = asset_entry["scale_offset"]
+            else:
+                asset_scale_offset = (1, 0)

             entry: ReaderTableEntry = (
                 reader(
@@ -140,7 +144,7 @@
                     resampling=resampling,
                     dtype=dtype,
                     fill_value=fill_value,
-                    rescale=rescale,
+                    scale_offset=asset_scale_offset,
                     gdal_env=gdal_env,
                     errors_as_nodata=errors_as_nodata,
                 ),
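
So after this change, `rescale` only decides whether the pair stored in the asset table is forwarded to each reader or replaced by the identity pair `(1, 0)`. A tiny sketch of that branch; the helper name and entry values are hypothetical:

```python
from typing import Tuple, Union

Number = Union[int, float]


def pick_scale_offset(asset_entry: dict, rescale: bool) -> Tuple[Number, Number]:
    """Mirror the branch added above: identity pair when rescaling is disabled."""
    return tuple(asset_entry["scale_offset"]) if rescale else (1, 0)


entry = {"scale_offset": (0.0000275, -0.2)}     # hypothetical row from the asset table
print(pick_scale_offset(entry, rescale=True))   # -> (2.75e-05, -0.2)
print(pick_scale_offset(entry, rescale=False))  # -> (1, 0)
```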
