Skip to content

Use rasterio's transform instead of homemade coordinates #1712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Jan 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions doc/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -512,10 +512,14 @@ rasterio is installed. Here is an example of how to use
[1703814 values with dtype=uint8]
Coordinates:
* band (band) int64 1 2 3
* y (y) float64 2.827e+06 2.827e+06 2.826e+06 2.826e+06 2.826e+06 ...
* x (x) float64 1.02e+05 1.023e+05 1.026e+05 1.029e+05 1.032e+05 ...
* y (y) float64 2.827e+06 2.826e+06 2.826e+06 2.826e+06 2.826e+06 ...
* x (x) float64 1.021e+05 1.024e+05 1.027e+05 1.03e+05 1.033e+05 ...
Attributes:
crs: +init=epsg:32618
res: (300.0379266750948, 300.041782729805)
transform: (300.0379266750948, 0.0, 101985.0, 0.0, -300.041782729805, 28...
is_tiled: 0
crs: +init=epsg:32618


The ``x`` and ``y`` coordinates are generated out of the file's metadata
(``bounds``, ``width``, ``height``), and they can be understood as cartesian
Expand Down
70 changes: 54 additions & 16 deletions xarray/backends/rasterio_.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os
import warnings
from collections import OrderedDict
from distutils.version import LooseVersion
import numpy as np

from .. import DataArray
Expand Down Expand Up @@ -113,7 +115,8 @@ def default(s):
return parsed_meta


def open_rasterio(filename, chunks=None, cache=None, lock=None):
def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None,
lock=None):
"""Open a file with rasterio (experimental).

This should work with any file that rasterio can open (most often:
Expand All @@ -123,15 +126,25 @@ def open_rasterio(filename, chunks=None, cache=None, lock=None):
<http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
for more information).

You can generate 2D coordinates from the file's attributes with::

from affine import Affine
da = xr.open_rasterio('path_to_file.tif')
transform = Affine(*da.attrs['transform'])
nx, ny = da.sizes['x'], da.sizes['y']
x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform


Parameters
----------
filename : str
Path to the file to open.

Returns
-------
data : DataArray
The newly created DataArray.
parse_coordinates : bool, optional
Whether to parse the x and y coordinates out of the file's
``transform`` attribute or not. The default is to automatically
parse the coordinates only if they are rectilinear (1D).
It can be useful to set ``parse_coordinates=False``
if your files are very large or if you don't need the coordinates.
chunks : int, tuple or dict, optional
Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
``{'x': 5, 'y': 5}``. If chunks is provided, it used to load the new
Expand All @@ -146,6 +159,11 @@ def open_rasterio(filename, chunks=None, cache=None, lock=None):
:py:func:`dask.array.from_array`. By default, a global lock is
used to avoid issues with concurrent access to the same file when using
dask's multithreaded backend.

Returns
-------
data : DataArray
The newly created DataArray.
"""

import rasterio
Expand All @@ -161,18 +179,38 @@ def open_rasterio(filename, chunks=None, cache=None, lock=None):
raise ValueError('Unknown dims')
coords['band'] = np.asarray(riods.indexes)

# Get geo coords
nx, ny = riods.width, riods.height
dx, dy = riods.res[0], -riods.res[1]
x0 = riods.bounds.right if dx < 0 else riods.bounds.left
y0 = riods.bounds.top if dy < 0 else riods.bounds.bottom
coords['y'] = np.linspace(start=y0 + dy / 2, num=ny,
stop=(y0 + (ny - 1) * dy) + dy / 2)
coords['x'] = np.linspace(start=x0 + dx / 2, num=nx,
stop=(x0 + (nx - 1) * dx) + dx / 2)
# Get coordinates
if LooseVersion(rasterio.__version__) < '1.0':
transform = riods.affine
else:
transform = riods.transform
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use attribute lookup fallback instead, e.g.,

try:
    transform = riods.transform
except AttributeError:
    transform = riods.affine

That's usually slightly more robust than explicitly checking versions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The transform attribute exists on the older version as well, but there it is a tuple specifying the transform in GDAL format. Maybe it'd be better to check whether transform is an instance of Affine instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it'd be better to check whether transform is an instance of Affine instead?

I don't know... Reading the transform attribute with rio < 1.0 raises a warning, reading the affine attribute with rio >= 1.0 raises a warning, so we'll have to catch the warnings in order to test this. Would that be more elegant?

if transform.is_rectilinear:
# 1d coordinates
parse = True if parse_coordinates is None else parse_coordinates
if parse:
nx, ny = riods.width, riods.height
# xarray coordinates are pixel centered
x, _ = (np.arange(nx)+0.5, np.zeros(nx)+0.5) * transform
_, y = (np.zeros(ny)+0.5, np.arange(ny)+0.5) * transform
coords['y'] = y
coords['x'] = x
else:
# 2d coordinates
parse = False if (parse_coordinates is None) else parse_coordinates
if parse:
warnings.warn("The file coordinates' transformation isn't "
"rectilinear: xarray won't parse the coordinates "
"in this case. Set `parse_coordinates=False` to "
"suppress this warning.",
RuntimeWarning, stacklevel=3)

# Attributes
attrs = {}
attrs = dict()
# Affine transformation matrix (always available)
# This describes coefficients mapping pixel coordinates to CRS
# For serialization store as tuple of 6 floats, the last row being
# always (0, 0, 1) per definition (see https://github.com/sgillies/affine)
attrs['transform'] = tuple(transform)[:6]
if hasattr(riods, 'crs') and riods.crs:
# CRS is a dict-like object specific to rasterio
# If CRS is not None, we convert it back to a PROJ4 string using
Expand Down
82 changes: 74 additions & 8 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2227,30 +2227,96 @@ def test_utm(self):
with create_tmp_geotiff() as (tmp_file, expected):
with xr.open_rasterio(tmp_file) as rioda:
assert_allclose(rioda, expected)
assert 'crs' in rioda.attrs
assert isinstance(rioda.attrs['crs'], basestring)
assert 'res' in rioda.attrs
assert isinstance(rioda.attrs['res'], tuple)
assert 'is_tiled' in rioda.attrs
assert isinstance(rioda.attrs['is_tiled'], np.uint8)
assert 'transform' in rioda.attrs
assert isinstance(rioda.attrs['transform'], tuple)

# Check no parse coords
with xr.open_rasterio(tmp_file, parse_coordinates=False) as rioda:
assert 'x' not in rioda.coords
assert 'y' not in rioda.coords

def test_non_rectilinear(self):
import rasterio
from rasterio.transform import from_origin

# Create a geotiff file with 2d coordinates
with create_tmp_file(suffix='.tif') as tmp_file:
# data
nx, ny, nz = 4, 3, 3
data = np.arange(nx*ny*nz,
dtype=rasterio.float32).reshape(nz, ny, nx)
transform = from_origin(0, 3, 1, 1).rotation(45)
with rasterio.open(
tmp_file, 'w',
driver='GTiff', height=ny, width=nx, count=nz,
transform=transform,
dtype=rasterio.float32) as s:
s.write(data)

# Default is to not parse coords
with xr.open_rasterio(tmp_file) as rioda:
assert 'x' not in rioda.coords
assert 'y' not in rioda.coords
assert 'crs' not in rioda.attrs
assert isinstance(rioda.attrs['res'], tuple)
assert isinstance(rioda.attrs['is_tiled'], np.uint8)
assert isinstance(rioda.attrs['transform'], tuple)

# See if a warning is raised if we force it
with self.assertWarns("transformation isn't rectilinear"):
with xr.open_rasterio(tmp_file,
parse_coordinates=True) as rioda:
assert 'x' not in rioda.coords
assert 'y' not in rioda.coords

def test_platecarree(self):
with create_tmp_geotiff(8, 10, 1, transform_args=[1, 2, 0.5, 2.],
crs='+proj=latlong') \
as (tmp_file, expected):
with xr.open_rasterio(tmp_file) as rioda:
assert_allclose(rioda, expected)
assert 'crs' in rioda.attrs
assert isinstance(rioda.attrs['crs'], basestring)
assert 'res' in rioda.attrs
assert isinstance(rioda.attrs['res'], tuple)
assert 'is_tiled' in rioda.attrs
assert isinstance(rioda.attrs['is_tiled'], np.uint8)
assert 'transform' in rioda.attrs
assert isinstance(rioda.attrs['transform'], tuple)

def test_notransform(self):
# regression test for https://github.com/pydata/xarray/issues/1686
import rasterio
import warnings

# Create a geotiff file
with warnings.catch_warnings():
# rasterio throws a NotGeoreferencedWarning here, which is
# expected since we test rasterio's defaults in this case.
warnings.filterwarnings('ignore', category=UserWarning,
message='Dataset has no geotransform set')
with create_tmp_file(suffix='.tif') as tmp_file:
# data
nx, ny, nz = 4, 3, 3
data = np.arange(nx*ny*nz,
dtype=rasterio.float32).reshape(nz, ny, nx)
with rasterio.open(
tmp_file, 'w',
driver='GTiff', height=ny, width=nx, count=nz,
dtype=rasterio.float32) as s:
s.write(data)

# Tests
expected = DataArray(data,
dims=('band', 'y', 'x'),
coords={'band': [1, 2, 3],
'y': [0.5, 1.5, 2.5],
'x': [0.5, 1.5, 2.5, 3.5],
})
with xr.open_rasterio(tmp_file) as rioda:
assert_allclose(rioda, expected)
assert isinstance(rioda.attrs['res'], tuple)
assert isinstance(rioda.attrs['is_tiled'], np.uint8)
assert isinstance(rioda.attrs['transform'], tuple)

def test_indexing(self):
with create_tmp_geotiff(8, 10, 3, transform_args=[1, 2, 0.5, 2.],
crs='+proj=latlong') as (tmp_file, expected):
Expand Down