-
Notifications
You must be signed in to change notification settings - Fork 52
Description
Description
Stacking the same assets twice - once reading from a local directory, once reading from the original remote source - shows different performance. Stacking the local assets takes more time on average.
Expected behaviour
Stacking assets that are available locally (on fast SSD storage close to the CPU) should be faster than stacking remote assets, which first need to be fetched over a low-bandwidth connection.
Investigation/Reproduction
So far I have only been able to narrow the phenomenon down to this: it does not seem to be related to the actual I/O, because the time difference between stacking local and remote assets persists even when using the FakeReader.
The time difference in the example below is as follows:
Minimum local computation time: 10.83 seconds
Minimum remote computation time: 4.38 seconds
import timeit
import planetary_computer as pc
import stac_asset
import stackstac
import warnings
from pystac.item_collection import ItemCollection
from pystac_client import Client
from shapely.geometry import box
from stackstac.reader_protocol import FakeReader
def prepare_env():
    """Create the remote and local item collections used by the benchmark.

    Searches the Planetary Computer STAC API for ``landsat-c2-l2`` items in a
    fixed time range and bounding box, saves the result to
    ``remote_items.json``, and downloads the items into ``test_dir`` with the
    asset selection restricted to ``qa_pixel``.

    Returns:
        tuple: ``(remote_path, local_path)``, the paths of the remote item
        collection JSON and of the item collection that the download step is
        expected to write into ``test_dir``.
    """
    import asyncio
    import inspect

    # Time range and bounding box of the query.
    t_start = "2020-04-01"
    t_end = "2020-06-01"
    xmin, ymin, xmax, ymax = 142.13, -34.2, 142.18, -34.15

    # Open the catalog; the modifier signs the item hrefs in place.
    catalog = Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1",
        modifier=pc.sign_inplace,
    )

    # Search for matching items.
    query = catalog.search(
        collections="landsat-c2-l2",
        datetime=[t_start, t_end],
        limit=100,
        intersects=box(xmin, ymin, xmax, ymax),
    )

    # Persist the remote item collection so it can be re-read from disk.
    remote_path = "remote_items.json"
    remote_items = query.item_collection()
    remote_items.save_object(remote_path)

    # Download the items. stac-asset documents its top-level functions as
    # coroutines, in which case a bare call only creates a coroutine object
    # and downloads nothing; run it to completion here. A synchronous return
    # value is left untouched.
    # NOTE(review): asyncio.run() raises RuntimeError inside an already
    # running event loop (e.g. Jupyter) -- use ``await`` there instead.
    download = stac_asset.download_item_collection(
        item_collection=remote_items,
        directory="test_dir",
        keep_non_downloaded=False,
        config=stac_asset.Config(include=["qa_pixel"], warn=True),
    )
    if inspect.iscoroutine(download):
        asyncio.run(download)

    # Path of the item collection generated by the download step.
    local_path = "test_dir/item-collection.json"
    return remote_path, local_path
def test_fun(items):
    """Load an item collection from a file, stack it, and compute the result.

    All warnings raised while loading and stacking are suppressed. The stack
    uses ``FakeReader`` as its reader.

    Args:
        items: Path of the item-collection JSON file to load.
    """
    stack_options = {
        "assets": ["qa_pixel"],
        "resolution": 100,
        "epsg": 3857,
        "dtype": "uint8",
        "fill_value": 255,
        "reader": FakeReader,
    }
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        collection = ItemCollection.from_file(items)
        # The computed array is discarded; only the elapsed time matters.
        stackstac.stack(collection, **stack_options).compute()
def time_operations():
    """Time ``test_fun`` on the local and remote collections and print the minima.

    Each measurement runs five repeats of three calls. The smallest repeat
    total is printed as the best-case figure. The setup strings import
    ``test_fun`` and the path variables from ``__main__``, so those names must
    exist as globals of the running script.
    """
    # Local item collection.
    local_runs = timeit.repeat(
        stmt="test_fun(local_path)",
        setup="from __main__ import test_fun, local_path",
        repeat=5,
        number=3,
    )
    print(f"Minimum local computation time: {min(local_runs)} seconds")

    # Remote item collection.
    remote_runs = timeit.repeat(
        stmt="test_fun(remote_path)",
        setup="from __main__ import test_fun, remote_path",
        repeat=5,
        number=3,
    )
    print(f"Minimum remote computation time: {min(remote_runs)} seconds")
if __name__ == "__main__":
    # The paths are bound as module-level globals because the timeit setup
    # strings in time_operations() import them from __main__.
    remote_path, local_path = prepare_env()
    time_operations()