@@ -21,8 +21,8 @@ example::
21
21
>>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000), dtype='i4')
22
22
>>> z
23
23
zarr.core.Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
24
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
25
- nbytes: 381.5M; nbytes_stored: 317 ; ratio: 1261829.7 ; initialized: 0/100
24
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
25
+ nbytes: 381.5M; nbytes_stored: 313 ; ratio: 1277955.3 ; initialized: 0/100
26
26
store: builtins.dict
27
27
28
28
The code above creates a 2-dimensional array of 32-bit integers with
@@ -44,7 +44,7 @@ scalar value::
44
44
>>> z[:] = 42
45
45
>>> z
46
46
zarr.core.Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
47
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
47
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
48
48
nbytes: 381.5M; nbytes_stored: 2.2M; ratio: 170.4; initialized: 100/100
49
49
store: builtins.dict
50
50
@@ -92,8 +92,8 @@ enabling persistence of data between sessions. For example::
92
92
... chunks=(1000, 1000), dtype='i4', fill_value=0)
93
93
>>> z1
94
94
zarr.core.Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
95
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
96
- nbytes: 381.5M; nbytes_stored: 317 ; ratio: 1261829.7 ; initialized: 0/100
95
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
96
+ nbytes: 381.5M; nbytes_stored: 313 ; ratio: 1277955.3 ; initialized: 0/100
97
97
store: zarr.storage.DirectoryStore
98
98
99
99
The array above will store its configuration metadata and all
@@ -116,8 +116,8 @@ Check that the data have been written and can be read again::
116
116
>>> z2 = zarr.open('example.zarr', mode='r')
117
117
>>> z2
118
118
zarr.core.Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
119
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
120
- nbytes: 381.5M; nbytes_stored: 2.3M; ratio: 163.8 ; initialized: 100/100
119
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
120
+ nbytes: 381.5M; nbytes_stored: 2.3M; ratio: 163.9 ; initialized: 100/100
121
121
store: zarr.storage.DirectoryStore
122
122
>>> np.all(z1[:] == z2[:])
123
123
True
@@ -135,8 +135,8 @@ can be increased or decreased in length. For example::
135
135
>>> z.resize(20000, 10000)
136
136
>>> z
137
137
zarr.core.Array((20000, 10000), float64, chunks=(1000, 1000), order=C)
138
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
139
- nbytes: 1.5G; nbytes_stored: 5.9M ; ratio: 259.9 ; initialized: 100/200
138
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
139
+ nbytes: 1.5G; nbytes_stored: 5.7M ; ratio: 268.5 ; initialized: 100/200
140
140
store: builtins.dict
141
141
142
142
Note that when an array is resized, the underlying data are not
@@ -151,20 +151,20 @@ which can be used to append data to any axis. E.g.::
151
151
>>> z = zarr.array(a, chunks=(1000, 100))
152
152
>>> z
153
153
zarr.core.Array((10000, 1000), int32, chunks=(1000, 100), order=C)
154
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
155
- nbytes: 38.1M; nbytes_stored: 2.0M ; ratio: 19.3 ; initialized: 100/100
154
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
155
+ nbytes: 38.1M; nbytes_stored: 1.9M ; ratio: 20.0 ; initialized: 100/100
156
156
store: builtins.dict
157
157
>>> z.append(a)
158
158
>>> z
159
159
zarr.core.Array((20000, 1000), int32, chunks=(1000, 100), order=C)
160
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
161
- nbytes: 76.3M; nbytes_stored: 4.0M ; ratio: 19.3 ; initialized: 200/200
160
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
161
+ nbytes: 76.3M; nbytes_stored: 3.8M ; ratio: 20.0 ; initialized: 200/200
162
162
store: builtins.dict
163
163
>>> z.append(np.vstack([a, a]), axis=1)
164
164
>>> z
165
165
zarr.core.Array((20000, 2000), int32, chunks=(1000, 100), order=C)
166
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
167
- nbytes: 152.6M; nbytes_stored: 7.9M ; ratio: 19.3 ; initialized: 400/400
166
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
167
+ nbytes: 152.6M; nbytes_stored: 7.6M ; ratio: 20.0 ; initialized: 400/400
168
168
store: builtins.dict
169
169
170
170
.. _tutorial_compress:
@@ -188,17 +188,24 @@ functions. For example::
188
188
189
189
>>> z = zarr.array(np.arange(100000000, dtype='i4').reshape(10000, 10000),
190
190
... chunks=(1000, 1000), compression='blosc',
191
- ... compression_opts=dict(cname='lz4 ', clevel=3, shuffle=2))
191
+ ... compression_opts=dict(cname='zstd ', clevel=3, shuffle=2))
192
192
>>> z
193
193
zarr.core.Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
194
- compression: blosc; compression_opts: {'clevel': 3, 'cname': 'lz4 ', 'shuffle': 2}
195
- nbytes: 381.5M; nbytes_stored: 17.6M ; ratio: 21.7 ; initialized: 100/100
194
+ compression: blosc; compression_opts: {'clevel': 3, 'cname': 'zstd ', 'shuffle': 2}
195
+ nbytes: 381.5M; nbytes_stored: 3.1M ; ratio: 121.1 ; initialized: 100/100
196
196
store: builtins.dict
197
197
198
198
The array above will use Blosc as the primary compressor, using the
199
- LZ4 algorithm (compression level 3) internally within Blosc, and with
199
+ Zstandard algorithm (compression level 3) internally within Blosc, and with
200
200
the bitshuffle filter applied.
201
201
202
+ A list of the internal compression libraries available within Blosc can be
203
+ obtained via::
204
+
205
+ >>> from zarr import blosc
206
+ >>> blosc.list_compressors()
207
+ ['blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd']
208
+
202
209
In addition to Blosc, other compression libraries can also be
203
210
used. Zarr comes with support for zlib, BZ2 and LZMA compression, via
204
211
the Python standard library. For example, here is an array using zlib
@@ -270,8 +277,8 @@ array with thread synchronization::
270
277
... synchronizer=zarr.ThreadSynchronizer())
271
278
>>> z
272
279
zarr.sync.SynchronizedArray((10000, 10000), int32, chunks=(1000, 1000), order=C)
273
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
274
- nbytes: 381.5M; nbytes_stored: 317 ; ratio: 1261829.7 ; initialized: 0/100
280
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
281
+ nbytes: 381.5M; nbytes_stored: 313 ; ratio: 1277955.3 ; initialized: 0/100
275
282
store: builtins.dict; synchronizer: zarr.sync.ThreadSynchronizer
276
283
277
284
This array is safe to read or write within a multi-threaded program.
@@ -285,8 +292,8 @@ provided that all processes have access to a shared file system. E.g.::
285
292
... synchronizer=synchronizer)
286
293
>>> z
287
294
zarr.sync.SynchronizedArray((10000, 10000), int32, chunks=(1000, 1000), order=C)
288
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
289
- nbytes: 381.5M; nbytes_stored: 317 ; ratio: 1261829.7 ; initialized: 0/100
295
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
296
+ nbytes: 381.5M; nbytes_stored: 313 ; ratio: 1277955.3 ; initialized: 0/100
290
297
store: zarr.storage.DirectoryStore; synchronizer: zarr.sync.ProcessSynchronizer
291
298
292
299
This array is safe to read or write from multiple processes.
@@ -350,13 +357,13 @@ data. E.g.::
350
357
>>> a = np.arange(100000000, dtype='i4').reshape(10000, 10000).T
351
358
>>> zarr.array(a, chunks=(1000, 1000))
352
359
zarr.core.Array((10000, 10000), int32, chunks=(1000, 1000), order=C)
353
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
354
- nbytes: 381.5M; nbytes_stored: 26.1M ; ratio: 14.6 ; initialized: 100/100
360
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
361
+ nbytes: 381.5M; nbytes_stored: 26.3M ; ratio: 14.5 ; initialized: 100/100
355
362
store: builtins.dict
356
363
>>> zarr.array(a, chunks=(1000, 1000), order='F')
357
364
zarr.core.Array((10000, 10000), int32, chunks=(1000, 1000), order=F)
358
- compression: blosc; compression_opts: {'clevel': 5, 'cname': 'blosclz ', 'shuffle': 1}
359
- nbytes: 381.5M; nbytes_stored: 10.0M ; ratio: 38.0 ; initialized: 100/100
365
+ compression: blosc; compression_opts: {'clevel': 5, 'cname': 'lz4 ', 'shuffle': 1}
366
+ nbytes: 381.5M; nbytes_stored: 9.5M ; ratio: 40.1 ; initialized: 100/100
360
367
store: builtins.dict
361
368
362
369
In the above example, Fortran order gives a better compression ratio. This
@@ -460,12 +467,12 @@ Configuring Blosc
460
467
461
468
The Blosc compressor is able to use multiple threads internally to
462
469
accelerate compression and decompression. By default, Zarr allows
463
- Blosc to use up to 4 internal threads. The number of Blosc threads can
464
- be changed, e.g.::
470
+ Blosc to use up to 8 internal threads. The number of Blosc threads can
471
+ be changed, e.g.::
465
472
466
473
>>> from zarr import blosc
467
474
>>> blosc.set_nthreads(2)
468
- 4
475
+ 8
469
476
470
477
When a Zarr array is being used within a multi-threaded program, Zarr
471
478
automatically switches to using Blosc in a single-threaded
0 commit comments