-
-
Notifications
You must be signed in to change notification settings - Fork 38
Expand file tree
/
Copy pathblosc2_ext.pyx
More file actions
3403 lines (2861 loc) · 134 KB
/
blosc2_ext.pyx
File metadata and controls
3403 lines (2861 loc) · 134 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# This source code is licensed under a BSD-style license (found in the
# LICENSE file in the root directory of this source tree)
#######################################################################
#cython: language_level=3
import os
import ast
import atexit
import pathlib
import _ctypes
import cython
from cpython cimport (
Py_buffer,
PyBUF_SIMPLE,
PyBuffer_Release,
PyBytes_FromStringAndSize,
PyObject_GetBuffer,
)
from cpython.ref cimport Py_INCREF, Py_DECREF
from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New
from cython.operator cimport dereference
from libc.stdint cimport uintptr_t
from libc.stdlib cimport free, malloc, realloc, calloc
from libc.stdlib cimport abs as c_abs
from libc.string cimport memcpy, memset, strcpy, strdup, strlen
from libcpp cimport bool as c_bool
from enum import Enum
import numpy as np
from msgpack import packb, unpackb
import blosc2
cimport numpy as np
np.import_array()
# Fixed-width integer typedefs made visible to Cython from <stdint.h>.
# These are extern declarations: the C compiler takes the real definitions
# from the header named below.
cdef extern from "<stdint.h>":
ctypedef signed char int8_t
ctypedef signed short int16_t
ctypedef signed int int32_t
ctypedef signed long int64_t
ctypedef unsigned char uint8_t
ctypedef unsigned short uint16_t
ctypedef unsigned int uint32_t
ctypedef unsigned long long uint64_t
# C printf, declared nogil so it can be called without holding the GIL.
cdef extern from "<stdio.h>":
int printf(const char *format, ...) nogil
# Declarations taken from the C-Blosc2 header "blosc2.h".
cdef extern from "blosc2.h":
# Limits and size constants (filters, metalayers, header lengths, buffer sizes),
# exposed to Cython as members of an anonymous enum.
ctypedef enum:
BLOSC2_MAX_FILTERS
BLOSC2_DEFINED_FILTERS_START
BLOSC2_DEFINED_FILTERS_STOP
BLOSC2_GLOBAL_REGISTERED_FILTERS_START
BLOSC2_GLOBAL_REGISTERED_FILTERS_STOP
BLOSC2_GLOBAL_REGISTERED_FILTERS
BLOSC2_USER_REGISTERED_FILTERS_START
BLOSC2_USER_REGISTERED_FILTERS_STOP
BLOSC2_MAX_UDFILTERS
BLOSC2_MAX_METALAYERS
BLOSC2_MAX_VLMETALAYERS
BLOSC2_PREFILTER_INPUTS_MAX
BLOSC_MAX_CODECS
BLOSC_MIN_HEADER_LENGTH
BLOSC_EXTENDED_HEADER_LENGTH
BLOSC2_MAX_OVERHEAD
BLOSC2_MAX_BUFFERSIZE
BLOSC2_MAXBLOCKSIZE
BLOSC2_MAXTYPESIZE
BLOSC_MAX_TYPESIZE
BLOSC_MIN_BUFFERSIZE
# Special-value identifiers (BLOSC2_SPECIAL_*).
ctypedef enum:
BLOSC2_SPECIAL_ZERO
BLOSC2_SPECIAL_NAN
BLOSC2_SPECIAL_UNINIT
# Version information. Declared as enum members here; VERSION_STRING and
# VERSION_DATE are cast to char* and decoded further down in this file.
ctypedef enum:
BLOSC2_VERSION_STRING
BLOSC2_VERSION_REVISION
BLOSC2_VERSION_DATE
# Error codes (BLOSC2_ERROR_*).
ctypedef enum:
BLOSC2_ERROR_SUCCESS
BLOSC2_ERROR_FAILURE
BLOSC2_ERROR_STREAM
BLOSC2_ERROR_DATA
BLOSC2_ERROR_MEMORY_ALLOC
BLOSC2_ERROR_READ_BUFFER
BLOSC2_ERROR_WRITE_BUFFER
BLOSC2_ERROR_CODEC_SUPPORT
BLOSC2_ERROR_CODEC_PARAM
BLOSC2_ERROR_CODEC_DICT
BLOSC2_ERROR_VERSION_SUPPORT
BLOSC2_ERROR_INVALID_HEADER
BLOSC2_ERROR_INVALID_PARAM
BLOSC2_ERROR_FILE_READ
BLOSC2_ERROR_FILE_WRITE
BLOSC2_ERROR_FILE_OPEN
BLOSC2_ERROR_NOT_FOUND
BLOSC2_ERROR_RUN_LENGTH
BLOSC2_ERROR_FILTER_PIPELINE
BLOSC2_ERROR_CHUNK_INSERT
BLOSC2_ERROR_CHUNK_APPEND
BLOSC2_ERROR_CHUNK_UPDATE
BLOSC2_ERROR_2GB_LIMIT
BLOSC2_ERROR_SCHUNK_COPY
BLOSC2_ERROR_FRAME_TYPE
BLOSC2_ERROR_FILE_TRUNCATE
BLOSC2_ERROR_THREAD_CREATE
BLOSC2_ERROR_POSTFILTER
BLOSC2_ERROR_FRAME_SPECIAL
BLOSC2_ERROR_SCHUNK_SPECIAL
BLOSC2_ERROR_PLUGIN_IO
BLOSC2_ERROR_FILE_REMOVE
# Codec id ranges (built-in, globally registered, user registered).
ctypedef enum:
BLOSC2_DEFINED_CODECS_START
BLOSC2_DEFINED_CODECS_STOP
BLOSC2_GLOBAL_REGISTERED_CODECS_START
BLOSC2_GLOBAL_REGISTERED_CODECS_STOP
BLOSC2_GLOBAL_REGISTERED_CODECS
BLOSC2_USER_REGISTERED_CODECS_START
BLOSC2_USER_REGISTERED_CODECS_STOP
# I/O backend identifiers.
ctypedef enum:
BLOSC2_IO_FILESYSTEM
BLOSC2_IO_FILESYSTEM_MMAP
BLOSC_IO_LAST_BLOSC_DEFINED
BLOSC_IO_LAST_REGISTERED
# Used by set_nthreads() in this file as the upper bound for `nthreads`.
cdef int INT_MAX
# Library set-up and tear-down; this module calls blosc2_init() at import
# time and blosc2_destroy() from its atexit hook.
void blosc2_init()
void blosc2_destroy()
# blosc1-style compression, decompression and item access.
int blosc1_compress(int clevel, int doshuffle, size_t typesize,
size_t nbytes, const void* src, void* dest,
size_t destsize)
int blosc1_decompress(const void* src, void* dest, size_t destsize)
int blosc1_getitem(const void* src, int start, int nitems, void* dest)
int blosc2_getitem(const void* src, int32_t srcsize, int start, int nitems,
void* dest, int32_t destsize)
# Threading configuration.
ctypedef void(*blosc2_threads_callback)(void *callback_data, void (*dojob)(void *), int numjobs,
size_t jobdata_elsize, void *jobdata)
void blosc2_set_threads_callback(blosc2_threads_callback callback, void *callback_data)
int16_t blosc2_set_nthreads(int16_t nthreads)
# Global compressor selection and codec/library lookups.
const char* blosc1_get_compressor()
int blosc1_set_compressor(const char* compname)
void blosc2_set_delta(int dodelta)
int blosc2_compcode_to_compname(int compcode, const char** compname)
int blosc2_compname_to_compcode(const char* compname)
const char* blosc2_list_compressors()
int blosc2_get_complib_info(const char* compname, char** complib,
char** version)
int blosc2_free_resources()
# Inspection of an already compressed buffer.
int blosc2_cbuffer_sizes(const void* cbuffer, int32_t* nbytes,
int32_t* cbytes, int32_t* blocksize) nogil
int blosc1_cbuffer_validate(const void* cbuffer, size_t cbytes, size_t* nbytes)
void blosc1_cbuffer_metainfo(const void* cbuffer, size_t* typesize, int* flags)
void blosc1_cbuffer_versions(const void* cbuffer, int* version, int* versionlz)
const char* blosc2_cbuffer_complib(const void* cbuffer)
# Opaque context handle: no fields are declared on the Cython side.
ctypedef struct blosc2_context:
pass
# Argument block passed to a prefilter callback (see blosc2_prefilter_fn).
ctypedef struct blosc2_prefilter_params:
void* user_data
const uint8_t* input
uint8_t* output
int32_t output_size
int32_t output_typesize
int32_t output_offset
int64_t nchunk
int32_t nblock
int32_t tid
uint8_t* ttmp
size_t ttmp_nbytes
blosc2_context* ctx
c_bool output_is_disposable
# Argument block passed to a postfilter callback (see blosc2_postfilter_fn).
ctypedef struct blosc2_postfilter_params:
void *user_data
const uint8_t *input
uint8_t *output
int32_t size
int32_t typesize
int32_t offset
int64_t nchunk
int32_t nblock
int32_t tid
uint8_t *ttmp
size_t ttmp_nbytes
blosc2_context *ctx
ctypedef int(*blosc2_prefilter_fn)(blosc2_prefilter_params* params)
ctypedef int(*blosc2_postfilter_fn)(blosc2_postfilter_params *params)
# Compression parameters; filled in by create_cparams_from_kwargs() in this file.
ctypedef struct blosc2_cparams:
uint8_t compcode
uint8_t compcode_meta
uint8_t clevel
int use_dict
int32_t typesize
int16_t nthreads
int32_t blocksize
int32_t splitmode
void *schunk
uint8_t filters[BLOSC2_MAX_FILTERS]
uint8_t filters_meta[BLOSC2_MAX_FILTERS]
blosc2_prefilter_fn prefilter
blosc2_prefilter_params* preparams
int tuner_id
void* tuner_params
c_bool instr_codec
void* codec_params
void* filter_params[BLOSC2_MAX_FILTERS]
cdef const blosc2_cparams BLOSC2_CPARAMS_DEFAULTS
# Decompression parameters.
ctypedef struct blosc2_dparams:
int16_t nthreads
void* schunk
blosc2_postfilter_fn postfilter
blosc2_postfilter_params *postparams
int32_t typesize
cdef const blosc2_dparams BLOSC2_DPARAMS_DEFAULTS
# Context life cycle and context-based (de)compression. The functions marked
# nogil may be called without holding the GIL.
blosc2_context* blosc2_create_cctx(blosc2_cparams cparams) nogil
blosc2_context* blosc2_create_dctx(blosc2_dparams dparams) nogil
void blosc2_free_ctx(blosc2_context * context) nogil
int blosc2_set_maskout(blosc2_context *ctx, c_bool *maskout, int nblocks)
int blosc2_compress(int clevel, int doshuffle, int32_t typesize,
const void * src, int32_t srcsize, void * dest,
int32_t destsize) nogil
int blosc2_decompress(const void * src, int32_t srcsize,
void * dest, int32_t destsize)
int blosc2_compress_ctx(
blosc2_context * context, const void * src, int32_t srcsize, void * dest,
int32_t destsize) nogil
int blosc2_decompress_ctx(blosc2_context * context, const void * src,
int32_t srcsize, void * dest, int32_t destsize) nogil
int blosc2_getitem_ctx(blosc2_context* context, const void* src,
int32_t srcsize, int start, int nitems, void* dest,
int32_t destsize) nogil
# Storage description: contiguity flag, path, and pointers to the
# compression/decompression parameters and the I/O backend.
ctypedef struct blosc2_storage:
c_bool contiguous
char* urlpath
blosc2_cparams* cparams
blosc2_dparams* dparams
blosc2_io *io
cdef const blosc2_storage BLOSC2_STORAGE_DEFAULTS
# Opaque frame handle: no fields are declared on the Cython side.
ctypedef struct blosc2_frame:
pass
# A named metalayer: name plus a content buffer and its length.
ctypedef struct blosc2_metalayer:
char* name
uint8_t* content
int32_t content_len
# Table of tuner callbacks together with the tuner id and name.
ctypedef struct blosc2_tuner:
void(*init)(void *config, blosc2_context*cctx, blosc2_context*dctx)
void (*next_blocksize)(blosc2_context *context)
void(*next_cparams)(blosc2_context *context)
void(*update)(blosc2_context *context, double ctime)
void (*free)(blosc2_context *context)
int id
char *name
# I/O backend descriptor.
ctypedef struct blosc2_io:
uint8_t id
const char *name
void* params
# Parameters of the memory-mapped I/O backend.
ctypedef struct blosc2_stdio_mmap:
const char* mode
int64_t initial_mapping_size
c_bool needs_free
cdef const blosc2_stdio_mmap BLOSC2_STDIO_MMAP_DEFAULTS
# Super-chunk: compression settings, chunk bookkeeping, storage/frame
# pointers, the two contexts, and fixed/variable-length metalayers.
ctypedef struct blosc2_schunk:
uint8_t version
uint8_t compcode
uint8_t compcode_meta
uint8_t clevel
uint8_t splitmode
int32_t typesize
int32_t blocksize
int32_t chunksize
uint8_t filters[BLOSC2_MAX_FILTERS]
uint8_t filters_meta[BLOSC2_MAX_FILTERS]
int64_t nchunks
int64_t current_nchunk
int64_t nbytes
int64_t cbytes
uint8_t** data
size_t data_len
blosc2_storage* storage
blosc2_frame* frame
blosc2_context* cctx
blosc2_context* dctx
blosc2_metalayer *metalayers[BLOSC2_MAX_METALAYERS]
uint16_t nmetalayers
blosc2_metalayer *vlmetalayers[BLOSC2_MAX_VLMETALAYERS]
int16_t nvlmetalayers
int tuner_id
void *tuner_params
int8_t ndim
int64_t *blockshape
# Super-chunk creation, copying, opening and serialization.
blosc2_schunk *blosc2_schunk_new(blosc2_storage *storage)
blosc2_schunk *blosc2_schunk_copy(blosc2_schunk *schunk, blosc2_storage *storage)
blosc2_schunk *blosc2_schunk_from_buffer(uint8_t *cframe, int64_t len, c_bool copy)
blosc2_schunk *blosc2_schunk_open_offset(const char* urlpath, int64_t offset)
blosc2_schunk* blosc2_schunk_open_offset_udio(const char* urlpath, int64_t offset, const blosc2_io *udio)
int64_t blosc2_schunk_to_buffer(blosc2_schunk* schunk, uint8_t** cframe, c_bool* needs_free)
void blosc2_schunk_avoid_cframe_free(blosc2_schunk *schunk, c_bool avoid_cframe_free)
int64_t blosc2_schunk_to_file(blosc2_schunk* schunk, const char* urlpath)
int64_t blosc2_schunk_free(blosc2_schunk *schunk)
# Chunk-level operations on a super-chunk.
int64_t blosc2_schunk_append_chunk(blosc2_schunk *schunk, uint8_t *chunk, c_bool copy)
int64_t blosc2_schunk_update_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t *chunk, c_bool copy)
int64_t blosc2_schunk_insert_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t *chunk, c_bool copy)
int64_t blosc2_schunk_delete_chunk(blosc2_schunk *schunk, int64_t nchunk)
int64_t blosc2_schunk_fill_special(blosc2_schunk *schunk, int64_t nitems, int special_value,
int32_t chunksize);
int64_t blosc2_schunk_append_buffer(blosc2_schunk *schunk, void *src, int32_t nbytes)
int blosc2_schunk_decompress_chunk(blosc2_schunk *schunk, int64_t nchunk, void *dest, int32_t nbytes)
int blosc2_schunk_get_chunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t ** chunk,
c_bool *needs_free) nogil
int blosc2_schunk_get_lazychunk(blosc2_schunk *schunk, int64_t nchunk, uint8_t ** chunk,
c_bool *needs_free) nogil
# Slice access, parameter retrieval and frame bookkeeping.
int blosc2_schunk_get_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t stop, void *buffer)
int blosc2_schunk_set_slice_buffer(blosc2_schunk *schunk, int64_t start, int64_t stop, void *buffer)
int blosc2_schunk_get_cparams(blosc2_schunk *schunk, blosc2_cparams** cparams)
int blosc2_schunk_get_dparams(blosc2_schunk *schunk, blosc2_dparams** dparams)
int blosc2_schunk_reorder_offsets(blosc2_schunk *schunk, int64_t *offsets_order)
int64_t blosc2_schunk_frame_len(blosc2_schunk* schunk)
int blosc2_chunk_repeatval(blosc2_cparams cparams, const int32_t nbytes,
void *dest, int32_t destsize, const void *repeatval)
# Fixed-length metalayers.
int blosc2_meta_exists(blosc2_schunk *schunk, const char *name)
int blosc2_meta_add(blosc2_schunk *schunk, const char *name, uint8_t *content,
int32_t content_len)
int blosc2_meta_update(blosc2_schunk *schunk, const char *name, uint8_t *content,
int32_t content_len)
int blosc2_meta_get(blosc2_schunk *schunk, const char *name, uint8_t **content,
int32_t *content_len)
# Variable-length metalayers.
int blosc2_vlmeta_exists(blosc2_schunk *schunk, const char *name)
int blosc2_vlmeta_add(blosc2_schunk *schunk, const char *name,
uint8_t *content, int32_t content_len, blosc2_cparams *cparams)
int blosc2_vlmeta_update(blosc2_schunk *schunk, const char *name,
uint8_t *content, int32_t content_len, blosc2_cparams *cparams)
int blosc2_vlmeta_get(blosc2_schunk *schunk, const char *name,
uint8_t **content, int32_t *content_len)
int blosc2_vlmeta_delete(blosc2_schunk *schunk, const char *name)
int blosc2_vlmeta_get_names(blosc2_schunk *schunk, char **names)
# Global blocksize / schunk setters and path removal helpers.
int blosc1_get_blocksize()
void blosc1_set_blocksize(size_t blocksize)
void blosc1_set_schunk(blosc2_schunk *schunk)
int blosc2_remove_dir(const char *path)
int blosc2_remove_urlpath(const char *path)
# Callback signatures and descriptor used to register a codec.
ctypedef int(*blosc2_codec_encoder_cb)(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len,
uint8_t meta, blosc2_cparams *cparams, const void *chunk)
ctypedef int(*blosc2_codec_decoder_cb)(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len,
uint8_t meta, blosc2_dparams *dparams, const void *chunk)
ctypedef struct blosc2_codec:
uint8_t compcode
char* compname
uint8_t complib
uint8_t version
blosc2_codec_encoder_cb encoder
blosc2_codec_decoder_cb decoder
int blosc2_register_codec(blosc2_codec *codec)
# Callback signatures and descriptor used to register a filter
# (forward and backward callbacks).
ctypedef int(*blosc2_filter_forward_cb)(const uint8_t *, uint8_t *, int32_t, uint8_t, blosc2_cparams *, uint8_t)
ctypedef int(*blosc2_filter_backward_cb)(const uint8_t *, uint8_t *, int32_t, uint8_t, blosc2_dparams *, uint8_t)
ctypedef struct blosc2_filter:
uint8_t id
char* name
blosc2_filter_forward_cb forward
blosc2_filter_backward_cb backward
int blosc2_register_filter(blosc2_filter *filter)
int blosc2_get_slice_nchunks(blosc2_schunk * schunk, int64_t *start, int64_t *stop, int64_t ** chunks_idx)
# Declarations taken from the N-dimensional array header "b2nd.h".
cdef extern from "b2nd.h":
ctypedef enum:
B2ND_MAX_DIM
B2ND_MAX_METALAYERS
B2ND_DEFAULT_DTYPE_FORMAT
# Cached chunk data together with the chunk number it belongs to.
cdef struct chunk_cache_s:
uint8_t *data
int64_t nchunk
# N-dimensional array built on a super-chunk (`sc`): per-dimension shapes,
# item counts, stride tables and the dtype string.
ctypedef struct b2nd_array_t:
blosc2_schunk* sc
int64_t shape[B2ND_MAX_DIM]
int32_t chunkshape[B2ND_MAX_DIM]
int64_t extshape[B2ND_MAX_DIM]
int32_t blockshape[B2ND_MAX_DIM]
int64_t extchunkshape[B2ND_MAX_DIM]
int64_t nitems
int32_t chunknitems
int64_t extnitems
int32_t blocknitems
int64_t extchunknitems
int8_t ndim
chunk_cache_s chunk_cache
int64_t item_array_strides[B2ND_MAX_DIM]
int64_t item_chunk_strides[B2ND_MAX_DIM]
int64_t item_extchunk_strides[B2ND_MAX_DIM]
int64_t item_block_strides[B2ND_MAX_DIM]
int64_t block_chunk_strides[B2ND_MAX_DIM]
int64_t chunk_array_strides[B2ND_MAX_DIM]
char *dtype
int8_t dtype_format
# Opaque creation context: no fields are declared on the Cython side.
ctypedef struct b2nd_context_t:
pass
b2nd_context_t *b2nd_create_ctx(blosc2_storage *b2_storage, int8_t ndim, int64_t *shape,
int32_t *chunkshape, int32_t *blockshape, char *dtype,
int8_t dtype_format, blosc2_metalayer *metalayers, int32_t nmetalayers)
int b2nd_free_ctx(b2nd_context_t *ctx)
# Array constructors and destructor.
int b2nd_uninit(b2nd_context_t *ctx, b2nd_array_t ** array)
int b2nd_nans(b2nd_context_t * ctx, b2nd_array_t ** array)
int b2nd_empty(b2nd_context_t *ctx, b2nd_array_t **array)
int b2nd_zeros(b2nd_context_t *ctx, b2nd_array_t **array)
int b2nd_full(b2nd_context_t *ctx, b2nd_array_t ** array, void *fill_value)
int b2nd_free(b2nd_array_t *array)
# Slicing and conversion to/from C buffers and cframes.
int b2nd_get_slice_cbuffer(b2nd_array_t *array,
int64_t *start, int64_t *stop,
void *buffer, int64_t *buffershape, int64_t buffersize)
int b2nd_set_slice_cbuffer(void *buffer, int64_t *buffershape, int64_t buffersize,
int64_t *start, int64_t *stop, b2nd_array_t *array)
int b2nd_get_slice(b2nd_context_t *ctx, b2nd_array_t **array, b2nd_array_t *src, const int64_t *start,
const int64_t *stop)
int b2nd_from_cbuffer(b2nd_context_t *ctx, b2nd_array_t **array, void *buffer, int64_t buffersize)
int b2nd_to_cbuffer(b2nd_array_t *array, void *buffer, int64_t buffersize)
int b2nd_from_cframe(uint8_t *cframe, int64_t cframe_len, c_bool copy, b2nd_array_t ** array);
int b2nd_to_cframe(const b2nd_array_t *array, uint8_t ** cframe, int64_t *cframe_len,
c_bool *needs_free);
# Shape manipulation, copying and selections.
int b2nd_squeeze(b2nd_array_t *array, b2nd_array_t **view)
int b2nd_squeeze_index(b2nd_array_t *array, b2nd_array_t **view, const c_bool *index)
int b2nd_resize(b2nd_array_t *array, const int64_t *new_shape, const int64_t *start)
int b2nd_copy(b2nd_context_t *ctx, b2nd_array_t *src, b2nd_array_t **array)
int b2nd_concatenate(b2nd_context_t *ctx, b2nd_array_t *src1, b2nd_array_t *src2,
int8_t axis, c_bool copy, b2nd_array_t **array)
int b2nd_expand_dims(const b2nd_array_t *array, b2nd_array_t ** view, const c_bool *axis, const uint8_t final_dims)
int b2nd_get_orthogonal_selection(const b2nd_array_t *array, int64_t ** selection,
int64_t *selection_size, void *buffer,
int64_t *buffershape, int64_t buffersize)
int b2nd_set_orthogonal_selection(const b2nd_array_t *array, int64_t ** selection,
int64_t *selection_size, void *buffer,
int64_t *buffershape, int64_t buffersize)
int b2nd_from_schunk(blosc2_schunk *schunk, b2nd_array_t **array)
void blosc2_unidim_to_multidim(uint8_t ndim, int64_t *shape, int64_t i, int64_t *index) nogil
int b2nd_copy_buffer2(int8_t ndim,
int32_t itemsize,
const void *src, const int64_t *src_pad_shape,
const int64_t *src_start, const int64_t *src_stop,
void *dst, const int64_t *dst_pad_shape,
const int64_t *dst_start)
# miniexpr C API declarations
cdef extern from "miniexpr.h":
# Data-type identifiers used by miniexpr variables and expressions.
ctypedef enum me_dtype:
ME_AUTO,
ME_BOOL
ME_INT8
ME_INT16
ME_INT32
ME_INT64
ME_UINT8
ME_UINT16
ME_UINT32
ME_UINT64
ME_FLOAT32
ME_FLOAT64
ME_COMPLEX64
ME_COMPLEX128
# typedef struct me_variable
# A named variable handed to the expression compiler.
ctypedef struct me_variable:
const char *name
me_dtype dtype
const void *address
int type
void *context
# Compiled expression handle; produced by me_compile()/me_compile_nd()
# through their `out` argument and released with me_free().
ctypedef struct me_expr:
int type
double value
const double *bound
const void *function
void *output
int nitems
me_dtype dtype
me_dtype input_dtype
void *bytecode
int ncode
void *parameters[1]
int me_compile(const char *expression, const me_variable *variables,
int var_count, me_dtype dtype, int *error, me_expr **out)
int me_compile_nd(const char *expression, const me_variable *variables,
int var_count, me_dtype dtype, int ndims,
const int64_t *shape, const int32_t *chunkshape,
const int32_t *blockshape, int *error, me_expr **out)
# Compilation status codes.
ctypedef enum me_compile_status:
ME_COMPILE_SUCCESS
ME_COMPILE_ERR_OOM
ME_COMPILE_ERR_PARSE
ME_COMPILE_ERR_INVALID_ARG
ME_COMPILE_ERR_COMPLEX_UNSUPPORTED
ME_COMPILE_ERR_REDUCTION_INVALID
ME_COMPILE_ERR_VAR_MIXED
ME_COMPILE_ERR_VAR_UNSPECIFIED
ME_COMPILE_ERR_INVALID_ARG_TYPE
ME_COMPILE_ERR_MIXED_TYPE_NESTED
# SIMD precision modes selectable through me_eval_params.
ctypedef enum me_simd_ulp_mode:
ME_SIMD_ULP_DEFAULT
ME_SIMD_ULP_1
ME_SIMD_ULP_3_5
ctypedef struct me_eval_params:
c_bool disable_simd
me_simd_ulp_mode simd_ulp_mode
# Evaluation entry points; all are declared nogil.
int me_eval(const me_expr *expr, const void **vars_block,
int n_vars, void *output_block, int chunk_nitems,
const me_eval_params *params) nogil
int me_eval_nd(const me_expr *expr, const void **vars_block,
int n_vars, void *output_block, int block_nitems,
int64_t nchunk, int64_t nblock, const me_eval_params *params) nogil
int me_nd_valid_nitems(const me_expr *expr, int64_t nchunk, int64_t nblock, int64_t *valid_nitems) nogil
void me_print(const me_expr *n) nogil
void me_free(me_expr *n) nogil
# Helper from "miniexpr_numpy.h" that maps a NumPy type number to a me_dtype.
cdef extern from "miniexpr_numpy.h":
me_dtype me_dtype_from_numpy(int numpy_type_num)
# CPython thread-lock primitives from "pythread.h"; all are declared nogil.
# This module uses them for `chunk_cache_lock`.
cdef extern from "pythread.h":
ctypedef void* PyThread_type_lock
PyThread_type_lock PyThread_allocate_lock() nogil
int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) nogil
void PyThread_release_lock(PyThread_type_lock lock) nogil
void PyThread_free_lock(PyThread_type_lock lock) nogil
# Plain C structs defined by this module (they are not inside any
# `cdef extern` block).
# NOTE(review): presumably these are the user-data payloads attached to
# prefilter/postfilter callbacks - confirm against the code that fills them.
ctypedef struct user_filters_udata:
# NOTE(review): `py_func` looks like a C-string key identifying the Python
# callable - confirm.
char* py_func
int input_cdtype
int output_cdtype
int32_t chunkshape
ctypedef struct filler_udata:
char* py_func
# NOTE(review): `inputs_id` is pointer-sized; presumably the id() of a
# Python object holding the inputs - confirm.
uintptr_t inputs_id
int output_cdtype
int32_t chunkshape
ctypedef struct udf_udata:
char* py_func
uintptr_t inputs_id
int output_cdtype
b2nd_array_t *array
int64_t chunks_in_array[B2ND_MAX_DIM]
int64_t blocks_in_chunk[B2ND_MAX_DIM]
# State for miniexpr evaluation: input arrays, evaluation parameters, the
# target array and the compiled expression handle.
ctypedef struct me_udata:
b2nd_array_t** inputs
int ninputs
me_eval_params* eval_params
b2nd_array_t* array
void* aux_reduc_ptr
int64_t chunks_in_array[B2ND_MAX_DIM]
int64_t blocks_in_chunk[B2ND_MAX_DIM]
me_expr* miniexpr_handle
# Python-visible module attributes mirroring the C-level constants declared
# in the extern blocks of this file.
MAX_TYPESIZE = BLOSC2_MAXTYPESIZE
MAX_BUFFERSIZE = BLOSC2_MAX_BUFFERSIZE
MAX_BLOCKSIZE = BLOSC2_MAXBLOCKSIZE
MAX_OVERHEAD = BLOSC2_MAX_OVERHEAD
MAX_DIM = B2ND_MAX_DIM
# The version constants are cast to char* and decoded into Python str.
VERSION_STRING = (<char*>BLOSC2_VERSION_STRING).decode("utf-8")
VERSION_DATE = (<char*>BLOSC2_VERSION_DATE).decode("utf-8")
MIN_HEADER_LENGTH = BLOSC_MIN_HEADER_LENGTH
EXTENDED_HEADER_LENGTH = BLOSC_EXTENDED_HEADER_LENGTH
DEFINED_CODECS_STOP = BLOSC2_DEFINED_CODECS_STOP
GLOBAL_REGISTERED_CODECS_STOP = BLOSC2_GLOBAL_REGISTERED_CODECS_STOP
USER_REGISTERED_CODECS_STOP = BLOSC2_USER_REGISTERED_CODECS_STOP
DEFAULT_DTYPE_FORMAT = B2ND_DEFAULT_DTYPE_FORMAT
cdef _check_comp_length(comp_name, comp_len):
    """Raise ValueError when `comp_len` is below BLOSC_MIN_HEADER_LENGTH.

    `comp_name` is only used to build the error message.
    """
    if not (comp_len < BLOSC_MIN_HEADER_LENGTH):
        return
    raise ValueError(f"{comp_name} cannot be less than {BLOSC_MIN_HEADER_LENGTH} bytes")
# Initialize the C-Blosc2 library once, when this module is imported.
blosc2_init()
# Module-level lock allocated at import time; destroy() frees it at exit.
cdef PyThread_type_lock chunk_cache_lock = PyThread_allocate_lock()
# A NULL lock is reported as MemoryError, which aborts the import.
if chunk_cache_lock == NULL:
raise MemoryError("Could not allocate chunk cache lock")
@atexit.register
def destroy():
    """Exit hook: free the chunk cache lock (if allocated), then call blosc2_destroy()."""
    cdef bint lock_allocated = chunk_cache_lock != NULL
    if lock_allocated:
        PyThread_free_lock(chunk_cache_lock)
    blosc2_destroy()
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def nearest_divisor(int64_t a, int64_t b, bint strict=False):
    """Find the divisor of `a` that is closest to `b`.

    Parameters
    ----------
    a : int
        The number for which to find divisors.
    b : int
        The reference value to compare divisors against.
    strict : bool, optional
        If True, always use the downward search algorithm, i.e. return the
        largest divisor of `a` that does not exceed `b`.

    Returns
    -------
    int
        The divisor of `a` that is closest to `b`.  When `a` > 100_000 or
        `strict` is True, the result is the largest divisor of `a` that is
        <= `b` (1 if there is none).

    Notes
    -----
    This is a *much* faster version than its Python counterpart.
    """
    cdef:
        int64_t i, cofactor, closest, min_diff, diff
        bint found

    if a > 100_000 or strict:
        # For large numbers or when strict=True, search downwards from b.
        # No divisor of a positive `a` is larger than `a`, so starting at
        # min(a, b) gives the same answer without scanning (a, b].
        i = b
        if a > 0 and b > a:
            i = a
        while i > 0:
            if a % i == 0:
                return i
            i -= 1
        return 1  # Fallback to 1, which is always a divisor

    # For smaller numbers, find the closest divisor.
    closest = 1
    min_diff = 0
    found = False

    # Enumerate divisor pairs (i, a // i) with i up to sqrt(a).
    i = 1
    while i * i <= a:
        if a % i == 0:
            # Distances are computed in int64; libc abs() takes a C int and
            # would truncate them.  The first candidate is always accepted,
            # so a `b` far away from every divisor still yields the nearest
            # one instead of falling back to 1.
            diff = i - b if i >= b else b - i
            if not found or diff < min_diff:
                min_diff = diff
                closest = i
                found = True

            cofactor = a // i
            diff = cofactor - b if cofactor >= b else b - cofactor
            if diff < min_diff:
                min_diff = diff
                closest = cofactor
        i += 1

    return closest if found else 1
def cbuffer_sizes(src):
    """Return the ``(nbytes, cbytes, blocksize)`` triple of a compressed buffer.

    `src` must support the buffer protocol and hold at least
    BLOSC_MIN_HEADER_LENGTH bytes; otherwise ValueError is raised by
    `_check_comp_length`.  The three values are the ones filled in by
    `blosc2_cbuffer_sizes`.
    """
    cdef int32_t nbytes, cbytes, blocksize
    # View the input as a flat sequence of unsigned bytes.
    cdef const uint8_t[:] src_bytes = memoryview(src).cast('B')

    _check_comp_length('src', src_bytes.nbytes)
    blosc2_cbuffer_sizes(<void*>&src_bytes[0], &nbytes, &cbytes, &blocksize)
    return nbytes, cbytes, blocksize
cpdef compress(src, int32_t typesize=8, int clevel=9, filter=blosc2.Filter.SHUFFLE, codec=blosc2.Codec.BLOSCLZ):
    """Compress the buffer exposed by `src` and return the result as bytes.

    Parameters
    ----------
    src : object supporting the buffer protocol (and `len()`)
        Data to compress.
    typesize : int
        Passed to `blosc2_compress` as the typesize.
    clevel : int
        Passed to `blosc2_compress` as the compression level.
    filter : Enum or other
        When an `Enum`, its `.value` is passed as the shuffle argument;
        anything else is treated as 0.
    codec : object with a `.name`
        Selected through `set_compressor` before compressing.

    Returns
    -------
    bytes
        The compressed data.

    Raises
    ------
    ValueError
        If the codec is not available, the input is too large for the int32
        sizes of the C API, or `blosc2_compress` returns a value <= 0.
    """
    set_compressor(codec)
    # Not used below, but kept: the cast rejects objects without len() and
    # lengths that do not fit in an int32, as it always did.
    cdef int32_t len_src = <int32_t> len(src)
    cdef Py_buffer buf
    cdef int32_t len_dest
    cdef int size
    cdef void *_dest
    # Evaluated before acquiring the buffer so a failure here cannot leak it.
    cdef int filter_ = filter.value if isinstance(filter, Enum) else 0

    PyObject_GetBuffer(src, &buf, PyBUF_SIMPLE)
    try:
        # The C API takes int32 sizes; refuse inputs whose size (plus the
        # overhead added to the destination) would be truncated by the casts.
        if buf.len > INT_MAX - BLOSC2_MAX_OVERHEAD:
            raise ValueError("Cannot compress")
        dest = bytes(buf.len + BLOSC2_MAX_OVERHEAD)
        len_dest = <int32_t> len(dest)
        _dest = <void*> <char *> dest
        if RELEASEGIL:
            with nogil:
                size = blosc2_compress(clevel, filter_, <int32_t> typesize, buf.buf, <int32_t> buf.len, _dest, len_dest)
        else:
            size = blosc2_compress(clevel, filter_, <int32_t> typesize, buf.buf, <int32_t> buf.len, _dest, len_dest)
    finally:
        # Always hand the buffer back, including on error paths.
        PyBuffer_Release(&buf)

    if size > 0:
        return dest[:size]
    raise ValueError("Cannot compress")
def decompress(src, dst=None, as_bytearray=False):
    """Decompress the Blosc buffer exposed by `src`.

    Parameters
    ----------
    src : object supporting the buffer protocol
        Compressed data; must hold at least BLOSC_MIN_HEADER_LENGTH bytes.
    dst : object supporting the buffer protocol, optional
        When given, the data is decompressed into it and nothing is returned.
    as_bytearray : bool
        Only used when `dst` is None: return a bytearray instead of bytes.

    Returns
    -------
    bytes, bytearray or None
        The decompressed data when `dst` is None; otherwise None.

    Raises
    ------
    ValueError
        If `src` is too short or `dst` has zero length.
    RuntimeError
        If the output object cannot be created or decompression fails.
    """
    cdef int32_t nbytes
    cdef int32_t cbytes
    cdef int32_t blocksize
    cdef const uint8_t[:] typed_view_src
    cdef Py_buffer buf
    cdef int size

    mem_view_src = memoryview(src)
    typed_view_src = mem_view_src.cast('B')
    _check_comp_length('src', len(typed_view_src))
    blosc2_cbuffer_sizes(<void*>&typed_view_src[0], &nbytes, &cbytes, &blocksize)

    if dst is not None:
        PyObject_GetBuffer(dst, &buf, PyBUF_SIMPLE)
        try:
            if buf.len == 0:
                raise ValueError("The dst length must be greater than 0")
            size = blosc1_decompress(<void*>&typed_view_src[0], buf.buf, buf.len)
        finally:
            # Release the destination buffer on every path, including the
            # zero-length error above.
            PyBuffer_Release(&buf)
    else:
        dst = PyBytes_FromStringAndSize(NULL, nbytes)
        if dst is None:
            raise RuntimeError("Could not get a bytes object")
        size = blosc1_decompress(<void*>&typed_view_src[0], <void*> <char *> dst, len(dst))
        if size >= 0:
            return bytearray(dst) if as_bytearray else dst

    if size < 0:
        raise RuntimeError("Cannot decompress")
def set_compressor(codec):
    """Select the compressor by name through `blosc1_set_compressor`.

    The name is `codec.name`, lower-cased and UTF-8 encoded.  Returns the
    value reported by the C call; raises ValueError when that value is -1.
    """
    compname = codec.name.lower().encode("utf-8")
    result = blosc1_set_compressor(compname)
    if result != -1:
        return result
    raise ValueError("The code is not available")
def free_resources():
    """Call `blosc2_free_resources`; raise ValueError if it reports a negative code."""
    if blosc2_free_resources() < 0:
        raise ValueError("Could not free the resources")
def set_nthreads(nthreads):
    """Set the number of threads through `blosc2_set_nthreads`.

    Returns the value reported by the C call.  Raises ValueError when
    `nthreads` exceeds INT_MAX or when the C call returns a negative value.
    """
    if nthreads > INT_MAX:
        raise ValueError("nthreads must be less or equal than 2^31 - 1.")
    result = blosc2_set_nthreads(nthreads)
    if result >= 0:
        return result
    raise ValueError("nthreads must be a positive integer.")
def set_blocksize(size_t blocksize=0):
    """Forward `blocksize` (default 0) to `blosc1_set_blocksize`."""
    blosc1_set_blocksize(blocksize)
def clib_info(codec):
    """Return ``(library_name, version)`` as bytes for the given codec.

    Parameters
    ----------
    codec : object with a `.name`
        The name is lower-cased and UTF-8 encoded before the lookup.

    Raises
    ------
    ValueError
        If `blosc2_get_complib_info` reports a negative code.
    """
    # Start from NULL so the clean-up below is safe whatever the C call does.
    cdef char* clib = NULL
    cdef char* version = NULL
    compname = codec.name.lower().encode("utf-8")
    rc = blosc2_get_complib_info(compname, &clib, &version)
    try:
        if rc < 0:
            raise ValueError("The compression library is not supported.")
        # Casting char* to bytes copies the data, so the C strings can be
        # released right after.
        clib_py = <bytes> clib
        version_py = <bytes> version
        return clib_py, version_py
    finally:
        # Per the C-Blosc2 API, the caller owns the returned strings and must
        # free() them; free(NULL) is a no-op.
        free(clib)
        free(version)
def get_clib(bytesobj):
    """Return the result of `blosc2_cbuffer_complib` for the given compressed bytes.

    Raises ValueError when the C call returns NULL.
    """
    cdef const char* complib = blosc2_cbuffer_complib(<void *> <char*> bytesobj)
    if complib != NULL:
        return complib
    raise ValueError("Cannot get the info for the compressor")
def get_compressor():
    """Return the C string reported by `blosc1_get_compressor`."""
    cdef const char* compname = blosc1_get_compressor()
    return compname
# Module-wide flag read by the compression functions in this file to decide
# whether to run the C call inside a `with nogil` block.
cdef c_bool RELEASEGIL = False


def set_releasegil(c_bool gilstate):
    """Set the module-wide RELEASEGIL flag and return its previous value."""
    global RELEASEGIL
    cdef c_bool previous = RELEASEGIL
    RELEASEGIL = gilstate
    return previous
def get_blocksize():
    """Return the value reported by `blosc1_get_blocksize`."""
    cdef int blocksize = blosc1_get_blocksize()
    return blocksize
cdef _check_cparams(blosc2_cparams *cparams):
    """Reject compression parameters that cannot be used with several threads.

    Nothing is checked when `cparams.nthreads` is 1 or less.  Otherwise a
    ValueError is raised when the codec or any filter is a user-registered
    one present in the Python registries, or when a prefilter other than
    `miniexpr_prefilter` is set.
    """
    if cparams.nthreads <= 1:
        return

    codec_in_user_range = (
        BLOSC2_USER_REGISTERED_CODECS_START <= cparams.compcode <= BLOSC2_USER_REGISTERED_CODECS_STOP
    )
    if codec_in_user_range and cparams.compcode in blosc2.ucodecs_registry.keys():
        raise ValueError("Cannot use multi-threading with user defined Python codecs")

    for i in range(BLOSC2_MAX_FILTERS):
        filter_in_user_range = (
            BLOSC2_USER_REGISTERED_FILTERS_START <= cparams.filters[i] <= BLOSC2_USER_REGISTERED_FILTERS_STOP
        )
        if filter_in_user_range and cparams.filters[i] in blosc2.ufilters_registry.keys():
            raise ValueError("Cannot use multi-threading with user defined Python filters")

    if cparams.prefilter == NULL:
        return
    # miniexpr_prefilter is exempt: per the original note, it uses the
    # miniexpr C API, which is thread-friendly.
    if cparams.prefilter != <blosc2_prefilter_fn>miniexpr_prefilter:
        raise ValueError("`nthreads` must be 1 when a prefilter is set")
cdef _check_dparams(blosc2_dparams* dparams, blosc2_cparams* cparams=NULL):
    """Reject decompression parameters that cannot be used with several threads.

    Nothing is checked when `cparams` is NULL or `dparams.nthreads` is 1 or
    less.  Otherwise a ValueError is raised when the codec or any filter in
    `cparams` is a user-registered one present in the Python registries, or
    when a postfilter is set on `dparams`.
    """
    if cparams == NULL or dparams.nthreads <= 1:
        return

    codec_in_user_range = (
        BLOSC2_USER_REGISTERED_CODECS_START <= cparams.compcode <= BLOSC2_USER_REGISTERED_CODECS_STOP
    )
    if codec_in_user_range and cparams.compcode in blosc2.ucodecs_registry.keys():
        raise ValueError("Cannot use multi-threading with user defined Python codecs")

    for i in range(BLOSC2_MAX_FILTERS):
        filter_in_user_range = (
            BLOSC2_USER_REGISTERED_FILTERS_START <= cparams.filters[i] <= BLOSC2_USER_REGISTERED_FILTERS_STOP
        )
        if filter_in_user_range and cparams.filters[i] in blosc2.ufilters_registry.keys():
            raise ValueError("Cannot use multi-threading with user defined Python filters")

    if dparams.postfilter != NULL:
        raise ValueError("`nthreads` must be 1 when a postfilter is set")
cdef create_cparams_from_kwargs(blosc2_cparams *cparams, kwargs):
    """Populate the C struct `cparams` from the keyword dictionary `kwargs`.

    Keys that are absent fall back to `blosc2.cparams_dflts` (and to
    `blosc2.nthreads` for 'nthreads').  Every field of the struct is written,
    so `cparams` may point to uninitialised memory.  The result is validated
    with `_check_cparams` before returning.

    Raises
    ------
    NameError
        If the legacy keys 'compcode' or 'shuffle' are present.
    ValueError
        If more than BLOSC2_MAX_FILTERS filters are given, if 'filters' and
        'filters_meta' differ in length, or if `_check_cparams` rejects the
        resulting parameters.
    """
    cdef int8_t meta_value

    if "compcode" in kwargs:
        raise NameError("`compcode` has been renamed to `codec`. Please go update your code.")
    if "shuffle" in kwargs:
        raise NameError("`shuffle` has been substituted by `filters`. Please go update your code.")

    codec = kwargs.get('codec', blosc2.cparams_dflts['codec'])
    cparams.compcode = codec if not isinstance(codec, blosc2.Codec) else codec.value
    cparams.compcode_meta = kwargs.get('codec_meta', blosc2.cparams_dflts['codec_meta'])
    cparams.clevel = kwargs.get('clevel', blosc2.cparams_dflts['clevel'])
    cparams.use_dict = kwargs.get('use_dict', blosc2.cparams_dflts['use_dict'])
    cparams.typesize = typesize = kwargs.get('typesize', blosc2.cparams_dflts['typesize'])
    cparams.nthreads = kwargs.get('nthreads', blosc2.nthreads)
    cparams.blocksize = kwargs.get('blocksize', blosc2.cparams_dflts['blocksize'])
    splitmode = kwargs.get('splitmode', blosc2.cparams_dflts['splitmode'])
    cparams.splitmode = splitmode.value
    # TODO: support the commented ones in the future
    #schunk_c = kwargs.get('schunk', blosc2.cparams_dflts['schunk'])
    #cparams.schunk = <void *> schunk_c
    cparams.schunk = NULL

    # Reset every per-filter slot, not only the ones that get a filter below;
    # otherwise the unused `filter_params` entries of a stack-allocated
    # struct would be left uninitialised.
    for i in range(BLOSC2_MAX_FILTERS):
        cparams.filters[i] = 0
        cparams.filters_meta[i] = 0
        cparams.filter_params[i] = NULL

    filters = kwargs.get('filters', blosc2.cparams_dflts['filters'])
    if len(filters) > BLOSC2_MAX_FILTERS:
        raise ValueError(f"filters list cannot exceed {BLOSC2_MAX_FILTERS}")
    for i, filter_ in enumerate(filters):
        cparams.filters[i] = filter_.value if isinstance(filter_, Enum) else filter_
        # Bytedelta does not work on typesize 1
        if cparams.filters[i] == blosc2.Filter.BYTEDELTA.value and typesize == 1:
            cparams.filters[i] = 0

    if "filters_meta" not in kwargs:
        # If not specified, we can still assign a 0 list to it
        filters_meta = [0] * len(filters)
    else:
        filters_meta = kwargs['filters_meta']
        if len(filters) != len(filters_meta):
            raise ValueError("filters and filters_meta lists must have same length")

    for i, meta in enumerate(filters_meta):
        # We still may want to encode negative values
        meta_value = <int8_t>meta if meta < 0 else meta
        if meta_value == 0 and cparams.filters[i] == blosc2.Filter.BYTEDELTA.value:
            # bytedelta typesize cannot be zero when using compress2
            cparams.filters_meta[i] = <uint8_t>typesize
        else:
            cparams.filters_meta[i] = <uint8_t>meta_value

    cparams.prefilter = NULL
    cparams.preparams = NULL
    tuner = kwargs.get('tuner', blosc2.cparams_dflts['tuner'])
    cparams.tuner_id = tuner.value
    cparams.tuner_params = NULL
    cparams.instr_codec = False
    cparams.codec_params = NULL

    _check_cparams(cparams)
def compress2(src, **kwargs):
    """Compress the buffer exposed by `src` with a dedicated compression context.

    Parameters
    ----------
    src : object supporting the buffer protocol
        Data to compress.
    **kwargs
        Compression parameters, interpreted by `create_cparams_from_kwargs`.

    Returns
    -------
    bytes
        The compressed data.

    Raises
    ------
    ValueError
        If the input is too large for the int32 sizes used by the C API
        (in addition to whatever `create_cparams_from_kwargs` raises).
    RuntimeError
        If the context cannot be created, compression fails, or the result
        does not fit in the destination.
    """
    cdef blosc2_cparams cparams
    cdef blosc2_context *cctx
    cdef Py_buffer buf
    cdef int size
    cdef int32_t len_dest
    cdef void *_dest

    create_cparams_from_kwargs(&cparams, kwargs)
    PyObject_GetBuffer(src, &buf, PyBUF_SIMPLE)
    try:
        # The destination size is computed as an int32; refuse inputs for
        # which that computation would be truncated.
        if buf.len > INT_MAX - BLOSC2_MAX_OVERHEAD:
            raise ValueError("src is too large to be compressed")
        len_dest = <int32_t> (buf.len + BLOSC2_MAX_OVERHEAD)
        dest = bytes(len_dest)
        _dest = <void*> <char *> dest
        cctx = blosc2_create_cctx(cparams)
        if cctx == NULL:
            raise RuntimeError("Could not create the compression context")
        try:
            if RELEASEGIL:
                with nogil:
                    size = blosc2_compress_ctx(cctx, buf.buf, <int32_t> buf.len, _dest, len_dest)
            else:
                size = blosc2_compress_ctx(cctx, buf.buf, <int32_t> buf.len, _dest, len_dest)
        finally:
            blosc2_free_ctx(cctx)
    finally:
        # Always hand the buffer back, including on the error paths above.
        PyBuffer_Release(&buf)

    if size < 0:
        raise RuntimeError("Could not compress the data")
    elif size == 0:
        raise RuntimeError("The result could not fit ")
    return dest[:size]
cdef create_dparams_from_kwargs(blosc2_dparams *dparams, kwargs, blosc2_cparams* cparams=NULL):
dparams.nthreads = kwargs.get('nthreads', blosc2.nthreads)
dparams.schunk = NULL