Skip to content

Serialization for trained models/pipelines is not portable to hosts with a different byte order.  #9428

@andrewsi-z

Description

@andrewsi-z

How to reproduce the behaviour

Load a pre-trained model from little endian platform on big endian platform and attempt to use.
This results in the exception "ValueError: Little-endian buffer not supported on big-endian compiler" as seen below in this post.

Your Environment

Info about spaCy

  • spaCy version: 3.1.3
  • Platform: Linux-4.18.0-305.12.1.el8_4.s390x-s390x-with-glibc2.29
  • Python version: 3.8.10
  • Pipelines: en_core_web_sm (3.1.0)
  • Environment Information: spaCy was built from source on an s390x system, which is big endian.

Example: loading a model saved on an x86 (little-endian) host fails:

nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sentence.")

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_75/3390045506.py in <module>
----> 1 doc = nlp("This is a sentence.")

/git/spaCy/spacy/language.py in __call__(self, text, disable, component_cfg)
   1003                 raise ValueError(Errors.E109.format(name=name)) from e
   1004             except Exception as e:
-> 1005                 error_handler(name, proc, [doc], e)
   1006             if doc is None:
   1007                 raise ValueError(Errors.E005.format(name=name))

/git/spaCy/spacy/util.py in raise_error(proc_name, proc, docs, e)
   1533 
   1534 def raise_error(proc_name, proc, docs, e):
-> 1535     raise e
   1536 
   1537 

/git/spaCy/spacy/language.py in __call__(self, text, disable, component_cfg)
    998                 error_handler = proc.get_error_handler()
    999             try:
-> 1000                 doc = proc(doc, **component_cfg.get(name, {}))
   1001             except KeyError as e:
   1002                 # This typically happens if a component is not initialized

/git/spaCy/spacy/pipeline/trainable_pipe.pyx in spacy.pipeline.trainable_pipe.TrainablePipe.__call__()

/git/spaCy/spacy/util.py in raise_error(proc_name, proc, docs, e)
   1533 
   1534 def raise_error(proc_name, proc, docs, e):
-> 1535     raise e
   1536 
   1537 

/git/spaCy/spacy/pipeline/trainable_pipe.pyx in spacy.pipeline.trainable_pipe.TrainablePipe.__call__()

/git/spaCy/spacy/pipeline/tok2vec.py in predict(self, docs)
    119         DOCS: https://spacy.io/api/tok2vec#predict
    120         """
--> 121         tokvecs = self.model.predict(docs)
    122         batch_id = Tok2VecListener.get_batch_id(docs)
    123         for listener in self.listeners:

/usr/local/lib/python3.8/dist-packages/thinc/model.py in predict(self, X)
    313         only the output, instead of the `(output, callback)` tuple.
    314         """
--> 315         return self._func(self, X, is_train=False)[0]
    316 
    317     def finish_update(self, optimizer: Optimizer) -> None:

/usr/local/lib/python3.8/dist-packages/thinc/layers/chain.py in forward(model, X, is_train)
     52     callbacks = []
     53     for layer in model.layers:
---> 54         Y, inc_layer_grad = layer(X, is_train=is_train)
     55         callbacks.append(inc_layer_grad)
     56         X = Y

/usr/local/lib/python3.8/dist-packages/thinc/model.py in __call__(self, X, is_train)
    289         """Call the model's `forward` function, returning the output and a
    290         callback to compute the gradients via backpropagation."""
--> 291         return self._func(self, X, is_train=is_train)
    292 
    293     def initialize(self, X: Optional[InT] = None, Y: Optional[OutT] = None) -> "Model":

/usr/local/lib/python3.8/dist-packages/thinc/layers/chain.py in forward(model, X, is_train)
     52     callbacks = []
     53     for layer in model.layers:
---> 54         Y, inc_layer_grad = layer(X, is_train=is_train)
     55         callbacks.append(inc_layer_grad)
     56         X = Y

/usr/local/lib/python3.8/dist-packages/thinc/model.py in __call__(self, X, is_train)
    289         """Call the model's `forward` function, returning the output and a
    290         callback to compute the gradients via backpropagation."""
--> 291         return self._func(self, X, is_train=is_train)
    292 
    293     def initialize(self, X: Optional[InT] = None, Y: Optional[OutT] = None) -> "Model":

/usr/local/lib/python3.8/dist-packages/thinc/layers/with_array.py in forward(model, Xseq, is_train)
     28 def forward(model: Model[SeqT, SeqT], Xseq: SeqT, is_train: bool):
     29     if isinstance(Xseq, Ragged):
---> 30         return _ragged_forward(
     31             cast(Model[Ragged, Ragged], model), cast(Ragged, Xseq), is_train
     32         )

/usr/local/lib/python3.8/dist-packages/thinc/layers/with_array.py in _ragged_forward(model, Xr, is_train)
     88 ) -> Tuple[Ragged, Callable]:
     89     layer: Model[ArrayXd, ArrayXd] = model.layers[0]
---> 90     Y, get_dX = layer(Xr.dataXd, is_train)
     91 
     92     def backprop(dYr: Ragged) -> Ragged:

/usr/local/lib/python3.8/dist-packages/thinc/model.py in __call__(self, X, is_train)
    289         """Call the model's `forward` function, returning the output and a
    290         callback to compute the gradients via backpropagation."""
--> 291         return self._func(self, X, is_train=is_train)
    292 
    293     def initialize(self, X: Optional[InT] = None, Y: Optional[OutT] = None) -> "Model":

/usr/local/lib/python3.8/dist-packages/thinc/layers/chain.py in forward(model, X, is_train)
     52     callbacks = []
     53     for layer in model.layers:
---> 54         Y, inc_layer_grad = layer(X, is_train=is_train)
     55         callbacks.append(inc_layer_grad)
     56         X = Y

/usr/local/lib/python3.8/dist-packages/thinc/model.py in __call__(self, X, is_train)
    289         """Call the model's `forward` function, returning the output and a
    290         callback to compute the gradients via backpropagation."""
--> 291         return self._func(self, X, is_train=is_train)
    292 
    293     def initialize(self, X: Optional[InT] = None, Y: Optional[OutT] = None) -> "Model":

/usr/local/lib/python3.8/dist-packages/thinc/layers/chain.py in forward(model, X, is_train)
     52     callbacks = []
     53     for layer in model.layers:
---> 54         Y, inc_layer_grad = layer(X, is_train=is_train)
     55         callbacks.append(inc_layer_grad)
     56         X = Y

/usr/local/lib/python3.8/dist-packages/thinc/model.py in __call__(self, X, is_train)
    289         """Call the model's `forward` function, returning the output and a
    290         callback to compute the gradients via backpropagation."""
--> 291         return self._func(self, X, is_train=is_train)
    292 
    293     def initialize(self, X: Optional[InT] = None, Y: Optional[OutT] = None) -> "Model":

/usr/local/lib/python3.8/dist-packages/thinc/layers/maxout.py in forward(model, X, is_train)
     47     W = model.get_param("W")
     48     W = model.ops.reshape2f(W, nO * nP, nI)
---> 49     Y = model.ops.gemm(X, W, trans2=True)
     50     Y += model.ops.reshape1f(b, nO * nP)
     51     Z = model.ops.reshape3f(Y, Y.shape[0], nO, nP)

/usr/local/lib/python3.8/dist-packages/thinc/backends/numpy_ops.pyx in thinc.backends.numpy_ops.NumpyOps.gemm()

/usr/local/lib/python3.8/dist-packages/blis/py.pyx in blis.py.gemm()

ValueError: Little-endian buffer not supported on big-endian compiler

Metadata

Metadata

Assignees

No one assigned

    Labels

    compat — Cross-platform and cross-Python compatibility; feat / serialize — Feature: Serialization, saving and loading

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions