Skip to content

Commit 9afdbcb

Browse files
committed
[executorch][serialization] Data serialization interface
Pull Request resolved: #7194 Introduce data serialization interface. ghstack-source-id: 259211150 @exported-using-ghexport Differential Revision: [D65947145](https://our.internmc.facebook.com/intern/diff/D65947145/)
1 parent ac7a950 commit 9afdbcb

File tree

1 file changed

+94
-0
lines changed

1 file changed

+94
-0
lines changed

exir/_serialize/data_serializer.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from abc import ABC, abstractmethod
2+
from dataclasses import dataclass
3+
from typing import Dict, List, Sequence
4+
5+
from executorch.exir._serialize._cord import Cord
6+
7+
from executorch.exir.schema import ScalarType
8+
9+
10+
@dataclass
class TensorLayout:
    """Layout metadata for a tensor whose data is serialized externally.

    Attributes:
        scalar_type: Element type of the tensor.
        sizes: Extent of each dimension of the tensor.
        dim_order: Order in which the dimensions are laid out in memory,
            listed from outermost to innermost.
    """

    scalar_type: ScalarType
    sizes: List[int]
    dim_order: List[int]
24+
25+
26+
@dataclass
class TensorEntry:
    """Locates one tensor inside a `DataPayload` and carries its metadata.

    Attributes:
        buffer_index: Position within `DataPayload.buffers` of the buffer
            that this TensorEntry refers to.
        layout: Metadata about the tensor.
    """

    buffer_index: int
    layout: TensorLayout
39+
40+
41+
@dataclass
class DataPayload:
    """Contains the data and metadata required for serialization.

    Having an index-based arrangement instead of Dict[str, bytes] allows the
    caller to deduplicate buffers and point multiple fully qualified names
    (FQNs) to the same entry.

    Attributes:
        buffers: a sequence of tensor buffers.
        fqn_to_data: a map from fully qualified name (FQN) to the
            TensorEntry for that name; each entry's `buffer_index` refers to
            an index inside `buffers`.
    """

    buffers: Sequence[bytes]
    fqn_to_data: Dict[str, TensorEntry]
55+
56+
57+
class DataSerializer(ABC):
    """Serializes and deserializes FQN-tagged tensor data.

    This base class enables serialization into different formats. See
    executorch/extension/flat_tensor/ for an example.
    """

    @abstractmethod
    def serialize(
        self,
        data: DataPayload,
    ) -> Cord:
        """
        Serializes a list of tensors emitted by ExecuTorch into a binary blob.

        Args:
            data: the tensor buffers and tensor layout information required
                for serialization.

        Returns:
            A binary blob that contains the serialized data.

        Raises:
            NotImplementedError: whenever this base implementation itself is
                invoked.
        """
        raise NotImplementedError("serialize_data")

    @abstractmethod
    def deserialize(self, blob: Cord) -> DataPayload:
        """
        Deserializes a blob into a list of tensors. Reverses the effect of
        serialize.

        Args:
            blob: A binary blob that contains the serialized data.

        Returns:
            DataPayload: tensor buffers and tensor layout information
            deserialized from `blob`.

        Raises:
            NotImplementedError: whenever this base implementation itself is
                invoked.
        """
        raise NotImplementedError("deserialize_data")

0 commit comments

Comments
 (0)