2
2
from __future__ import annotations
3
3
4
4
import io
5
+ import json
5
6
import os
6
7
from typing import (
7
8
Any ,
@@ -154,6 +155,32 @@ def __init__(self):
154
155
155
156
self .api = pyarrow
156
157
158
@staticmethod
def _write_attrs(table, df: DataFrame):
    """
    Store the ``attrs`` of ``df`` and of its columns in the table metadata.

    The attrs are merged into the JSON document kept under the ``b"pandas"``
    key of the table's schema metadata, as the entries ``"attrs"`` (taken
    from ``df.attrs``) and ``"column_attrs"`` (a mapping of column label to
    that column's attrs).

    Parameters
    ----------
    table
        Object exposing ``schema.metadata`` and ``replace_schema_metadata``.
    df : DataFrame
        Frame whose attrs are recorded.

    Returns
    -------
    The value returned by ``table.replace_schema_metadata`` for the updated
    metadata mapping.
    """
    metadata = table.schema.metadata or {}
    pandas_meta = json.loads(metadata.get(b"pandas", "{}"))

    # Pair every column label with the attrs of the selected column, then
    # keep only string labels whose attrs are non-empty.
    labelled = ((label, df[label].attrs) for label in df.columns)
    per_column = {
        label: col_attrs
        for label, col_attrs in labelled
        if col_attrs and isinstance(label, str)
    }

    pandas_meta["attrs"] = df.attrs
    pandas_meta["column_attrs"] = per_column
    metadata[b"pandas"] = json.dumps(pandas_meta)
    return table.replace_schema_metadata(metadata)
174
+
175
@staticmethod
def _read_attrs(table, df: DataFrame):
    """
    Restore ``attrs`` on ``df`` from the table's schema metadata.

    Reads the JSON document stored under the ``b"pandas"`` key of
    ``table.schema.metadata`` and applies its ``"attrs"`` entry to ``df``
    and its ``"column_attrs"`` entries to the matching columns. Missing
    metadata or missing entries are treated as empty.

    Parameters
    ----------
    table
        Object exposing ``schema.metadata``.
    df : DataFrame
        Frame built from ``table``; modified in place.

    Returns
    -------
    None
    """
    schema_metadata = table.schema.metadata or {}
    pandas_metadata = json.loads(schema_metadata.get(b"pandas", "{}"))
    df.attrs = pandas_metadata.get("attrs", {})

    col_attrs = pandas_metadata.get("column_attrs", {})
    for col, attrs in col_attrs.items():
        # The stored mapping may name columns that are not in ``df`` (for
        # example when only a subset of columns was loaded); indexing
        # ``df`` with such a label would raise KeyError, so skip it.
        if col not in df.columns:
            continue
        # NOTE(review): the attrs are set on the object returned by
        # ``df[col]``; whether a later ``df[col]`` returns that same
        # object depends on pandas internals -- confirm this persists.
        df[col].attrs = attrs
183
+
157
184
def write (
158
185
self ,
159
186
df : DataFrame ,
@@ -171,6 +198,7 @@ def write(
171
198
from_pandas_kwargs ["preserve_index" ] = index
172
199
173
200
table = self .api .Table .from_pandas (df , ** from_pandas_kwargs )
201
+ table = self ._write_attrs (table , df )
174
202
175
203
path_or_handle , handles , kwargs ["filesystem" ] = _get_path_or_handle (
176
204
path ,
@@ -242,9 +270,11 @@ def read(
242
270
mode = "rb" ,
243
271
)
244
272
try :
245
- result = self .api .parquet .read_table (
273
+ table = self .api .parquet .read_table (
246
274
path_or_handle , columns = columns , ** kwargs
247
- ).to_pandas (** to_pandas_kwargs )
275
+ )
276
+ result = table .to_pandas (** to_pandas_kwargs )
277
+ self ._read_attrs (table , result )
248
278
if manager == "array" :
249
279
result = result ._as_manager ("array" , copy = False )
250
280
return result
0 commit comments