From 47a2e382c52d6f1a75c57fee7a81e72ec106c51b Mon Sep 17 00:00:00 2001 From: immerrr Date: Thu, 9 Oct 2014 15:05:08 +0400 Subject: [PATCH 1/2] BUG: fix CategoricalBlock pickling * TST: add categorical frame and series to generate_legacy_pickles * TST: generate pickle for 0.15.0 --- doc/source/v0.15.0.txt | 1 + pandas/core/internals.py | 31 ++++-------------- .../0.15.0/0.15.0_x86_64_linux_2.7.8.pickle | Bin 0 -> 15162 bytes pandas/io/tests/generate_legacy_pickles.py | 11 +++++-- pandas/tests/test_internals.py | 11 ++++++- 5 files changed, 26 insertions(+), 28 deletions(-) create mode 100644 pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index e76a0e57c5e33..65ec2bda807ce 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -1071,3 +1071,4 @@ Bug Fixes - Regression in ``NDFrame.loc`` indexing when rows/columns were converted to Float64Index if target was an empty list/ndarray (:issue:`7774`) - Bug in ``Series`` that allows it to be indexed by a ``DataFrame`` which has unexpected results. Such indexing is no longer permitted (:issue:`8444`) - Bug in item assignment of a ``DataFrame`` with multi-index columns where right-hand-side columns were not aligned (:issue:`7655`) +- Bug in unpickling of categorical series and dataframe columns (:issue:`8518`) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c88d799a54fed..9be680d998216 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1070,16 +1070,19 @@ class NonConsolidatableMixIn(object): def __init__(self, values, placement, ndim=None, fastpath=False,): + # Placement must be converted to BlockPlacement via property setter + # before ndim logic, because placement may be a slice which doesn't + # have a length. + self.mgr_locs = placement + # kludgetastic if ndim is None: - if len(placement) != 1: + if len(self.mgr_locs) != 1: ndim = 1 else: ndim = 2 self.ndim = ndim - self.mgr_locs = placement - if not isinstance(values, self._holder): raise TypeError("values must be {0}".format(self._holder.__name__)) @@ -1852,6 +1855,7 @@ def get_values(self, dtype=None): .reshape(self.values.shape) return self.values + class SparseBlock(NonConsolidatableMixIn, Block): """ implement as a list of sparse arrays of the same dtype """ __slots__ = () @@ -1861,27 +1865,6 @@ class SparseBlock(NonConsolidatableMixIn, Block): _ftype = 'sparse' _holder = SparseArray - def __init__(self, values, placement, - ndim=None, fastpath=False,): - - # Placement must be converted to BlockPlacement via property setter - # before ndim logic, because placement may be a slice which doesn't - # have a length. - self.mgr_locs = placement - - # kludgetastic - if ndim is None: - if len(self.mgr_locs) != 1: - ndim = 1 - else: - ndim = 2 - self.ndim = ndim - - if not isinstance(values, SparseArray): - raise TypeError("values must be SparseArray") - - self.values = values - @property def shape(self): return (len(self.mgr_locs), self.sp_index.length) diff --git a/pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle b/pandas/io/tests/data/legacy_pickle/0.15.0/0.15.0_x86_64_linux_2.7.8.pickle new file mode 100644 index 0000000000000000000000000000000000000000..d7d20b06df3050fe8d1fa48e0a3a3cd0ee27d04a GIT binary patch literal 15162 zcmd5@2b>f|*5AzTAP5MGVvb{AC5xDr1;KG;MX=4|IJ4VJ@31?wRWl<&T1>32Ibcqh zbIv*EoWpr|dg>YOPESuo&pW^WdsWrb-OM7&=Y8Ms`|YcGRqu7xd;j+;baO<7E6Sr* zb$h1Oo-0tPj9OK#$-FJ9TDp>%)?}_epR;AVovY_AkwnIxlbDPS`SwmbMp6+`UAk+i zSB)$BM6HVad{NtMRkcZ5h`O*~wyiB^=W~(C$$1eG+44xy*A=Tpt-4HBb|yR8=i8!R zkJTrkx{3ZRG*TK!g#NVJ9r>&+D=yVlAs;+IS48936&6o2snx9h;k*cmEkb{?k!C+Z zA-ZJxK1+#y`N8Mgyal>*ZsB1>PpTUH>|2BKm}4f{X^XX+3*9rsI#y+SCNG8=^Ye^- z{?&e6w?um^lOH!mx3A}l^`p)j&S0l#gAJ4o98A02Onbv6`EFR zTBT{VrZt-O(X>|6I!*H_M@?}ftELOM&9;h-9d8~6%Bm$Yojd5sY~qSdqfVVwJ}oq( z&Gd{mgkP%k_^LIn(X@}IwVKvx8qu_`rmJb%Pg8HWn-{Da6=z}#S40t!o$Yh&RX+7;WNcLI@+6vG3(;bL26k{!9o%=XNz4!f}<+cJAvGLxKT%ZR9l z@V3#o7~!nzz>y=vjvPhPu3qF|+EH!XvS0F&H=WVRxEK?_#aOKlxY*O+VjSaQK`Xaid0$7&F56i!?VQ)V8-O+p<|P zVQPccW9L&C)DCL4JBDYw)2hsN=cFrkv1&%tj~r7!QZ%??B9drwuALK&&Iqd}MRw1L zNpUgR6}v{Q+Rj-r0bWi_X-Onfg?40`Od=7%>{^g8rXpEXbGq2gsyQ^-QGlM9Gq#W{ z1%qRw!KJh=xvIZe&2IXt&LS7mO~VYIyho}94)b;f4~D5lP~pQ*;T zcX)i(3KZ%%DGTQV4{7_r_WL@p`F>HS(V6DJ%KN+GfT%OYDH`%XC+5uX8PKY1Ep&;4 z*otvi9BizZfE5oZrVC$M)JXylONEEuqK7{Yux*W6LFy+E?0=4RVA!C%Wu`{wvKEvFLG?xyekUEu7|>|bCmbzhDS7SRlGkRMm;QG)CF3d z<4ar0LcLjR*+RCMtHyVDczg?2Wqe1#jz^Lmk0LwHAv+%JierLyTm(BV@Y}JXvt4wv z5s!7namI)}Fyiqg!8wigDjK0DxZ=d9Rnw77*&R7?Qe2!Ibq1RR%79KKb9Q8k%+9wn z&66jpJ^U}YGZbO0b!A?=dX(d#cu3=RszVj*Ds-_)EPMjVWXGE>a zY{nLgt;+nItT@xggzQ6ULTAOr*-_=GbHbiFm!4a}E7kz>yrO<*ZD%p) zAn8h$9vaRzX7@qC?3_tjF0`$>Tvq0@velO2BHmS93@|PUfbkM-iIoSV4&O^LNOVsv=^IFTr5$mj?I`QtgiyPgLhTj`wc{z&Zgs_N!BD#$p>|0i)bcrT2Z!37 zuDHvD+T94Xdz@+qN0xh|PHnSO=g?u~K3Ciyb^7%<{hi_=;{j)2kK-LBLVL#tX)cjP z&d}dwPp?;&hk(0>1GsxcKbE$vz4@~@jQ{cu8;^Tb8Rs#daUOTY6U!Or$rTyrssDy? zo(3$QAy_<1u(*d{@tiB3H?WBCM(YK@;t@YADqFCQy~wzD$rUdfT)YBYysBXPS{Syk z`(WFf%_)Cys(1r7d^3=h@j!c4R<^e!J0g>OW$_k(_qHqEiCTRVrW3N{yBG$h_J{B= z-qXXN-N+x2MpH>CmJ*pS-ghPxwT8RLWFFej5;}E>ReY+=InZNj^dg*O??GJ|HBnuM{&WBE2MCG7vEU@ zl!Q_DPrcp#7C`t;&G7s1442UiSLcOfZf`*Nt-%-33j1ohnx_3U?XRf^v>)J#AB!6R zJ%4p`F_WjScL4wLb^t%o`0ExqShRN2>NM$X2Q{EUbr`JaP)(znZq<;Vx^S85V?2>i zLy7RARPSB>-L!fDeE*@5U>zawjnUxyxz`OcUj>jsc(L%04{gM&~<3h^$4$m37Qg zhafCgm}`VO$DE&3s92G<_KuDOpD$%!Gt^qKtyQL@aI?>=lT`jnJlIabuJe zI5wgFya7vV$8QRbD>q|)zn*l+;hU3WZw}vrgxKL}a&nLLfA7C2#(_BAl7`*j%B^UG zPGg#*1EIV%>aN^|x&BZdPW@wP^0JENbU1?RQu&;Y;d=B+Q}m`))E2B-tdDIHhb_4+ z*sdIbzss$W+##aY2;Af+r;tEIjznL?-zXLwjlUH0tLd0mO2?48-mW48mymL-(l(Ct z#N~FZj`lTjJQo93%W`|-!GBeY9PDo71giS4TfEJU+yPArKs&M?$}R>#JAvcMoteKp zfOa9t-T>4=n0K^ z_h2uY6}^n$aW|D1_a3@Gz=55UckF@)ek*Th)qmXvMK{!RW8V&HIr~r3PuBJ)$$$@AE=#5|_!zBN3L5u~m7`{jA804I9|M?qRcVU zxEM?k`xF&Wir5!bWQZgl!{;u!9~eq{{}R#%1f>sT>Bge;L7sHHg!I8d=>$vTC}*C1 zh$o#aA)N|Jx3Dzwu90p9!*-<&LMufZb;n9UDU&vRv#9P$huLbWz}rNauAMr>(v$@X zu1tIF*jRJ>Y}HOthxQJyy`qUvSF%&J6S~o!@!Bh!s#rd&+6grUo8D0HQe)N8`zW;> zbfL-WmuR)ylDMZ#%x=%LN#e&ef8xft?DRA;hk-NKn?u@zmT4D{CaRfiSc}~8HnBr)8p(&1$7n$N%c`=Fr6pXh7 zC5`S&xEOTxrNo1)vEb43E<-htxQtBTB~44VL*5U5T1V~! zRMwIEAj*i`hlnVLJj}(QJ0Brl(VdT?8gS=hV5oR~yoB@9x3?o{Q|D7f+&{Ea&5GF3jymEho>BVIA{Jh-aYQP$=fq?_k>mUS?H>f)xLN+Fdyb0>c zx0vfk$lKH&OVg~?kuhcUBM;&muLQrgmG7X*IPqO7YbX8zWjOIYB5K%w9=?+`xBVTksp-M@S)bAU;Ts~X;x&Qex&l>% zgip}lm49J98VR3rB{;*+v|5ahvd-t+h1b;L48H)gBo_W!6b2T|JvoN|H&(-F_&XPa z5B*ZhX*7I=Dn2hLZVUebhH}c+C8WOzO8=9kecR!0!9cqC4ur7uJ#`0`$kl|UWmI?N z56mVonRN0ab%>?O&`1~m^4js08*d7KQtf0|oi3bV23r3H(?IJ#BpH`K6H&9EtJ(17 zYdP_X(5j$n7+SDsnKK>sq%X7rQAnR^u{15aK!bNO!$SkV-s+I&pJ@-*>FWVWAA+eF z(5GPG8PI28z8TQhVav^cJ{3b;)0bl83;I%wd_lG$U(gq0uI$fT71wJ2zB=`frD>KX zuIu>Iu>?MGkpsXsh95{}9n*tQhT+#BqJ}+~i$U-WAs!5mtq>u8O{yxxFaLduTnnvG zgnN3j*EIFHjp`Za23LhsmiGhb0+WEKqf`xIVd0Y(i*<1|Tufr%;K<`8~kZv1wXmZqnU6)lQZ% z>FOX8L-WBjF?1M7#^nMcY8EaRgElyvcyN>U|3a#&7{cG;e)+#-RiIF}zo$@p zEKLT{iL+o;2UrpVBa6%a&{2EYKUdvoR!mp$D zSejg+5uP4^a$4i|dMfKf(+wyCw>J_|!@Y@%LEPRS;LmL|_&rF_C`H!J0ns-5g;R?3{g z(^Fs?JUva4arq1pHIHYx7_`H4#4F9p);Bt2>r=sXdk^+nH6cQ2## Date: Thu, 9 Oct 2014 10:09:55 -0400 Subject: [PATCH 2/2] COMPAT: Category pickle compat --- pandas/core/categorical.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index aa5fa29784912..dd9b5d71f508f 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -187,6 +187,8 @@ class Categorical(PandasObject): # For comparisons, so that numpy uses our implementation if the compare ops, which raise __array_priority__ = 1000 + ordered = False + name = None def __init__(self, values, categories=None, ordered=None, name=None, fastpath=False, levels=None): @@ -718,6 +720,19 @@ def __array__(self, dtype=None): return np.asarray(ret, dtype) return ret + def __setstate__(self, state): + """Necessary for making this object picklable""" + if not isinstance(state, dict): + raise Exception('invalid pickle state') + + if 'labels' in state: + state['_codes'] = state.pop('labels') + if '_levels' in state: + state['categories'] = state.pop('_levels') + + for k, v in compat.iteritems(state): + setattr(self,k,v) + @property def T(self): return self