Skip to content

Commit 5481c4a

Browse files
committed
BUG: fix column insertion memory leak #790 per #467
1 parent ab9898c commit 5481c4a

File tree

2 files changed

+29
-20
lines changed

2 files changed

+29
-20
lines changed

pandas/core/internals.py

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -315,13 +315,6 @@ def _get_items(self):
315315
return self.axes[0]
316316
items = property(fget=_get_items)
317317

318-
def set_items_norename(self, value):
319-
value = _ensure_index(value)
320-
self.axes[0] = value
321-
322-
for block in self.blocks:
323-
block.set_ref_items(value, maybe_rename=False)
324-
325318
def __getstate__(self):
326319
block_values = [b.values for b in self.blocks]
327320
block_items = [b.items for b in self.blocks]
@@ -579,6 +572,12 @@ def consolidate(self):
579572
new_blocks = _consolidate(self.blocks, self.items)
580573
return BlockManager(new_blocks, self.axes)
581574

575+
def _consolidate_inplace(self):
576+
if self.is_consolidated():
577+
return
578+
579+
self.blocks = _consolidate(self.blocks, self.items)
580+
582581
def get(self, item):
583582
_, block = self._find_block(item)
584583
return block.get(item)
@@ -619,7 +618,7 @@ def set(self, item, value):
619618
if not block.should_store(value):
620619
# delete from block, create and append new block
621620
self._delete_from_block(i, item)
622-
self._add_new_block(item, value)
621+
self._add_new_block(item, value, loc=None)
623622
else:
624623
block.set(item, value)
625624
else:
@@ -632,8 +631,19 @@ def insert(self, loc, item, value):
632631

633632
new_items = self.items.insert(loc, item)
634633
self.set_items_norename(new_items)
634+
635635
# new block
636-
self._add_new_block(item, value)
636+
self._add_new_block(item, value, loc=loc)
637+
638+
if len(self.blocks) > 20:
639+
self._consolidate_inplace()
640+
641+
def set_items_norename(self, value):
642+
value = _ensure_index(value)
643+
self.axes[0] = value
644+
645+
for block in self.blocks:
646+
block.set_ref_items(value, maybe_rename=False)
637647

638648
def _delete_from_block(self, i, item):
639649
"""
@@ -648,11 +658,12 @@ def _delete_from_block(self, i, item):
648658
if new_right is not None:
649659
self.blocks.append(new_right)
650660

651-
def _add_new_block(self, item, value):
661+
def _add_new_block(self, item, value, loc=None):
652662
# Do we care about dtype at the moment?
653663

654664
# hm, elaborate hack?
655-
loc = self.items.get_loc(item)
665+
if loc is None:
666+
loc = self.items.get_loc(item)
656667
new_block = make_block(value, self.items[loc:loc+1].copy(),
657668
self.items)
658669
self.blocks.append(new_block)

scripts/leak.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,12 @@
11
from pandas import *
22
import numpy as np
3-
43
import pandas.util.testing as tm
4+
import os
5+
import psutil
56

6-
tm.N = 2000
7-
tm.K = 25
8-
9-
for i in xrange(100):
10-
print i
11-
df = tm.makeTimeDataFrame()
12-
y = df.pop('A')
13-
model = ols(y=y, x=df, window=1999).beta
7+
pid = os.getpid()
8+
proc = psutil.Process(pid)
149

10+
df = DataFrame(index=np.arange(100))
11+
for i in range(5000):
12+
df[i] = 5

0 commit comments

Comments
 (0)