@@ -655,16 +655,29 @@ def _get_index_name(self):
655
655
self .index_col = range (implicit_first_cols )
656
656
index_name = None
657
657
elif np .isscalar (self .index_col ):
658
+ if isinstance (self .index_col , basestring ):
659
+ for i , c in enumerate (list (columns )):
660
+ if c == self .index_col :
661
+ self .index_col = i
658
662
index_name = columns .pop (self .index_col )
663
+
659
664
if index_name is not None and 'Unnamed' in index_name :
660
665
index_name = None
666
+
661
667
elif self .index_col is not None :
662
668
cp_cols = list (columns )
663
669
index_name = []
664
- for i in self .index_col :
665
- name = cp_cols [i ]
666
- columns .remove (name )
667
- index_name .append (name )
670
+ index_col = list (self .index_col )
671
+ for i , c in enumerate (index_col ):
672
+ if isinstance (c , basestring ):
673
+ index_name = c
674
+ for j , name in enumerate (cp_cols ):
675
+ if name == index_name :
676
+ index_col [i ] = j
677
+ else :
678
+ name = cp_cols [c ]
679
+ columns .remove (name )
680
+ index_name .append (name )
668
681
669
682
return index_name
670
683
@@ -698,42 +711,10 @@ def get_chunk(self, rows=None):
698
711
zipped_content = list (lib .to_object_array (content ).T )
699
712
700
713
if self .index_col is not None :
701
- if np .isscalar (self .index_col ):
702
- index = zipped_content .pop (self .index_col )
703
- else : # given a list of index
704
- index = []
705
- for idx in self .index_col :
706
- index .append (zipped_content [idx ])
707
- # remove index items from content and columns, don't pop in
708
- # loop
709
- for i in reversed (sorted (self .index_col )):
710
- zipped_content .pop (i )
711
-
712
- if np .isscalar (self .index_col ):
713
- if self ._should_parse_dates (self .index_col ):
714
- index = self ._conv_date (index )
715
- index , na_count = _convert_types (index , self .na_values )
716
- index = Index (index , name = self .index_name )
717
- if self .verbose and na_count :
718
- print 'Found %d NA values in the index' % na_count
719
- else :
720
- arrays = []
721
- for i , arr in enumerate (index ):
722
- if self ._should_parse_dates (self .index_col [i ]):
723
- arr = self ._conv_date (arr )
724
- arr , _ = _convert_types (arr , self .na_values )
725
- arrays .append (arr )
726
- index = MultiIndex .from_arrays (arrays , names = self .index_name )
714
+ index = self ._extract_index (zipped_content )
727
715
else :
728
716
index = Index (np .arange (len (content )))
729
717
730
- # if not index.is_unique:
731
- # dups = index.get_duplicates()
732
- # idx_str = 'Index' if not self._implicit_index else 'Implicit index'
733
- # err_msg = ('%s (columns %s) have duplicate values %s'
734
- # % (idx_str, self.index_col, str(dups)))
735
- # raise Exception(err_msg)
736
-
737
718
col_len , zip_len = len (self .columns ), len (zipped_content )
738
719
if col_len != zip_len :
739
720
row_num = - 1
@@ -769,6 +750,35 @@ def get_chunk(self, rows=None):
769
750
return df [df .columns [0 ]]
770
751
return df
771
752
753
def _extract_index(self, zipped_content):
    """
    Remove the index column(s) from ``zipped_content`` and build the index.

    Parameters
    ----------
    zipped_content : list
        Per-column arrays of parsed values. The entries located at
        ``self.index_col`` are popped from this list in place.

    Returns
    -------
    Index or MultiIndex
        An ``Index`` when ``self.index_col`` is a scalar, otherwise a
        ``MultiIndex`` with one level per entry of ``self.index_col``.
        Either one is named from ``self.index_name``.
    """
    index_col = self.index_col

    if np.isscalar(index_col):
        # One index column: take it out, then convert its values.
        values = zipped_content.pop(index_col)
        if self._should_parse_dates(index_col):
            values = self._conv_date(values)
        values, na_count = _convert_types(values, self.na_values)
        single = Index(values, name=self.index_name)
        if self.verbose and na_count:
            print('Found %d NA values in the index' % na_count)
        return single

    # Several index columns: collect all of them before removing any, so
    # the positions in index_col still refer to the untouched list.
    levels = [zipped_content[col] for col in index_col]
    # Pop from the highest position down so that earlier removals do not
    # shift the positions that are still to be removed.
    for col in sorted(index_col, reverse=True):
        zipped_content.pop(col)

    arrays = []
    for pos, level in enumerate(levels):
        if self._should_parse_dates(index_col[pos]):
            level = self._conv_date(level)
        # The NA count is only reported for the single-column case.
        level, _ = _convert_types(level, self.na_values)
        arrays.append(level)
    return MultiIndex.from_arrays(arrays, names=self.index_name)
781
+
772
782
def _find_line_number (self , exp_len , chunk_len , chunk_i ):
773
783
if exp_len is None :
774
784
prev_pos = 0
0 commit comments