From c919b73873f31666048689c0595dca3e4f255eb1 Mon Sep 17 00:00:00 2001 From: Jacob Bundgaard Date: Fri, 1 Mar 2019 14:00:14 +0100 Subject: [PATCH 1/7] Update documentation of read_csv to explain that index_col can be a string containing a column name. --- doc/source/user_guide/io.rst | 11 ++++++----- pandas/io/parsers.py | 13 ++++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index b23a0f10e9e2b..7732869b22778 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -110,11 +110,12 @@ names : array-like, default ``None`` List of column names to use. If file contains no header row, then you should explicitly pass ``header=None``. Duplicates in this list will cause a ``UserWarning`` to be issued. -index_col : int or sequence or ``False``, default ``None`` - Column to use as the row labels of the ``DataFrame``. If a sequence is given, a - MultiIndex is used. If you have a malformed file with delimiters at the end of - each line, you might consider ``index_col=False`` to force pandas to *not* use - the first column as the index (row names). +index_col : int/string or sequence of int/string or ``False``, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. + If a sequence of int/string is given, a MultiIndex is used. + Columns used for the index (row names) are dropped from the actual columns of the input dataframe. + They are accessible via ``.index``. + (Note: ``index_col=False`` can be used to force pandas to *not* use the first column as the index, e.g. when you have a malformed file with delimiters at the end of each line.) usecols : list-like or callable, default ``None`` Return a subset of the columns. If list-like, all elements must either be positional (i.e. 
integer indices into the document columns) or strings diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4163a571df800..b0f66a3787b66 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -102,11 +102,14 @@ List of column names to use. If file contains no header row, then you should explicitly pass ``header=None``. Duplicates in this list will cause a ``UserWarning`` to be issued. -index_col : int, sequence or bool, optional - Column to use as the row labels of the DataFrame. If a sequence is given, a - MultiIndex is used. If you have a malformed file with delimiters at the end - of each line, you might consider ``index_col=False`` to force pandas to - not use the first column as the index (row names). +index_col : int/string or sequence of int/string or ``False``, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int/string is given, a + MultiIndex is used. Columns used for the index (row names) are dropped from + the actual columns of the input dataframe. They are accessible via + ``.index``. (Note: ``index_col=False`` can be used to force pandas to *not* + use the first column as the index, e.g. when you have a malformed file with + delimiters at the end of each line.) usecols : list-like or callable, optional Return a subset of the columns. If list-like, all elements must either be positional (i.e. integer indices into the document columns) or strings From f8d058b2f2e969f3d8a47f2d5bd9058d99fc23d5 Mon Sep 17 00:00:00 2001 From: Jacob Hjort Bundgaard Date: Sun, 3 Mar 2019 15:11:25 +0100 Subject: [PATCH 2/7] Improve wording of documentation for index_col parameter in read_csv. 
--- doc/source/user_guide/io.rst | 4 ++-- pandas/io/parsers.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 7732869b22778..e43a9afbd7161 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -110,9 +110,9 @@ names : array-like, default ``None`` List of column names to use. If file contains no header row, then you should explicitly pass ``header=None``. Duplicates in this list will cause a ``UserWarning`` to be issued. -index_col : int/string or sequence of int/string or ``False``, default ``None`` +index_col : int, str, sequence of int / str, or False, default ``None`` Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. - If a sequence of int/string is given, a MultiIndex is used. + If a sequence of int / str is given, a MultiIndex is used. Columns used for the index (row names) are dropped from the actual columns of the input dataframe. They are accessible via ``.index``. (Note: ``index_col=False`` can be used to force pandas to *not* use the first column as the index, e.g. when you have a malformed file with delimiters at the end of each line.) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index b0f66a3787b66..9167ed6439797 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -102,9 +102,9 @@ List of column names to use. If file contains no header row, then you should explicitly pass ``header=None``. Duplicates in this list will cause a ``UserWarning`` to be issued. -index_col : int/string or sequence of int/string or ``False``, default ``None`` +index_col : int, str, sequence of int / str, or False, default ``None`` Column(s) to use as the row labels of the ``DataFrame``, either given as - string name or column index. If a sequence of int/string is given, a + string name or column index. If a sequence of int / str is given, a MultiIndex is used. 
Columns used for the index (row names) are dropped from the actual columns of the input dataframe. They are accessible via ``.index``. (Note: ``index_col=False`` can be used to force pandas to *not* From b65927e0a9bf8e72e952fde9882a94059c4d1b28 Mon Sep 17 00:00:00 2001 From: Jacob Bundgaard Date: Thu, 14 Mar 2019 12:00:40 +0100 Subject: [PATCH 3/7] Remove unnecessary documentation Remove description of columns used as indexes not also being present as normal columns in documentation for read_csv. --- doc/source/user_guide/io.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index e43a9afbd7161..4c7c7080f1f0a 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -113,8 +113,6 @@ names : array-like, default ``None`` index_col : int, str, sequence of int / str, or False, default ``None`` Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. If a sequence of int / str is given, a MultiIndex is used. - Columns used for the index (row names) are dropped from the actual columns of the input dataframe. - They are accessible via ``.index``. (Note: ``index_col=False`` can be used to force pandas to *not* use the first column as the index, e.g. when you have a malformed file with delimiters at the end of each line.) usecols : list-like or callable, default ``None`` Return a subset of the columns. If list-like, all elements must either From 8a81ba2612fdc77b3681d37d9a2d0e1d3d407bad Mon Sep 17 00:00:00 2001 From: Jacob Bundgaard Date: Thu, 14 Mar 2019 13:51:14 +0100 Subject: [PATCH 4/7] Improve formatting of read_csv documentation. 
--- doc/source/user_guide/io.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 4c7c7080f1f0a..521cd6ba58d83 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -111,9 +111,13 @@ names : array-like, default ``None`` explicitly pass ``header=None``. Duplicates in this list will cause a ``UserWarning`` to be issued. index_col : int, str, sequence of int / str, or False, default ``None`` - Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. - If a sequence of int / str is given, a MultiIndex is used. - (Note: ``index_col=False`` can be used to force pandas to *not* use the first column as the index, e.g. when you have a malformed file with delimiters at the end of each line.) + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int / str is given, a + MultiIndex is used. + + Note: ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. usecols : list-like or callable, default ``None`` Return a subset of the columns. If list-like, all elements must either be positional (i.e. integer indices into the document columns) or strings From eb09a5771acf52320ee8599d766e12d578b2c18e Mon Sep 17 00:00:00 2001 From: Jacob Bundgaard Date: Thu, 14 Mar 2019 14:07:42 +0100 Subject: [PATCH 5/7] Improve formatting of read_csv documentation. --- pandas/io/parsers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9167ed6439797..4a3d292201c16 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -107,9 +107,11 @@ string name or column index. If a sequence of int / str is given, a MultiIndex is used. 
Columns used for the index (row names) are dropped from the actual columns of the input dataframe. They are accessible via - ``.index``. (Note: ``index_col=False`` can be used to force pandas to *not* - use the first column as the index, e.g. when you have a malformed file with - delimiters at the end of each line.) + ``.index``. + + Note: ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. usecols : list-like or callable, optional Return a subset of the columns. If list-like, all elements must either be positional (i.e. integer indices into the document columns) or strings From f4458ea1acc076d451f3e8c030e826f2bce49443 Mon Sep 17 00:00:00 2001 From: Jacob Bundgaard Date: Thu, 14 Mar 2019 15:26:11 +0100 Subject: [PATCH 6/7] Remove trailing whitespace. --- doc/source/user_guide/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 521cd6ba58d83..ee0b156027f5e 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -114,7 +114,7 @@ index_col : int, str, sequence of int / str, or False, default ``None`` Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. If a sequence of int / str is given, a MultiIndex is used. - + Note: ``index_col=False`` can be used to force pandas to *not* use the first column as the index, e.g. when you have a malformed file with delimiters at the end of each line. From 1aa334efb0526c2b05aaa0aa2f1ffedc645b8dab Mon Sep 17 00:00:00 2001 From: Jacob Bundgaard Date: Fri, 15 Mar 2019 21:23:11 +0100 Subject: [PATCH 7/7] Remove unnecessary documentation Remove description of columns used as indexes not also being present as normal columns in documentation for read_csv. 
--- pandas/io/parsers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4a3d292201c16..4e465f39aa3e5 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -105,9 +105,7 @@ index_col : int, str, sequence of int / str, or False, default ``None`` Column(s) to use as the row labels of the ``DataFrame``, either given as string name or column index. If a sequence of int / str is given, a - MultiIndex is used. Columns used for the index (row names) are dropped from - the actual columns of the input dataframe. They are accessible via - ``.index``. + MultiIndex is used. Note: ``index_col=False`` can be used to force pandas to *not* use the first column as the index, e.g. when you have a malformed file with delimiters at