Removing an un-needed conditional in np_datetime_strings.c #21462

uds5501 · 2018-06-13T14:16:25Z

closes to_datetime function not working with %Y.%m.%d %H format #21422
tests added / passed
passes git diff upstream/master -u -- "*.py" | flake8 --diff
whatsnew entry

PS: I was not able to build this properly on my setup, so would wait for the test builds to run.
Also, in which version should I put a whatsnew entry in?

BUG : to_datetime function not working with %Y.%m.%d %H format pandas-dev#21422

WillAyd

What error are you getting locally? Seems like this is going to be difficult to tackle if you can't test locally first

WillAyd · 2018-06-13T16:02:16Z

pandas/_libs/src/datetime/np_datetime_strings.c

@@ -253,9 +253,7 @@ int parse_iso_8601_datetime(char *str, int len,

    /* Next character must be a ':' or the end of the string */
    if (sublen == 0) {


I suppose you don't even need the brackets if this is all that remains within the conditional

WillAyd · 2018-06-13T17:40:13Z

Can you add a test for the referenced issue?

uds5501 · 2018-06-13T17:52:29Z

@WillAyd Actually, I would appreciate a little help here. My build failed with the following error report after running pd.test()

_____________ TestCParserHighMemory.test_usecols_with_parse_dates ______________

self = <pandas.tests.io.parser.test_parsers.TestCParserHighMemory object at 0x7f04da490be0>

    def test_usecols_with_parse_dates(self):
        # See gh-9755
        s = """a,b,c,d,e
            0,1,20140101,0900,4
            0,1,20140102,1000,4"""
        parse_dates = [[1, 2]]
    
        cols = {
            'a': [0, 0],
            'c_d': [
                Timestamp('2014-01-01 09:00:00'),
                Timestamp('2014-01-02 10:00:00')
            ]
        }
        expected = DataFrame(cols, columns=['c_d', 'a'])
    
        df = self.read_csv(StringIO(s), usecols=[0, 2, 3],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
    
        df = self.read_csv(StringIO(s), usecols=[3, 0, 2],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
    
        # See gh-13604
        s = """2008-02-07 09:40,1032.43
            2008-02-07 09:50,1042.54
            2008-02-07 10:00,1051.65
            """
        parse_dates = [0]
        names = ['date', 'values']
        usecols = names[:]
    
        index = Index([Timestamp('2008-02-07 09:40'),
                       Timestamp('2008-02-07 09:50'),
                       Timestamp('2008-02-07 10:00')],
                      name='date')
        cols = {'values': [1032.43, 1042.54, 1051.65]}
        expected = DataFrame(cols, index=index)
    
        df = self.read_csv(StringIO(s), parse_dates=parse_dates, index_col=0,
                           usecols=usecols, header=None, names=names)
        tm.assert_frame_equal(df, expected)
    
        # See gh-14792
        s = """a,b,c,d,e,f,g,h,i,j
            2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [0]
        usecols = list('abcdefghij')
        cols = {'a': Timestamp('2016-09-21'),
                'b': [1], 'c': [1], 'd': [2],
                'e': [3], 'f': [4], 'g': [5],
                'h': [6], 'i': [7], 'j': [8]}
        expected = DataFrame(cols, columns=usecols)
        df = self.read_csv(StringIO(s), usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
    
        s = """a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [[0, 1]]
        usecols = list('abcdefghij')
        cols = {'a_b': '2016/09/21 1',
                'c': [1], 'd': [2], 'e': [3], 'f': [4],
                'g': [5], 'h': [6], 'i': [7], 'j': [8]}
        expected = DataFrame(cols, columns=['a_b'] + list('cdefghij'))
        df = self.read_csv(StringIO(s), usecols=usecols,
                           parse_dates=parse_dates)
>       tm.assert_frame_equal(df, expected)

pandas/tests/io/parser/usecols.py:261: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
pandas/util/testing.py:1353: in assert_frame_equal
    obj='DataFrame.iloc[:, {idx}]'.format(idx=i))
pandas/util/testing.py:1198: in assert_series_equal
    assert_attr_equal('dtype', left, right)
pandas/util/testing.py:929: in assert_attr_equal
    raise_assert_detail(obj, msg, left_attr, right_attr)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

obj = 'Attributes', message = 'Attribute "dtype" are different'
left = dtype('<M8[ns]'), right = dtype('O'), diff = None

    def raise_assert_detail(obj, message, left, right, diff=None):
        if isinstance(left, np.ndarray):
            left = pprint_thing(left)
        elif is_categorical_dtype(left):
            left = repr(left)
    
        if PY2 and isinstance(left, string_types):
            # left needs to be printable in native text type in python2
            left = left.encode('utf-8')
    
        if isinstance(right, np.ndarray):
            right = pprint_thing(right)
        elif is_categorical_dtype(right):
            right = repr(right)
    
        if PY2 and isinstance(right, string_types):
            # right needs to be printable in native text type in python2
            right = right.encode('utf-8')
    
        msg = """{obj} are different
    
    {message}
    [left]:  {left}
    [right]: {right}""".format(obj=obj, message=message, left=left, right=right)
    
        if diff is not None:
            msg += "\n[diff]: {diff}".format(diff=diff)
    
>       raise AssertionError(msg)
E       AssertionError: Attributes are different
E       
E       Attribute "dtype" are different
E       [left]:  datetime64[ns]
E       [right]: object

pandas/util/testing.py:1023: AssertionError

Do correct me if I am wrong, but this test is supposed to mean that df' and expected have different attribute dtypes here, right? So I tried recreating the test using the given utilities and checked df.info() and expected.info() to get the following output:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 9 columns):
a_b    1 non-null object
c      1 non-null int64
d      1 non-null int64
e      1 non-null int64
f      1 non-null int64
g      1 non-null int64
h      1 non-null int64
i      1 non-null int64
j      1 non-null int64
dtypes: int64(8), object(1)
memory usage: 152.0+ bytes
None
            a_b  c  d  e  f  g  h  i  j
0  2016/09/21 1  1  2  3  4  5  6  7  8
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 9 columns):
a_b    1 non-null object
c      1 non-null int64
d      1 non-null int64
e      1 non-null int64
f      1 non-null int64
g      1 non-null int64
h      1 non-null int64
i      1 non-null int64
j      1 non-null int64
dtypes: int64(8), object(1)
memory usage: 152.0+ bytes
None
            a_b  c  d  e  f  g  h  i  j
0  2016/09/21 1  1  2  3  4  5  6  7  8

# Ran without throwing any error

But here I cannot see any datetime64[ns] object , so what exactly is this build error trying to show me?
NOTE : this is only one of the errors which I tried to investigate , the testing summary was as follows:

5 failed, 24042 passed, 2296 skipped, 80 xfailed, 31 xpassed, 14 warnings in 471.57 seconds

WillAyd · 2018-06-13T17:56:39Z

Oh OK - just to clarify that's not a "build error" (which would come while doing python setup.py build_ext --inplace) but rather a test error occurred after making the change.

Does that test work when you perform a clean build on master and subsequently fail when you re-build after making the change you've pushed? If so that's indicative that your change as is wouldn't work as it's breaking existing test cases so something would need to change

uds5501 · 2018-06-13T18:29:15Z

@WillAyd Thank you for that!
Well yes, the tests pass on the master build. I guess I got to tweak the code a little

WillAyd · 2018-06-13T18:33:10Z

You are correct in that the error message is telling you that the dtypes of the various columns are not aligned. Can you inspect a little further to see the difference though? Just glancing over it I'm assuming expected contains the object dtype but maybe it shouldn't?

Sometimes tests are wrong so try to debug and let me know if you can find any insights

uds5501 · 2018-06-13T18:43:28Z

@WillAyd Okay, sure. I will work on it

uds5501 · 2018-06-14T05:05:09Z

@WillAyd I wanted to know something about the dataframe reading syntaxes in classes.

pandas/pandas/tests/io/parser/usecols.py

Line 210 in b5fc769

df = self.read_csv(StringIO(s), usecols=[0, 2, 3],

I want to know what is the difference between self.read_csv and pd.read_csv . Also, it would be great to know how does this self.read_csv works given the fact that there is no method as such defined within the class

WillAyd · 2018-06-14T05:24:12Z

It could be clearer but the self in those instances are subclasses of the UsecolsTest class in the module. If you look in test_parsers you should see how the subclasses actually implement that

jbrockmendel · 2018-06-14T16:19:57Z

Just to make sure I understand the issue this PR is trying to address: ATM pd.to_datetime("2016.05.06 01") returns pd.Timestamp("2016-05-06 01:00:00") but pd.to_datetime("2016.05.06 1") raises ValueError. And the point of the PR is to make the latter return the same as the former?

uds5501 · 2018-06-14T17:06:08Z

@jbrockmendel Yes, that is correct

jreback · 2018-09-25T16:27:48Z

can you rebase

jreback · 2018-11-23T03:26:42Z

closing as stale.

Remove an un-needed conditional

5646af4

BUG : to_datetime function not working with %Y.%m.%d %H format pandas-dev#21422

WillAyd requested changes Jun 13, 2018

View reviewed changes

Update np_datetime_strings.c

9c049aa

gfyoung added Bug Datetime Datetime data dtype labels Jun 13, 2018

jreback closed this Nov 23, 2018

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Removing an un-needed conditional in np_datetime_strings.c #21462

Removing an un-needed conditional in np_datetime_strings.c #21462

Uh oh!

uds5501 commented Jun 13, 2018 •

edited

Loading

Uh oh!

WillAyd left a comment

Uh oh!

WillAyd Jun 13, 2018

Uh oh!

WillAyd commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 13, 2018

Uh oh!

WillAyd commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 13, 2018

Uh oh!

WillAyd commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 14, 2018

Uh oh!

WillAyd commented Jun 14, 2018

Uh oh!

jbrockmendel commented Jun 14, 2018

Uh oh!

uds5501 commented Jun 14, 2018

Uh oh!

jreback commented Sep 25, 2018

Uh oh!

jreback commented Nov 23, 2018

Uh oh!

Uh oh!

		@@ -253,9 +253,7 @@ int parse_iso_8601_datetime(char *str, int len,

		/* Next character must be a ':' or the end of the string */
		if (sublen == 0) {

Uh oh!

Removing an un-needed conditional in np_datetime_strings.c #21462

Removing an un-needed conditional in np_datetime_strings.c #21462

Uh oh!

Conversation

uds5501 commented Jun 13, 2018 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

WillAyd left a comment

Choose a reason for hiding this comment

Uh oh!

WillAyd Jun 13, 2018

Choose a reason for hiding this comment

Uh oh!

WillAyd commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 13, 2018

Uh oh!

WillAyd commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 13, 2018

Uh oh!

WillAyd commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 13, 2018

Uh oh!

uds5501 commented Jun 14, 2018

Uh oh!

WillAyd commented Jun 14, 2018

Uh oh!

jbrockmendel commented Jun 14, 2018

Uh oh!

uds5501 commented Jun 14, 2018

Uh oh!

jreback commented Sep 25, 2018

Uh oh!

jreback commented Nov 23, 2018

Uh oh!

Uh oh!

uds5501 commented Jun 13, 2018 •

edited

Loading