diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 3c7a80f096844..628746f541102 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -166,8 +166,10 @@ Categorical Datetimelike ^^^^^^^^^^^^ - Bug in :func:`pandas.infer_freq`, raising ``TypeError`` when inferred on :class:`RangeIndex` (:issue:`47084`) +- Bug in :func:`to_datetime` was raising on invalid offsets with ``errors='coerce'`` and ``infer_datetime_format=True`` (:issue:`48633`) - Bug in :class:`DatetimeIndex` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``dtype`` or data (:issue:`48659`) - Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`) +- Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 9d0479ec8dbf1..a181133c14f2b 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -943,7 +943,7 @@ def format_is_iso(f: str) -> bint: return False -def guess_datetime_format(dt_str, bint dayfirst=False): +def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: """ Guess the datetime format of a given datetime string. @@ -1026,7 +1026,12 @@ def guess_datetime_format(dt_str, bint dayfirst=False): # This separation will prevent subsequent processing # from correctly parsing the time zone format. # So in addition to the format nomalization, we rejoin them here. 
- tokens[offset_index] = parsed_datetime.strftime("%z") + try: + tokens[offset_index] = parsed_datetime.strftime("%z") + except ValueError: + # Invalid offset might not have raised in du_parse + # https://github.com/dateutil/dateutil/issues/188 + return None tokens = tokens[:offset_index + 1 or None] format_guess = [None] * len(tokens) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 1d092d9563f00..5c6b4c2434b88 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1142,13 +1142,37 @@ def test_to_datetime_coerce(self): ) tm.assert_index_equal(result, expected) - def test_to_datetime_coerce_malformed(self): + @pytest.mark.parametrize("infer_datetime_format", [True, False]) + @pytest.mark.parametrize( + "errors, expected", + [ + ("coerce", Index([NaT, NaT])), + ("ignore", Index(["200622-12-31", "111111-24-11"])), + ], + ) + def test_to_datetime_malformed_no_raise( + self, errors, expected, infer_datetime_format + ): # GH 28299 + # GH 48633 ts_strings = ["200622-12-31", "111111-24-11"] - result = to_datetime(ts_strings, errors="coerce") - expected = Index([NaT, NaT]) + result = to_datetime( + ts_strings, errors=errors, infer_datetime_format=infer_datetime_format + ) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("infer_datetime_format", [True, False]) + def test_to_datetime_malformed_raise(self, infer_datetime_format): + # GH 48633 + ts_strings = ["200622-12-31", "111111-24-11"] + with pytest.raises( + ValueError, + match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$", + ): + to_datetime( + ts_strings, errors="raise", infer_datetime_format=infer_datetime_format + ) + def test_iso_8601_strings_with_same_offset(self): # GH 17697, 11736 ts_str = "2015-11-18 15:30:00+05:30" diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 4dae6c586e306..03084fcbdcb11 100644 --- 
a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -212,8 +212,6 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt): "1/1/1/1", "this_is_not_a_datetime", "51a", - 9, - datetime(2011, 1, 1), ], ) def test_guess_datetime_format_invalid_inputs(invalid_dt): @@ -222,6 +220,17 @@ def test_guess_datetime_format_invalid_inputs(invalid_dt): assert parsing.guess_datetime_format(invalid_dt) is None +@pytest.mark.parametrize("invalid_type_dt", [9, datetime(2011, 1, 1)]) +def test_guess_datetime_format_wrong_type_inputs(invalid_type_dt): + # Non-str inputs (e.g. an int or a datetime object) are rejected by the + # ``dt_str: str`` argument check with a TypeError instead of yielding None. + with pytest.raises( + TypeError, + match=r"^Argument 'dt_str' has incorrect type \(expected str, got .*\)$", + ): + parsing.guess_datetime_format(invalid_type_dt) + + @pytest.mark.parametrize( "string,fmt", [