diff --git a/doc/source/release.rst b/doc/source/release.rst index 0e6924e4b0122..cf50dd52e70e4 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -186,6 +186,7 @@ Bug Fixes - Disabled clipboard tests until release time (run locally with ``nosetests -A disabled`` (:issue:`6048`). - Bug in ``DataFrame.replace()`` when passing a nested ``dict`` that contained keys not in the values to be replaced (:issue:`6342`) +- ``str.match`` ignored the na flag (:issue:`6609`). - Bug in take with duplicate columns not consolidated (:issue:`6240`) - Bug in interpolate changing dtypes (:issue:`6290`) - Bug in Series.get, was using a buggy access method (:issue:`6383`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 3e3d1e2dbd76e..6add1767a05d6 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -364,11 +364,11 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=False): # Do this first, to make sure it happens even if the re.compile # raises below. warnings.warn("In future versions of pandas, match will change to" - " always return a bool indexer.""", UserWarning) + " always return a bool indexer.", UserWarning) if as_indexer and regex.groups > 0: warnings.warn("This pattern has match groups. To actually get the" - " groups, use str.extract.""", UserWarning) + " groups, use str.extract.", UserWarning) # If not as_indexer and regex.groups == 0, this returns empty lists # and is basically useless, so we will not warn. @@ -384,7 +384,7 @@ def f(x): # This is the new behavior of str_match. f = lambda x: bool(regex.match(x)) - return _na_map(f, arr) + return _na_map(f, arr, na) def str_extract(arr, pat, flags=0): @@ -887,6 +887,12 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): na=na, regex=regex) return self._wrap_result(result) + @copy(str_match) + def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False): + result = str_match(self.series, pat, case=case, flags=flags, + na=na, as_indexer=as_indexer) + return self._wrap_result(result) + @copy(str_replace) def replace(self, pat, repl, n=-1, case=True, flags=0): result = str_replace(self.series, pat, repl, n=n, case=case, @@ -951,7 +957,6 @@ def get_dummies(self, sep='|'): startswith = _pat_wrapper(str_startswith, na=True) endswith = _pat_wrapper(str_endswith, na=True) findall = _pat_wrapper(str_findall, flags=True) - match = _pat_wrapper(str_match, flags=True) extract = _pat_wrapper(str_extract, flags=True) len = _noarg_wrapper(str_len) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 53cf3d9b5ecc5..2721edcc89e59 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -220,7 +220,7 @@ def test_contains(self): # na values = Series(['om', 'foo',np.nan]) res = values.str.contains('foo', na="foo") - self.assertEqual (res.ix[2], "foo" ) + self.assertEqual (res.ix[2], "foo") def test_startswith(self): values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo']) @@ -460,6 +460,14 @@ def test_match(self): exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) + # na GH #6609 + res = Series(['a', 0, np.nan]).str.match('a', na=False) + exp = Series([True, False, False]) + assert_series_equal(exp, res) + res = Series(['a', 0, np.nan]).str.match('a') + exp = Series([True, np.nan, np.nan]) + assert_series_equal(exp, res) + def test_extract(self): # Contains tests like those in test_match and some others.