diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index 4901ea3911..5d52ca8589 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -1631,6 +1631,16 @@ struct pyobject_caster {
 
     template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
     bool load(handle src, bool /* convert */) {
+#ifndef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES
+        // Accept a bytes object where py::str is requested by decoding it as UTF-8.
+        // A failed decode propagates the pending Python error instead of returning false.
+        if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
+            PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
+            if (!str_from_bytes) throw error_already_set();
+            value = reinterpret_steal<type>(str_from_bytes);
+            return true;
+        }
+#endif
         if (!isinstance<type>(src))
             return false;
         value = reinterpret_borrow<type>(src);
diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index 3244ed8c52..2217b974ab 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -738,8 +738,6 @@ inline bool PyIterable_Check(PyObject *obj) {
 inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
 inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
 
-inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
-
 inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
 
 class kwargs_proxy : public handle {
@@ -885,7 +883,7 @@ class bytes;
 
 class str : public object {
 public:
-    PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
+    PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str)
 
     str(const char *c, size_t n)
         : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h
index 6c2bebda87..435970e0e3 100644
--- a/include/pybind11/stl.h
+++ b/include/pybind11/stl.h
@@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
     using value_conv = make_caster<Value>;
 
     bool load(handle src, bool convert) {
-        if (!isinstance<sequence>(src) || isinstance<str>(src))
+        if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
             return false;
         auto s = reinterpret_borrow<sequence>(src);
         value.clear();
diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp
index 0f8d56410f..008bccae4d 100644
--- a/tests/test_pytypes.cpp
+++ b/tests/test_pytypes.cpp
@@ -372,4 +372,11 @@ TEST_SUBMODULE(pytypes, m) {
             buf, static_cast<ssize_t>(strlen(buf)));
     });
 #endif
+
+    m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });
+    m.def("isinstance_pybind11_str", [](py::object o) { return py::isinstance<py::str>(o); });
+
+    m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); });
+    m.def("pass_to_pybind11_str", [](py::str s) { return py::len(s); });
+    m.def("pass_to_std_string", [](std::string s) { return s.size(); });
 }
diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py
index 0e82f5258e..a66cb6af65 100644
--- a/tests/test_pytypes.py
+++ b/tests/test_pytypes.py
@@ -190,7 +190,7 @@ def test_constructors():
     """C++ default and converting constructors are equivalent to type calls in Python"""
     types = [bytes, str, bool, int, float, tuple, list, dict, set]
     expected = {t.__name__: t() for t in types}
-    if str is bytes:  # Python 2.
+    if pytest.PY2:
         # Note that bytes.__name__ == 'str' in Python 2.
         # pybind11::str is unicode even under Python 2.
         expected["bytes"] = bytes()
@@ -211,7 +211,7 @@ def test_constructors():
     }
     inputs = {k.__name__: v for k, v in data.items()}
     expected = {k.__name__: k(v) for k, v in data.items()}
-    if str is bytes:  # Similar to the above. See comments above.
+    if pytest.PY2:  # Similar to the above. See comments above.
         inputs["bytes"] = b'41'
         inputs["str"] = 42
         expected["bytes"] = b'41'
@@ -254,13 +254,20 @@ def test_pybind11_str_raw_str():
     valid_orig = u"Ǳ"
     valid_utf8 = valid_orig.encode("utf-8")
     valid_cvt = cvt(valid_utf8)
-    assert type(valid_cvt) == bytes  # Probably surprising.
-    assert valid_cvt == b'\xc7\xb1'
+    assert type(valid_cvt) is (unicode if pytest.PY2 else str)  # noqa: F821
+    if pytest.PY2:
+        assert valid_cvt == valid_orig
+    else:
+        assert valid_cvt == u"b'\\xc7\\xb1'"
 
     malformed_utf8 = b'\x80'
-    malformed_cvt = cvt(malformed_utf8)
-    assert type(malformed_cvt) == bytes  # Probably surprising.
-    assert malformed_cvt == b'\x80'
+    if pytest.PY2:
+        with pytest.raises(UnicodeDecodeError):
+            cvt(malformed_utf8)
+    else:
+        malformed_cvt = cvt(malformed_utf8)
+        assert type(malformed_cvt) is str  # Python 3 only branch.
+        assert malformed_cvt == u"b'\\x80'"
 
 
 def test_implicit_casting():
@@ -390,3 +397,27 @@ def test_memoryview_from_memory():
     assert isinstance(view, memoryview)
     assert view.format == 'B'
     assert bytes(view) == b'\xff\xe1\xab\x37'
+
+
+def test_isinstance_string_types():
+    assert m.isinstance_pybind11_bytes(b"")
+    assert not m.isinstance_pybind11_bytes(u"")
+
+    assert m.isinstance_pybind11_str(u"")
+    assert not m.isinstance_pybind11_str(b"")
+
+
+def test_pass_bytes_or_unicode_to_string_types():
+    assert m.pass_to_pybind11_bytes(b"Bytes") == 5
+    with pytest.raises(TypeError):
+        m.pass_to_pybind11_bytes(u"Str")  # NO implicit encode
+
+    assert m.pass_to_pybind11_str(b"Bytes") == 5  # implicit decode
+    assert m.pass_to_pybind11_str(u"Str") == 3
+
+    assert m.pass_to_std_string(b"Bytes") == 5
+    assert m.pass_to_std_string(u"Str") == 3
+
+    malformed_utf8 = b"\x80"
+    with pytest.raises(UnicodeDecodeError):
+        m.pass_to_pybind11_str(malformed_utf8)