diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index c1d0bbc34d..0b97a44680 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -1629,7 +1629,25 @@ struct pyobject_caster { template ::value, int> = 0> bool load(handle src, bool /* convert */) { value = src; return static_cast(value); } +#ifdef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES template ::value, int> = 0> +#else + template ::value, int> = 0> + bool load(handle src, bool /* convert */) { + if (isinstance(src)) { + PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr); + if (!str_from_bytes) throw error_already_set(); + value = reinterpret_steal(str_from_bytes); + return true; + } + if (!isinstance(src)) + return false; + value = reinterpret_borrow(src); + return true; + } + + template ::value && !std::is_same::value, int> = 0> +#endif bool load(handle src, bool /* convert */) { if (!isinstance(src)) return false; diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 9000668b91..75c46d6b2f 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -738,8 +738,6 @@ inline bool PyIterable_Check(PyObject *obj) { inline bool PyNone_Check(PyObject *o) { return o == Py_None; } inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; } -inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); } - inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; } class kwargs_proxy : public handle { @@ -782,7 +780,9 @@ PYBIND11_NAMESPACE_END(detail) Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \ PYBIND11_DEPRECATED("Use py::isinstance(obj) instead") \ bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \ - static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } + static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \ + template \ + Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } #define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \ PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ @@ -792,9 +792,7 @@ PYBIND11_NAMESPACE_END(detail) { if (!m_ptr) throw error_already_set(); } \ Name(object &&o) \ : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \ - { if (!m_ptr) throw error_already_set(); } \ - template \ - Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } + { if (!m_ptr) throw error_already_set(); } #define PYBIND11_OBJECT(Name, Parent, CheckFun) \ PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ @@ -885,7 +883,7 @@ class bytes; class str : public object { public: - PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str) + PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str) str(const char *c, size_t n) : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) { @@ -930,11 +928,20 @@ class str : public object { private: /// Return string representation -- always returns a new reference, even if already a str static PyObject *raw_str(PyObject *op) { +#ifdef PYBIND11_STR_RAW_STR_PY2_EMULATE_UNICODE_CONSTRUCTOR_NOT_IMPLICIT_ENCODE +#if PY_MAJOR_VERSION < 3 + PyObject *str_value = PyObject_Unicode(op); +#else + PyObject *str_value = PyObject_Str(op); +#endif + if (!str_value) throw error_already_set(); +#else PyObject *str_value = PyObject_Str(op); if (!str_value) throw error_already_set(); #if PY_MAJOR_VERSION < 3 PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); Py_XDECREF(str_value); str_value = unicode; +#endif #endif return str_value; } diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 6c2bebda87..435970e0e3 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -144,7 +144,7 @@ template struct list_caster { using value_conv = make_caster; bool load(handle src, bool convert) { - if (!isinstance(src) || isinstance(src)) + if (!isinstance(src) || isinstance(src) || isinstance(src)) return false; auto s = reinterpret_borrow(src); value.clear(); diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp index 9f7bc37dc6..d39aa95939 100644 --- a/tests/test_pytypes.cpp +++ b/tests/test_pytypes.cpp @@ -197,6 +197,7 @@ TEST_SUBMODULE(pytypes, m) { // test_constructors m.def("default_constructors", []() { return py::dict( + "bytes"_a=py::bytes(), "str"_a=py::str(), "bool"_a=py::bool_(), "int"_a=py::int_(), @@ -210,6 +211,7 @@ TEST_SUBMODULE(pytypes, m) { m.def("converting_constructors", [](py::dict d) { return py::dict( + "bytes"_a=py::bytes(d["bytes"]), "str"_a=py::str(d["str"]), "bool"_a=py::bool_(d["bool"]), "int"_a=py::int_(d["int"]), @@ -225,6 +227,7 @@ TEST_SUBMODULE(pytypes, m) { m.def("cast_functions", [](py::dict d) { // When converting between Python types, obj.cast() should be the same as T(obj) return py::dict( + "bytes"_a=d["bytes"].cast(), "str"_a=d["str"].cast(), "bool"_a=d["bool"].cast(), "int"_a=d["int"].cast(), @@ -367,4 +370,11 @@ TEST_SUBMODULE(pytypes, m) { buf, static_cast(strlen(buf))); }); #endif + + m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance(o); }); + m.def("isinstance_pybind11_unicode", [](py::object o) { return py::isinstance(o); }); + + m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); }); + m.def("pass_to_pybind11_unicode", [](py::str s) { return py::len(s); }); + m.def("pass_to_std_string", [](std::string s) { return s.size(); }); } diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index 4cfc707a32..a12039917c 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -188,11 +188,17 @@ def func(self, x, *args): def test_constructors(): """C++ default and converting constructors are equivalent to type calls in Python""" - types = [str, bool, int, float, tuple, list, dict, set] + types = [bytes, str, bool, int, float, tuple, list, dict, set] expected = {t.__name__: t() for t in types} + if str is bytes: # Python 2. + # Note that bytes.__name__ == 'str' in Python 2. + # pybind11::str is unicode even under Python 2. + expected["bytes"] = bytes() + expected["str"] = u"" # flake8 complains about unicode(). assert m.default_constructors() == expected data = { + bytes: b'41', # Currently no supported or working conversions. str: 42, bool: "Not empty", int: "42", @@ -205,6 +211,11 @@ def test_constructors(): } inputs = {k.__name__: v for k, v in data.items()} expected = {k.__name__: k(v) for k, v in data.items()} + if str is bytes: # Similar to the above. See comments above. + inputs["bytes"] = b'41' + inputs["str"] = 42 + expected["bytes"] = b'41' + expected["str"] = u"42" assert m.converting_constructors(inputs) == expected assert m.cast_functions(inputs) == expected @@ -347,3 +358,46 @@ def test_memoryview_from_memory(): assert isinstance(view, memoryview) assert view.format == 'B' assert bytes(view) == b'\xff\xe1\xab\x37' + + +def test_isinstance_string_types(): + actual_bytes = b"" + actual_unicode = u"" + if str is bytes: + # Python 2: NOT same as native str, BUT same as pybind11::str + native_unicode_type = unicode # noqa: F821 + else: + # Python 3: same as pybind11::str + native_unicode_type = str + + # Native isinstance, for comparison with below. + assert isinstance(actual_bytes, bytes) + assert not isinstance(actual_unicode, bytes) + assert not isinstance(actual_bytes, native_unicode_type) + assert isinstance(actual_unicode, native_unicode_type) + + # pybind11 isinstance + assert m.isinstance_pybind11_bytes(actual_bytes) + assert not m.isinstance_pybind11_bytes(actual_unicode) + assert not m.isinstance_pybind11_unicode(actual_bytes) + assert m.isinstance_pybind11_unicode(actual_unicode) + + +def test_pass_actual_bytes_or_unicode_to_string_types(): + actual_bytes = b"Bytes" + actual_unicode = u"Str" + + assert m.pass_to_pybind11_bytes(actual_bytes) == 5 + with pytest.raises(TypeError): + m.pass_to_pybind11_bytes(actual_unicode) # NO implicit encode + + assert m.pass_to_pybind11_unicode(actual_bytes) == 5 # implicit decode + assert m.pass_to_pybind11_unicode(actual_unicode) == 3 + + assert m.pass_to_std_string(actual_bytes) == 5 + assert m.pass_to_std_string(actual_unicode) == 3 + + malformed_utf8 = b"\x80" + with pytest.raises(UnicodeDecodeError) as excinfo: + m.pass_to_pybind11_unicode(malformed_utf8) + assert 'invalid start byte' in str(excinfo.value)