diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index 4901ea3911..5d52ca8589 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -1631,6 +1631,14 @@ struct pyobject_caster { template ::value, int> = 0> bool load(handle src, bool /* convert */) { +#ifndef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES + if (std::is_same::value && isinstance(src)) { + PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr); + if (!str_from_bytes) throw error_already_set(); + value = reinterpret_steal(str_from_bytes); + return true; + } +#endif if (!isinstance(src)) return false; value = reinterpret_borrow(src); diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index c322ff27ba..64805a4769 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -752,8 +752,6 @@ inline bool PyIterable_Check(PyObject *obj) { inline bool PyNone_Check(PyObject *o) { return o == Py_None; } inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; } -inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); } - inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; } class kwargs_proxy : public handle { @@ -796,7 +794,9 @@ PYBIND11_NAMESPACE_END(detail) Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \ PYBIND11_DEPRECATED("Use py::isinstance(obj) instead") \ bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \ - static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } + static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \ + template \ + Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } #define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \ PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ @@ -806,9 +806,7 @@ PYBIND11_NAMESPACE_END(detail) { if (!m_ptr) throw error_already_set(); } \ Name(object &&o) \ : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \ - { if (!m_ptr) throw error_already_set(); } \ - template \ - Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } + { if (!m_ptr) throw error_already_set(); } #define PYBIND11_OBJECT(Name, Parent, CheckFun) \ PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ @@ -899,7 +897,7 @@ class bytes; class str : public object { public: - PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str) + PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str) str(const char *c, size_t n) : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) { @@ -920,7 +918,7 @@ class str : public object { Return a string representation of the object. This is analogous to the ``str()`` function in Python. \endrst */ - explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { } + explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { if (!m_ptr) throw error_already_set(); } operator std::string() const { object temp = *this; @@ -945,8 +943,8 @@ class str : public object { /// Return string representation -- always returns a new reference, even if already a str static PyObject *raw_str(PyObject *op) { PyObject *str_value = PyObject_Str(op); - if (!str_value) throw error_already_set(); #if PY_MAJOR_VERSION < 3 + if (!str_value) throw error_already_set(); PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); Py_XDECREF(str_value); str_value = unicode; #endif diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 6c2bebda87..435970e0e3 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -144,7 +144,7 @@ template struct list_caster { using value_conv = make_caster; bool load(handle src, bool convert) { - if (!isinstance(src) || isinstance(src)) + if (!isinstance(src) || isinstance(src) || isinstance(src)) return false; auto s = reinterpret_borrow(src); value.clear(); diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp index 9dae6e7d62..10e03b44d0 100644 --- a/tests/test_pytypes.cpp +++ b/tests/test_pytypes.cpp @@ -80,6 +80,7 @@ TEST_SUBMODULE(pytypes, m) { m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); }); m.def("str_from_object", [](const py::object& obj) { return py::str(obj); }); m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); }); + m.def("str_from_handle", [](py::handle h) { return py::str(h); }); m.def("str_format", []() { auto s1 = "{} + {} = {}"_s.format(1, 2, 3); @@ -197,6 +198,7 @@ TEST_SUBMODULE(pytypes, m) { // test_constructors m.def("default_constructors", []() { return py::dict( + "bytes"_a=py::bytes(), "str"_a=py::str(), "bool"_a=py::bool_(), "int"_a=py::int_(), @@ -210,6 +212,7 @@ TEST_SUBMODULE(pytypes, m) { m.def("converting_constructors", [](py::dict d) { return py::dict( + "bytes"_a=py::bytes(d["bytes"]), "str"_a=py::str(d["str"]), "bool"_a=py::bool_(d["bool"]), "int"_a=py::int_(d["int"]), @@ -225,6 +228,7 @@ TEST_SUBMODULE(pytypes, m) { m.def("cast_functions", [](py::dict d) { // When converting between Python types, obj.cast() should be the same as T(obj) return py::dict( + "bytes"_a=d["bytes"].cast(), "str"_a=d["str"].cast(), "bool"_a=d["bool"].cast(), "int"_a=d["int"].cast(), @@ -369,4 +373,11 @@ TEST_SUBMODULE(pytypes, m) { buf, static_cast(strlen(buf))); }); #endif + + m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance(o); }); + m.def("isinstance_pybind11_str", [](py::object o) { return py::isinstance(o); }); + + m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); }); + m.def("pass_to_pybind11_str", [](py::str s) { return py::len(s); }); + m.def("pass_to_std_string", [](std::string s) { return s.size(); }); } diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index c21ad61146..f2c5ddeeaf 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -104,11 +104,22 @@ def __repr__(self): assert m.str_from_object(A()) == "this is a str" assert m.repr_from_object(A()) == "this is a repr" + assert m.str_from_handle(A()) == "this is a str" s1, s2 = m.str_format() assert s1 == "1 + 2 = 3" assert s1 == s2 + malformed_utf8 = b"\x80" + if env.PY2: + with pytest.raises(UnicodeDecodeError): + assert m.str_from_object(malformed_utf8) + with pytest.raises(UnicodeDecodeError): + assert m.str_from_handle(malformed_utf8) + else: + assert m.str_from_object(malformed_utf8) == "b'\\x80'" + assert m.str_from_handle(malformed_utf8) == "b'\\x80'" + def test_bytes(doc): assert m.bytes_from_string().decode() == "foo" @@ -190,11 +201,17 @@ def func(self, x, *args): def test_constructors(): """C++ default and converting constructors are equivalent to type calls in Python""" - types = [str, bool, int, float, tuple, list, dict, set] + types = [bytes, str, bool, int, float, tuple, list, dict, set] expected = {t.__name__: t() for t in types} + if env.PY2: + # Note that bytes.__name__ == 'str' in Python 2. + # pybind11::str is unicode even under Python 2. + expected["bytes"] = bytes() + expected["str"] = u"" # flake8 complains about unicode(). assert m.default_constructors() == expected data = { + bytes: b'41', # Currently no supported or working conversions. str: 42, bool: "Not empty", int: "42", @@ -207,6 +224,11 @@ def test_constructors(): } inputs = {k.__name__: v for k, v in data.items()} expected = {k.__name__: k(v) for k, v in data.items()} + if env.PY2: # Similar to the above. See comments above. + inputs["bytes"] = b'41' + inputs["str"] = 42 + expected["bytes"] = b'41' + expected["str"] = u"42" assert m.converting_constructors(inputs) == expected assert m.cast_functions(inputs) == expected @@ -245,13 +267,20 @@ def test_pybind11_str_raw_str(): valid_orig = u"DZ" valid_utf8 = valid_orig.encode("utf-8") valid_cvt = cvt(valid_utf8) - assert type(valid_cvt) == bytes # Probably surprising. - assert valid_cvt == b'\xc7\xb1' + assert type(valid_cvt) is unicode if env.PY2 else str # noqa: F821 + if env.PY2: + assert valid_cvt == valid_orig + else: + assert valid_cvt == u"b'\\xc7\\xb1'" malformed_utf8 = b'\x80' - malformed_cvt = cvt(malformed_utf8) - assert type(malformed_cvt) == bytes # Probably surprising. - assert malformed_cvt == b'\x80' + if env.PY2: + with pytest.raises(UnicodeDecodeError): + cvt(malformed_utf8) + else: + malformed_cvt = cvt(malformed_utf8) + assert type(malformed_cvt) is unicode if env.PY2 else str # noqa: F821 + assert malformed_cvt == u"b'\\x80'" def test_implicit_casting(): @@ -379,3 +408,27 @@ def test_memoryview_from_memory(): assert isinstance(view, memoryview) assert view.format == 'B' assert bytes(view) == b'\xff\xe1\xab\x37' + + +def test_isinstance_string_types(): + assert m.isinstance_pybind11_bytes(b"") + assert not m.isinstance_pybind11_bytes(u"") + + assert m.isinstance_pybind11_str(u"") + assert not m.isinstance_pybind11_str(b"") + + +def test_pass_bytes_or_unicode_to_string_types(): + assert m.pass_to_pybind11_bytes(b"Bytes") == 5 + with pytest.raises(TypeError): + m.pass_to_pybind11_bytes(u"Str") # NO implicit encode + + assert m.pass_to_pybind11_str(b"Bytes") == 5 # implicit decode + assert m.pass_to_pybind11_str(u"Str") == 3 + + assert m.pass_to_std_string(b"Bytes") == 5 + assert m.pass_to_std_string(u"Str") == 3 + + malformed_utf8 = b"\x80" + with pytest.raises(UnicodeDecodeError): + m.pass_to_pybind11_str(malformed_utf8)