From 65130b7a7e4d612a8ebb39b9bb43d57630caa682 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 31 Jul 2020 06:01:37 -0700 Subject: [PATCH 1/9] Fixing pybind11::bytes() ambiguous overload issue and adding missing bytes type to test_constructors(). (#2340) --- include/pybind11/pytypes.h | 8 ++++---- tests/test_pytypes.cpp | 3 +++ tests/test_pytypes.py | 30 ++++++++++++++++++------------ 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 9000668b91..3244ed8c52 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -782,7 +782,9 @@ PYBIND11_NAMESPACE_END(detail) Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \ PYBIND11_DEPRECATED("Use py::isinstance(obj) instead") \ bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \ - static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } + static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \ + template \ + Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } #define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \ PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ @@ -792,9 +794,7 @@ PYBIND11_NAMESPACE_END(detail) { if (!m_ptr) throw error_already_set(); } \ Name(object &&o) \ : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \ - { if (!m_ptr) throw error_already_set(); } \ - template \ - Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } + { if (!m_ptr) throw error_already_set(); } #define PYBIND11_OBJECT(Name, Parent, CheckFun) \ PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp index 9f7bc37dc6..c76b1f1dcb 100644 --- a/tests/test_pytypes.cpp +++ b/tests/test_pytypes.cpp @@ -197,6 +197,7 @@ TEST_SUBMODULE(pytypes, m) { // test_constructors m.def("default_constructors", []() { return py::dict( + "bytes"_a=py::bytes(), "str"_a=py::str(), "bool"_a=py::bool_(), "int"_a=py::int_(), @@ -210,6 +211,7 @@ TEST_SUBMODULE(pytypes, m) { m.def("converting_constructors", [](py::dict d) { return py::dict( + "bytes"_a=py::bytes(d["bytes"]), "str"_a=py::str(d["str"]), "bool"_a=py::bool_(d["bool"]), "int"_a=py::int_(d["int"]), @@ -225,6 +227,7 @@ TEST_SUBMODULE(pytypes, m) { m.def("cast_functions", [](py::dict d) { // When converting between Python types, obj.cast() should be the same as T(obj) return py::dict( + "bytes"_a=d["bytes"].cast(), "str"_a=d["str"].cast(), "bool"_a=d["bool"].cast(), "int"_a=d["int"].cast(), diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index 4cfc707a32..f095dc6b45 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -188,23 +188,29 @@ def func(self, x, *args): def test_constructors(): """C++ default and converting constructors are equivalent to type calls in Python""" - types = [str, bool, int, float, tuple, list, dict, set] + types = [bytes, str, bool, int, float, tuple, list, dict, set] expected = {t.__name__: t() for t in types} + if str is bytes: # Python 2. + # Note that bytes.__name__ == 'str' in Python 2. + # pybind11::str is unicode even under Python 2. + expected["bytes"] = bytes() + expected["str"] = u"" # flake8 complains about unicode(). assert m.default_constructors() == expected data = { - str: 42, - bool: "Not empty", - int: "42", - float: "+1e3", - tuple: range(3), - list: range(3), - dict: [("two", 2), ("one", 1), ("three", 3)], - set: [4, 4, 5, 6, 6, 6], - memoryview: b'abc' + "bytes": b'41', # Currently no supported or working conversions. + "str": 42, + "bool": "Not empty", + "int": "42", + "float": "+1e3", + "tuple": range(3), + "list": range(3), + "dict": [("two", 2), ("one", 1), ("three", 3)], + "set": [4, 4, 5, 6, 6, 6], + "memoryview": b'abc' } - inputs = {k.__name__: v for k, v in data.items()} - expected = {k.__name__: k(v) for k, v in data.items()} + inputs = {k: v for k, v in data.items()} + expected = {k: eval(k)(v) for k, v in data.items()} assert m.converting_constructors(inputs) == expected assert m.cast_functions(inputs) == expected From 314b0d5bf093a3251518db81fb81a8c1ede5a08c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 18 Jun 2020 12:09:23 -0700 Subject: [PATCH 2/9] Adding test_isinstance_string_types, with asserts simply matching current behavior. --- tests/test_pytypes.cpp | 7 +++++++ tests/test_pytypes.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/tests/test_pytypes.cpp b/tests/test_pytypes.cpp index c76b1f1dcb..d39aa95939 100644 --- a/tests/test_pytypes.cpp +++ b/tests/test_pytypes.cpp @@ -370,4 +370,11 @@ TEST_SUBMODULE(pytypes, m) { buf, static_cast(strlen(buf))); }); #endif + + m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance(o); }); + m.def("isinstance_pybind11_unicode", [](py::object o) { return py::isinstance(o); }); + + m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); }); + m.def("pass_to_pybind11_unicode", [](py::str s) { return py::len(s); }); + m.def("pass_to_std_string", [](std::string s) { return s.size(); }); } diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index f095dc6b45..6b31851e94 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -353,3 +353,41 @@ def test_memoryview_from_memory(): assert isinstance(view, memoryview) assert view.format == 'B' assert bytes(view) == b'\xff\xe1\xab\x37' + + +def test_isinstance_string_types(): + actual_bytes = b"" + actual_unicode = u"" + if str is bytes: + # Python 2: NOT same as native str, BUT same as pybind11::str + native_unicode_type = unicode # noqa: F821 + else: + # Python 3: same as pybind11::str + native_unicode_type = str + + # Native isinstance, for comparison with below. + assert isinstance(actual_bytes, bytes) + assert not isinstance(actual_unicode, bytes) + assert not isinstance(actual_bytes, native_unicode_type) + assert isinstance(actual_unicode, native_unicode_type) + + # pybind11 isinstance + assert m.isinstance_pybind11_bytes(actual_bytes) + assert not m.isinstance_pybind11_bytes(actual_unicode) + assert m.isinstance_pybind11_unicode(actual_bytes) # NOT like native + assert m.isinstance_pybind11_unicode(actual_unicode) + + +def test_pass_actual_bytes_or_unicode_to_string_types(): + actual_bytes = b"Bytes" + actual_unicode = u"Str" + + assert m.pass_to_pybind11_bytes(actual_bytes) == 5 + with pytest.raises(TypeError): + m.pass_to_pybind11_bytes(actual_unicode) # NO implicit encode + + assert m.pass_to_pybind11_unicode(actual_bytes) == 5 # implicit decode + assert m.pass_to_pybind11_unicode(actual_unicode) == 3 + + assert m.pass_to_std_string(actual_bytes) == 5 + assert m.pass_to_std_string(actual_unicode) == 3 From 4c1e79a711806c02ef0b30c668c42fa963408736 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 21 Jul 2020 14:51:20 -0700 Subject: [PATCH 3/9] adding isinstance specialization to match native behavior, fixing up tests --- include/pybind11/pytypes.h | 8 ++++++++ include/pybind11/stl.h | 2 +- tests/test_eval.py | 2 ++ tests/test_pytypes.py | 5 +++-- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 3244ed8c52..292ecd0a6e 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -948,6 +948,8 @@ inline namespace literals { inline str operator"" _s(const char *s, size_t size) { return {s, size}; } } +template <> inline bool isinstance(handle obj) { return PyUnicode_Check(obj.ptr()); } + /// \addtogroup pytypes /// @{ class bytes : public object { @@ -1010,6 +1012,12 @@ inline str::str(const bytes& b) { m_ptr = obj.release().ptr(); } +#if PY_MAJOR_VERSION < 3 +using native_str = bytes; +#else +using native_str = str; +#endif + /// \addtogroup pytypes /// @{ class none : public object { diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 6c2bebda87..63b5a5ea1b 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -144,7 +144,7 @@ template struct list_caster { using value_conv = make_caster; bool load(handle src, bool convert) { - if (!isinstance(src) || isinstance(src)) + if (!isinstance(src) || isinstance(src) || isinstance(src)) return false; auto s = reinterpret_borrow(src); value.clear(); diff --git a/tests/test_eval.py b/tests/test_eval.py index 66bec55f8b..89e7d92789 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -18,6 +18,8 @@ def test_evals(capture): @pytest.unsupported_on_pypy3 def test_eval_file(): filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py") + if isinstance(filename, bytes): # true for Python 2 only + filename = filename.decode() # effectively six.ensure_text() assert m.test_eval_file(filename) assert m.test_eval_file_failure() diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index 6b31851e94..f8fd35e2aa 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -374,7 +374,7 @@ def test_isinstance_string_types(): # pybind11 isinstance assert m.isinstance_pybind11_bytes(actual_bytes) assert not m.isinstance_pybind11_bytes(actual_unicode) - assert m.isinstance_pybind11_unicode(actual_bytes) # NOT like native + assert not m.isinstance_pybind11_unicode(actual_bytes) assert m.isinstance_pybind11_unicode(actual_unicode) @@ -386,7 +386,8 @@ def test_pass_actual_bytes_or_unicode_to_string_types(): with pytest.raises(TypeError): m.pass_to_pybind11_bytes(actual_unicode) # NO implicit encode - assert m.pass_to_pybind11_unicode(actual_bytes) == 5 # implicit decode + with pytest.raises(TypeError): + m.pass_to_pybind11_unicode(actual_bytes) # NO implicit decode assert m.pass_to_pybind11_unicode(actual_unicode) == 3 assert m.pass_to_std_string(actual_bytes) == 5 From f416fa6d3e49f0f618e22c850023492e40e64d0a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 3 Aug 2020 14:00:00 -0700 Subject: [PATCH 4/9] reverting list_caster behavior to established: no more need for native_str --- include/pybind11/pytypes.h | 6 ------ include/pybind11/stl.h | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 292ecd0a6e..1e57c38635 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -1012,12 +1012,6 @@ inline str::str(const bytes& b) { m_ptr = obj.release().ptr(); } -#if PY_MAJOR_VERSION < 3 -using native_str = bytes; -#else -using native_str = str; -#endif - /// \addtogroup pytypes /// @{ class none : public object { diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h index 63b5a5ea1b..435970e0e3 100644 --- a/include/pybind11/stl.h +++ b/include/pybind11/stl.h @@ -144,7 +144,7 @@ template struct list_caster { using value_conv = make_caster; bool load(handle src, bool convert) { - if (!isinstance(src) || isinstance(src) || isinstance(src)) + if (!isinstance(src) || isinstance(src) || isinstance(src)) return false; auto s = reinterpret_borrow(src); value.clear(); From 423be9235c2a8b0941c6b5369c2957988740873b Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 3 Aug 2020 14:30:28 -0700 Subject: [PATCH 5/9] Replacing detail::PyUnicode_Check_Permissive with PyUnicode_Check. With that pybind11::str is guaranteed to be a PyUnicodeObject. --- include/pybind11/pytypes.h | 6 +----- tests/test_pytypes.py | 29 +++++++++++++++++------------ 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 1e57c38635..2217b974ab 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -738,8 +738,6 @@ inline bool PyIterable_Check(PyObject *obj) { inline bool PyNone_Check(PyObject *o) { return o == Py_None; } inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; } -inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); } - inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; } class kwargs_proxy : public handle { @@ -885,7 +883,7 @@ class bytes; class str : public object { public: - PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str) + PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str) str(const char *c, size_t n) : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) { @@ -948,8 +946,6 @@ inline namespace literals { inline str operator"" _s(const char *s, size_t size) { return {s, size}; } } -template <> inline bool isinstance(handle obj) { return PyUnicode_Check(obj.ptr()); } - /// \addtogroup pytypes /// @{ class bytes : public object { diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index f8fd35e2aa..da2a69e929 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -198,19 +198,24 @@ def test_constructors(): assert m.default_constructors() == expected data = { - "bytes": b'41', # Currently no supported or working conversions. - "str": 42, - "bool": "Not empty", - "int": "42", - "float": "+1e3", - "tuple": range(3), - "list": range(3), - "dict": [("two", 2), ("one", 1), ("three", 3)], - "set": [4, 4, 5, 6, 6, 6], - "memoryview": b'abc' + bytes: b'41', # Currently no supported or working conversions. + str: 42, + bool: "Not empty", + int: "42", + float: "+1e3", + tuple: range(3), + list: range(3), + dict: [("two", 2), ("one", 1), ("three", 3)], + set: [4, 4, 5, 6, 6, 6], + memoryview: b'abc' } - inputs = {k: v for k, v in data.items()} - expected = {k: eval(k)(v) for k, v in data.items()} + inputs = {k.__name__: v for k, v in data.items()} + expected = {k.__name__: k(v) for k, v in data.items()} + if str is bytes: # Similar to the above. See comments above. + inputs["bytes"] = b'41' + inputs["str"] = 42 + expected["bytes"] = b'41' + expected["str"] = u"42" assert m.converting_constructors(inputs) == expected assert m.cast_functions(inputs) == expected From e402c574dfe1e9e575ba3681bde1f150b651e84a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 3 Aug 2020 15:38:43 -0700 Subject: [PATCH 6/9] Making Python 2 pybind11::str::raw_string equivalent to the Python 3 version. --- include/pybind11/pytypes.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 2217b974ab..85423ab7b1 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -928,12 +928,12 @@ class str : public object { private: /// Return string representation -- always returns a new reference, even if already a str static PyObject *raw_str(PyObject *op) { - PyObject *str_value = PyObject_Str(op); - if (!str_value) throw error_already_set(); #if PY_MAJOR_VERSION < 3 - PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); - Py_XDECREF(str_value); str_value = unicode; + PyObject *str_value = PyObject_Unicode(op); +#else + PyObject *str_value = PyObject_Str(op); #endif + if (!str_value) throw error_already_set(); return str_value; } }; From f72a06521c6ff4c74f39bf500b6e972f26f51315 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 3 Aug 2020 18:50:33 -0700 Subject: [PATCH 7/9] Re-introducing implicit bytes-to-unicode conversion with specialized load in pyobject_caster --- include/pybind11/cast.h | 18 ++++++++++++++++++ tests/test_eval.py | 2 -- tests/test_pytypes.py | 3 +-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index c1d0bbc34d..0b97a44680 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -1629,7 +1629,25 @@ struct pyobject_caster { template ::value, int> = 0> bool load(handle src, bool /* convert */) { value = src; return static_cast(value); } +#ifdef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES template ::value, int> = 0> +#else + template ::value, int> = 0> + bool load(handle src, bool /* convert */) { + if (isinstance(src)) { + PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr); + if (!str_from_bytes) throw error_already_set(); + value = reinterpret_steal(str_from_bytes); + return true; + } + if (!isinstance(src)) + return false; + value = reinterpret_borrow(src); + return true; + } + + template ::value && !std::is_same::value, int> = 0> +#endif bool load(handle src, bool /* convert */) { if (!isinstance(src)) return false; diff --git a/tests/test_eval.py b/tests/test_eval.py index 89e7d92789..66bec55f8b 100644 --- a/tests/test_eval.py +++ b/tests/test_eval.py @@ -18,8 +18,6 @@ def test_evals(capture): @pytest.unsupported_on_pypy3 def test_eval_file(): filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py") - if isinstance(filename, bytes): # true for Python 2 only - filename = filename.decode() # effectively six.ensure_text() assert m.test_eval_file(filename) assert m.test_eval_file_failure() diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index da2a69e929..05e9f2da3c 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -391,8 +391,7 @@ def test_pass_actual_bytes_or_unicode_to_string_types(): with pytest.raises(TypeError): m.pass_to_pybind11_bytes(actual_unicode) # NO implicit encode - with pytest.raises(TypeError): - m.pass_to_pybind11_unicode(actual_bytes) # NO implicit decode + assert m.pass_to_pybind11_unicode(actual_bytes) == 5 # implicit decode assert m.pass_to_pybind11_unicode(actual_unicode) == 3 assert m.pass_to_std_string(actual_bytes) == 5 From fe32344bd5b85ce71e3a0491ca8700f1a4a40a39 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 4 Aug 2020 09:44:44 -0700 Subject: [PATCH 8/9] adding test: pass_to_pybind11_unicode(malformed_utf8) --- tests/test_pytypes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_pytypes.py b/tests/test_pytypes.py index 05e9f2da3c..a12039917c 100644 --- a/tests/test_pytypes.py +++ b/tests/test_pytypes.py @@ -396,3 +396,8 @@ def test_pass_actual_bytes_or_unicode_to_string_types(): assert m.pass_to_std_string(actual_bytes) == 5 assert m.pass_to_std_string(actual_unicode) == 3 + + malformed_utf8 = b"\x80" + with pytest.raises(UnicodeDecodeError) as excinfo: + m.pass_to_pybind11_unicode(malformed_utf8) + assert 'invalid start byte' in str(excinfo.value) From d7add69f4f38d4d30e36925a2c3831285947b425 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 4 Aug 2020 17:21:07 -0700 Subject: [PATCH 9/9] restoring original pybind11::str::raw_str implementation --- include/pybind11/pytypes.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h index 85423ab7b1..75c46d6b2f 100644 --- a/include/pybind11/pytypes.h +++ b/include/pybind11/pytypes.h @@ -928,12 +928,21 @@ class str : public object { private: /// Return string representation -- always returns a new reference, even if already a str static PyObject *raw_str(PyObject *op) { +#ifdef PYBIND11_STR_RAW_STR_PY2_EMULATE_UNICODE_CONSTRUCTOR_NOT_IMPLICIT_ENCODE #if PY_MAJOR_VERSION < 3 PyObject *str_value = PyObject_Unicode(op); #else PyObject *str_value = PyObject_Str(op); #endif if (!str_value) throw error_already_set(); +#else + PyObject *str_value = PyObject_Str(op); + if (!str_value) throw error_already_set(); +#if PY_MAJOR_VERSION < 3 + PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); + Py_XDECREF(str_value); str_value = unicode; +#endif +#endif return str_value; } };