Skip to content

Adding test_isinstance_string_types, with asserts simply matching cur… #2256

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
18 changes: 18 additions & 0 deletions include/pybind11/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -1629,7 +1629,25 @@ struct pyobject_caster {
// Overload selected only when the caster's `type` is exactly `handle`.
// The source handle is stored as-is (no type check is performed here); the
// load succeeds exactly when the stored value converts to `true`.
template <typename T = type, enable_if_t<std::is_same<T, handle>::value, int> = 0>
bool load(handle src, bool /* convert */) {
    value = src;
    const bool loaded = static_cast<bool>(value);
    return loaded;
}

#ifdef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES
template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
#else
// Overload selected only when the caster's `type` is exactly `str`.
//
// Accepts two kinds of input:
//   * a `bytes` object, which is implicitly decoded as UTF-8 into a new
//     unicode object (a decoding failure is raised as error_already_set);
//   * any object for which isinstance<type>(src) holds, which is borrowed.
// Anything else is rejected by returning false.
template <typename T = type, enable_if_t<std::is_same<T, str>::value, int> = 0>
bool load(handle src, bool /* convert */) {
    if (!isinstance<bytes>(src)) {
        // Not bytes: only an object passing the str check is acceptable.
        const bool is_match = isinstance<type>(src);
        if (is_match)
            value = reinterpret_borrow<type>(src);
        return is_match;
    }

    // bytes input: decode using UTF-8 and hand the resulting object to `value`.
    PyObject *decoded = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
    if (decoded == nullptr)
        throw error_already_set();
    value = reinterpret_steal<type>(decoded);
    return true;
}

template <typename T = type, enable_if_t<std::is_base_of<object, T>::value && !std::is_same<T, str>::value, int> = 0>
#endif
bool load(handle src, bool /* convert */) {
if (!isinstance<type>(src))
return false;
Expand Down
21 changes: 14 additions & 7 deletions include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -738,8 +738,6 @@ inline bool PyIterable_Check(PyObject *obj) {
// True when `o` is the Python `None` singleton (pointer identity with Py_None).
inline bool PyNone_Check(PyObject *o) {
    const bool is_none = (o == Py_None);
    return is_none;
}
// True when `o` is the Python `Ellipsis` singleton (pointer identity with Py_Ellipsis).
inline bool PyEllipsis_Check(PyObject *o) {
    const bool is_ellipsis = (o == Py_Ellipsis);
    return is_ellipsis;
}

inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }

// True when the type of `o` is exactly PyStaticMethod_Type. This is a direct
// comparison of the object's type pointer, so `o` must not be null.
inline bool PyStaticMethod_Check(PyObject *o) {
    const bool is_staticmethod = (o->ob_type == &PyStaticMethod_Type);
    return is_staticmethod;
}

class kwargs_proxy : public handle {
Expand Down Expand Up @@ -782,7 +780,9 @@ PYBIND11_NAMESPACE_END(detail)
Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \
PYBIND11_DEPRECATED("Use py::isinstance<py::python_type>(obj) instead") \
bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \
static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); }
static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \
template <typename Policy_> \
Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }

#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
Expand All @@ -792,9 +792,7 @@ PYBIND11_NAMESPACE_END(detail)
{ if (!m_ptr) throw error_already_set(); } \
Name(object &&o) \
: Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
{ if (!m_ptr) throw error_already_set(); } \
template <typename Policy_> \
Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
{ if (!m_ptr) throw error_already_set(); }

#define PYBIND11_OBJECT(Name, Parent, CheckFun) \
PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
Expand Down Expand Up @@ -885,7 +883,7 @@ class bytes;

class str : public object {
public:
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str)

str(const char *c, size_t n)
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
Expand Down Expand Up @@ -930,11 +928,20 @@ class str : public object {
private:
/// Return string representation -- always returns a new reference, even if already a str
static PyObject *raw_str(PyObject *op) {
    // The conversion strategy is chosen at build time by
    // PYBIND11_STR_RAW_STR_PY2_EMULATE_UNICODE_CONSTRUCTOR_NOT_IMPLICIT_ENCODE.
#ifdef PYBIND11_STR_RAW_STR_PY2_EMULATE_UNICODE_CONSTRUCTOR_NOT_IMPLICIT_ENCODE
    // Macro defined: use PyObject_Unicode on Python 2 and PyObject_Str on
    // Python 3; a failed conversion is raised as error_already_set.
#if PY_MAJOR_VERSION < 3
    PyObject *text = PyObject_Unicode(op);
#else
    PyObject *text = PyObject_Str(op);
#endif
    if (text == nullptr) throw error_already_set();
    return text;
#else
    // Macro not defined: always start from PyObject_Str.
    PyObject *text = PyObject_Str(op);
    if (text == nullptr) throw error_already_set();
#if PY_MAJOR_VERSION < 3
    // Python 2: decode the PyObject_Str result as UTF-8 and release the
    // intermediate object. The decode result is returned without a check
    // here. NOTE(review): presumably callers test for a null result -- confirm.
    PyObject *decoded = PyUnicode_FromEncodedObject(text, "utf-8", nullptr);
    Py_XDECREF(text);
    return decoded;
#else
    return text;
#endif
#endif
}
Expand Down
2 changes: 1 addition & 1 deletion include/pybind11/stl.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
using value_conv = make_caster<Value>;

bool load(handle src, bool convert) {
if (!isinstance<sequence>(src) || isinstance<str>(src))
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
return false;
auto s = reinterpret_borrow<sequence>(src);
value.clear();
Expand Down
10 changes: 10 additions & 0 deletions tests/test_pytypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ TEST_SUBMODULE(pytypes, m) {
// test_constructors
m.def("default_constructors", []() {
return py::dict(
"bytes"_a=py::bytes(),
"str"_a=py::str(),
"bool"_a=py::bool_(),
"int"_a=py::int_(),
Expand All @@ -210,6 +211,7 @@ TEST_SUBMODULE(pytypes, m) {

m.def("converting_constructors", [](py::dict d) {
return py::dict(
"bytes"_a=py::bytes(d["bytes"]),
"str"_a=py::str(d["str"]),
"bool"_a=py::bool_(d["bool"]),
"int"_a=py::int_(d["int"]),
Expand All @@ -225,6 +227,7 @@ TEST_SUBMODULE(pytypes, m) {
m.def("cast_functions", [](py::dict d) {
// When converting between Python types, obj.cast<T>() should be the same as T(obj)
return py::dict(
"bytes"_a=d["bytes"].cast<py::bytes>(),
"str"_a=d["str"].cast<py::str>(),
"bool"_a=d["bool"].cast<py::bool_>(),
"int"_a=d["int"].cast<py::int_>(),
Expand Down Expand Up @@ -367,4 +370,11 @@ TEST_SUBMODULE(pytypes, m) {
buf, static_cast<ssize_t>(strlen(buf)));
});
#endif

m.def("isinstance_pybind11_bytes", [](py::object o) { return py::isinstance<py::bytes>(o); });
m.def("isinstance_pybind11_unicode", [](py::object o) { return py::isinstance<py::str>(o); });

m.def("pass_to_pybind11_bytes", [](py::bytes b) { return py::len(b); });
m.def("pass_to_pybind11_unicode", [](py::str s) { return py::len(s); });
m.def("pass_to_std_string", [](std::string s) { return s.size(); });
}
56 changes: 55 additions & 1 deletion tests/test_pytypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,17 @@ def func(self, x, *args):

def test_constructors():
"""C++ default and converting constructors are equivalent to type calls in Python"""
types = [str, bool, int, float, tuple, list, dict, set]
types = [bytes, str, bool, int, float, tuple, list, dict, set]
expected = {t.__name__: t() for t in types}
if str is bytes: # Python 2.
# Note that bytes.__name__ == 'str' in Python 2.
# pybind11::str is unicode even under Python 2.
expected["bytes"] = bytes()
expected["str"] = u"" # flake8 complains about unicode().
assert m.default_constructors() == expected

data = {
bytes: b'41', # Currently no supported or working conversions.
str: 42,
bool: "Not empty",
int: "42",
Expand All @@ -205,6 +211,11 @@ def test_constructors():
}
inputs = {k.__name__: v for k, v in data.items()}
expected = {k.__name__: k(v) for k, v in data.items()}
if str is bytes: # Similar to the above. See comments above.
inputs["bytes"] = b'41'
inputs["str"] = 42
expected["bytes"] = b'41'
expected["str"] = u"42"

assert m.converting_constructors(inputs) == expected
assert m.cast_functions(inputs) == expected
Expand Down Expand Up @@ -347,3 +358,46 @@ def test_memoryview_from_memory():
assert isinstance(view, memoryview)
assert view.format == 'B'
assert bytes(view) == b'\xff\xe1\xab\x37'


def test_isinstance_string_types():
    """py::isinstance<py::bytes> / py::isinstance<py::str> mirror native isinstance checks."""
    bytes_obj = b""
    unicode_obj = u""
    # Python 2: the unicode type is NOT the native str, BUT it matches pybind11::str.
    # Python 3: the native str is the unicode type, same as pybind11::str.
    native_unicode_type = unicode if str is bytes else str  # noqa: F821

    # Baseline: what the interpreter's own isinstance reports.
    assert isinstance(bytes_obj, bytes)
    assert not isinstance(unicode_obj, bytes)
    assert not isinstance(bytes_obj, native_unicode_type)
    assert isinstance(unicode_obj, native_unicode_type)

    # The pybind11 checks are expected to give the same four answers.
    assert m.isinstance_pybind11_bytes(bytes_obj)
    assert not m.isinstance_pybind11_bytes(unicode_obj)
    assert not m.isinstance_pybind11_unicode(bytes_obj)
    assert m.isinstance_pybind11_unicode(unicode_obj)


def test_pass_actual_bytes_or_unicode_to_string_types():
    """Argument conversion of bytes/unicode into py::bytes, py::str and std::string."""
    bytes_arg = b"Bytes"
    unicode_arg = u"Str"

    # py::bytes parameter: bytes is accepted, unicode is rejected.
    assert m.pass_to_pybind11_bytes(bytes_arg) == 5
    with pytest.raises(TypeError):
        m.pass_to_pybind11_bytes(unicode_arg)  # NO implicit encode

    # py::str parameter: both are accepted.
    assert m.pass_to_pybind11_unicode(bytes_arg) == 5  # implicit decode
    assert m.pass_to_pybind11_unicode(unicode_arg) == 3

    # std::string parameter: both are accepted.
    assert m.pass_to_std_string(bytes_arg) == 5
    assert m.pass_to_std_string(unicode_arg) == 3

    # Bytes that are not valid UTF-8 must surface the decoding error.
    malformed_utf8 = b"\x80"
    with pytest.raises(UnicodeDecodeError) as excinfo:
        m.pass_to_pybind11_unicode(malformed_utf8)
    assert 'invalid start byte' in str(excinfo.value)