Skip to content

Tracking PR: intermediate state of completed str/bytes cleaning up PRs #2348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
8 changes: 8 additions & 0 deletions include/pybind11/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,14 @@ struct pyobject_caster {

template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
bool load(handle src, bool /* convert */) {
#ifndef PYBIND11_DISABLE_IMPLICIT_STR_FROM_BYTES
if (std::is_same<T, str>::value && isinstance<bytes>(src)) {
PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
if (!str_from_bytes) throw error_already_set();
value = reinterpret_steal<type>(str_from_bytes);
return true;
}
#endif
if (!isinstance<type>(src))
return false;
value = reinterpret_borrow<type>(src);
Expand Down
16 changes: 7 additions & 9 deletions include/pybind11/pytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,6 @@ inline bool PyIterable_Check(PyObject *obj) {
/// Returns true when `o` is the very same object as `Py_None` (pointer identity).
inline bool PyNone_Check(PyObject *o) {
    return o == Py_None;
}
/// Returns true when `o` is the very same object as `Py_Ellipsis` (pointer identity).
inline bool PyEllipsis_Check(PyObject *o) {
    return o == Py_Ellipsis;
}

/// Permissive string check: accepts an object that passes `PyUnicode_Check`
/// or, failing that, one that passes `PYBIND11_BYTES_CHECK`.
inline bool PyUnicode_Check_Permissive(PyObject *o) {
    if (PyUnicode_Check(o)) {
        return true;
    }
    // Only consulted when the unicode check fails, as with the short-circuit `||`.
    return PYBIND11_BYTES_CHECK(o) != 0;
}

/// Returns true when the type pointer stored in `o` is exactly
/// `PyStaticMethod_Type`; the comparison is by address, so only an exact
/// type match is accepted.
inline bool PyStaticMethod_Check(PyObject *o) {
    return o->ob_type == &PyStaticMethod_Type;
}

class kwargs_proxy : public handle {
Expand Down Expand Up @@ -796,7 +794,9 @@ PYBIND11_NAMESPACE_END(detail)
Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \
PYBIND11_DEPRECATED("Use py::isinstance<py::python_type>(obj) instead") \
bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \
static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); }
static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \
template <typename Policy_> \
Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }

#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
Expand All @@ -806,9 +806,7 @@ PYBIND11_NAMESPACE_END(detail)
{ if (!m_ptr) throw error_already_set(); } \
Name(object &&o) \
: Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
{ if (!m_ptr) throw error_already_set(); } \
template <typename Policy_> \
Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
{ if (!m_ptr) throw error_already_set(); }

#define PYBIND11_OBJECT(Name, Parent, CheckFun) \
PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
Expand Down Expand Up @@ -899,7 +897,7 @@ class bytes;

class str : public object {
public:
PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str)
PYBIND11_OBJECT_CVT(str, object, PyUnicode_Check, raw_str)

str(const char *c, size_t n)
: object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) {
Expand All @@ -920,7 +918,7 @@ class str : public object {
Return a string representation of the object. This is analogous to
the ``str()`` function in Python.
\endrst */
explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { }
explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { if (!m_ptr) throw error_already_set(); }

operator std::string() const {
object temp = *this;
Expand All @@ -945,8 +943,8 @@ class str : public object {
/// Return string representation -- always returns a new reference, even if already a str
static PyObject *raw_str(PyObject *op) {
PyObject *str_value = PyObject_Str(op);
if (!str_value) throw error_already_set();
#if PY_MAJOR_VERSION < 3
if (!str_value) throw error_already_set();
PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr);
Py_XDECREF(str_value); str_value = unicode;
#endif
Expand Down
2 changes: 1 addition & 1 deletion include/pybind11/stl.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ template <typename Type, typename Value> struct list_caster {
using value_conv = make_caster<Value>;

bool load(handle src, bool convert) {
if (!isinstance<sequence>(src) || isinstance<str>(src))
if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
return false;
auto s = reinterpret_borrow<sequence>(src);
value.clear();
Expand Down
11 changes: 11 additions & 0 deletions tests/test_pytypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ TEST_SUBMODULE(pytypes, m) {
// Builds a py::str from a three-byte py::bytes value.
m.def("str_from_bytes", []() {
    return py::str(py::bytes("boo", 3));
});
// Builds a py::str from an arbitrary Python object.
m.def("str_from_object", [](const py::object& obj) {
    return py::str(obj);
});
// Returns py::repr of an arbitrary Python object.
m.def("repr_from_object", [](const py::object& obj) {
    return py::repr(obj);
});
// Same as str_from_object, but goes through the py::handle constructor of py::str.
m.def("str_from_handle", [](py::handle h) {
    return py::str(h);
});

m.def("str_format", []() {
auto s1 = "{} + {} = {}"_s.format(1, 2, 3);
Expand Down Expand Up @@ -197,6 +198,7 @@ TEST_SUBMODULE(pytypes, m) {
// test_constructors
m.def("default_constructors", []() {
return py::dict(
"bytes"_a=py::bytes(),
"str"_a=py::str(),
"bool"_a=py::bool_(),
"int"_a=py::int_(),
Expand All @@ -210,6 +212,7 @@ TEST_SUBMODULE(pytypes, m) {

m.def("converting_constructors", [](py::dict d) {
return py::dict(
"bytes"_a=py::bytes(d["bytes"]),
"str"_a=py::str(d["str"]),
"bool"_a=py::bool_(d["bool"]),
"int"_a=py::int_(d["int"]),
Expand All @@ -225,6 +228,7 @@ TEST_SUBMODULE(pytypes, m) {
m.def("cast_functions", [](py::dict d) {
// When converting between Python types, obj.cast<T>() should be the same as T(obj)
return py::dict(
"bytes"_a=d["bytes"].cast<py::bytes>(),
"str"_a=d["str"].cast<py::str>(),
"bool"_a=d["bool"].cast<py::bool_>(),
"int"_a=d["int"].cast<py::int_>(),
Expand Down Expand Up @@ -369,4 +373,11 @@ TEST_SUBMODULE(pytypes, m) {
buf, static_cast<ssize_t>(strlen(buf)));
});
#endif

// isinstance probes: report whether the argument is a py::bytes / py::str.
m.def("isinstance_pybind11_bytes", [](py::object o) {
    return py::isinstance<py::bytes>(o);
});
m.def("isinstance_pybind11_str", [](py::object o) {
    return py::isinstance<py::str>(o);
});

// Argument-passing probes: each returns the length of whatever was received,
// so the Python side can see whether the argument conversion succeeded.
m.def("pass_to_pybind11_bytes", [](py::bytes b) {
    return py::len(b);
});
m.def("pass_to_pybind11_str", [](py::str s) {
    return py::len(s);
});
m.def("pass_to_std_string", [](std::string s) {
    return s.size();
});
}
65 changes: 59 additions & 6 deletions tests/test_pytypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,22 @@ def __repr__(self):

assert m.str_from_object(A()) == "this is a str"
assert m.repr_from_object(A()) == "this is a repr"
assert m.str_from_handle(A()) == "this is a str"

s1, s2 = m.str_format()
assert s1 == "1 + 2 = 3"
assert s1 == s2

malformed_utf8 = b"\x80"
if env.PY2:
with pytest.raises(UnicodeDecodeError):
assert m.str_from_object(malformed_utf8)
with pytest.raises(UnicodeDecodeError):
assert m.str_from_handle(malformed_utf8)
else:
assert m.str_from_object(malformed_utf8) == "b'\\x80'"
assert m.str_from_handle(malformed_utf8) == "b'\\x80'"


def test_bytes(doc):
assert m.bytes_from_string().decode() == "foo"
Expand Down Expand Up @@ -190,11 +201,17 @@ def func(self, x, *args):

def test_constructors():
"""C++ default and converting constructors are equivalent to type calls in Python"""
types = [str, bool, int, float, tuple, list, dict, set]
types = [bytes, str, bool, int, float, tuple, list, dict, set]
expected = {t.__name__: t() for t in types}
if env.PY2:
# Note that bytes.__name__ == 'str' in Python 2.
# pybind11::str is unicode even under Python 2.
expected["bytes"] = bytes()
expected["str"] = u"" # flake8 complains about unicode().
assert m.default_constructors() == expected

data = {
bytes: b'41', # Currently no supported or working conversions.
str: 42,
bool: "Not empty",
int: "42",
Expand All @@ -207,6 +224,11 @@ def test_constructors():
}
inputs = {k.__name__: v for k, v in data.items()}
expected = {k.__name__: k(v) for k, v in data.items()}
if env.PY2: # Similar to the above. See comments above.
inputs["bytes"] = b'41'
inputs["str"] = 42
expected["bytes"] = b'41'
expected["str"] = u"42"

assert m.converting_constructors(inputs) == expected
assert m.cast_functions(inputs) == expected
Expand Down Expand Up @@ -245,13 +267,20 @@ def test_pybind11_str_raw_str():
valid_orig = u"DZ"
valid_utf8 = valid_orig.encode("utf-8")
valid_cvt = cvt(valid_utf8)
assert type(valid_cvt) == bytes # Probably surprising.
assert valid_cvt == b'\xc7\xb1'
assert type(valid_cvt) is unicode if env.PY2 else str # noqa: F821
if env.PY2:
assert valid_cvt == valid_orig
else:
assert valid_cvt == u"b'\\xc7\\xb1'"

malformed_utf8 = b'\x80'
malformed_cvt = cvt(malformed_utf8)
assert type(malformed_cvt) == bytes # Probably surprising.
assert malformed_cvt == b'\x80'
if env.PY2:
with pytest.raises(UnicodeDecodeError):
cvt(malformed_utf8)
else:
malformed_cvt = cvt(malformed_utf8)
assert type(malformed_cvt) is unicode if env.PY2 else str # noqa: F821
assert malformed_cvt == u"b'\\x80'"


def test_implicit_casting():
Expand Down Expand Up @@ -379,3 +408,27 @@ def test_memoryview_from_memory():
assert isinstance(view, memoryview)
assert view.format == 'B'
assert bytes(view) == b'\xff\xe1\xab\x37'


def test_isinstance_string_types():
    """The bytes and str isinstance probes each accept only their own kind of literal."""
    empty_bytes = b""
    empty_text = u""

    # bytes probe: true for a bytes literal, false for a unicode literal.
    assert m.isinstance_pybind11_bytes(empty_bytes)
    assert not m.isinstance_pybind11_bytes(empty_text)

    # str probe: true for a unicode literal, false for a bytes literal.
    assert m.isinstance_pybind11_str(empty_text)
    assert not m.isinstance_pybind11_str(empty_bytes)


def test_pass_bytes_or_unicode_to_string_types():
    """Check which of bytes / unicode each string-typed binding accepts as an argument."""
    raw = b"Bytes"
    text = u"Str"

    # The bytes-typed parameter takes bytes but rejects unicode.
    assert m.pass_to_pybind11_bytes(raw) == 5
    with pytest.raises(TypeError):
        m.pass_to_pybind11_bytes(text)  # NO implicit encode

    # The str-typed parameter takes both.
    assert m.pass_to_pybind11_str(raw) == 5  # implicit decode
    assert m.pass_to_pybind11_str(text) == 3

    # The std::string parameter takes both as well.
    assert m.pass_to_std_string(raw) == 5
    assert m.pass_to_std_string(text) == 3

    # Bytes that are not valid UTF-8 make the implicit decode fail.
    malformed_utf8 = b"\x80"
    with pytest.raises(UnicodeDecodeError):
        m.pass_to_pybind11_str(malformed_utf8)