@@ -471,8 +471,15 @@ template <typename type> class type_caster<std::reference_wrapper<type>> : publi
471
471
template <typename _T> using cast_op_type = pybind11::detail::cast_op_type<_T>
472
472
473
473
474
+ template <typename CharT> using is_std_char_type = any_of<
475
+ std::is_same<CharT, char >, /* std::string */
476
+ std::is_same<CharT, char16_t >, /* std::u16string */
477
+ std::is_same<CharT, char32_t >, /* std::u32string */
478
+ std::is_same<CharT, wchar_t > /* std::wstring */
479
+ >;
480
+
474
481
template <typename T>
475
- struct type_caster <T, enable_if_t <std::is_arithmetic<T>::value>> {
482
+ struct type_caster <T, enable_if_t <std::is_arithmetic<T>::value && !is_std_char_type<T>::value >> {
476
483
using _py_type_0 = conditional_t <sizeof (T) <= sizeof (long ), long , long long >;
477
484
using _py_type_1 = conditional_t <std::is_signed<T>::value, _py_type_0, typename std::make_unsigned<_py_type_0>::type>;
478
485
using py_type = conditional_t <std::is_floating_point<T>::value, double , _py_type_1>;
@@ -617,122 +624,144 @@ template <> class type_caster<bool> {
617
624
PYBIND11_TYPE_CASTER (bool , _(" bool" ));
618
625
};
619
626
620
- template <> class type_caster <std::string> {
621
- public:
622
- bool load (handle src, bool ) {
623
- object temp;
624
- handle load_src = src;
625
- if (!src) {
626
- return false ;
627
- } else if (PyUnicode_Check (load_src.ptr ())) {
628
- temp = reinterpret_steal<object>(PyUnicode_AsUTF8String (load_src.ptr ()));
629
- if (!temp) { PyErr_Clear (); return false ; } // UnicodeEncodeError
630
- load_src = temp;
631
- }
632
- char *buffer;
633
- ssize_t length;
634
- int err = PYBIND11_BYTES_AS_STRING_AND_SIZE (load_src.ptr (), &buffer, &length);
635
- if (err == -1 ) { PyErr_Clear (); return false ; } // TypeError
636
- value = std::string (buffer, (size_t ) length);
637
- success = true ;
638
- return true ;
639
- }
640
-
641
- static handle cast (const std::string &src, return_value_policy /* policy */ , handle /* parent */ ) {
642
- return PyUnicode_FromStringAndSize (src.c_str (), (ssize_t ) src.length ());
643
- }
644
-
645
- PYBIND11_TYPE_CASTER (std::string, _(PYBIND11_STRING_NAME));
646
- protected:
647
- bool success = false ;
648
- };
627
+ // Helper class for UTF-{8,16,32} C++ stl strings:
628
+ template <typename CharT, class Traits , class Allocator >
629
+ struct type_caster <std::basic_string<CharT, Traits, Allocator>, enable_if_t <is_std_char_type<CharT>::value>> {
630
+ // Simplify life by being able to assume standard char sizes (the standard only guarantees
631
+ // minimums), but Python requires exact sizes
632
+ static_assert (!std::is_same<CharT, char >::value || sizeof (CharT) == 1 , " Unsupported char size != 1" );
633
+ static_assert (!std::is_same<CharT, char16_t >::value || sizeof (CharT) == 2 , " Unsupported char16_t size != 2" );
634
+ static_assert (!std::is_same<CharT, char32_t >::value || sizeof (CharT) == 4 , " Unsupported char32_t size != 4" );
635
+ // wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
636
+ static_assert (!std::is_same<CharT, wchar_t >::value || sizeof (CharT) == 2 || sizeof (CharT) == 4 ,
637
+ " Unsupported wchar_t size != 2/4" );
638
+ static constexpr size_t UTF_N = 8 * sizeof (CharT);
639
+ static constexpr const char *encoding = UTF_N == 8 ? " utf8" : UTF_N == 16 ? " utf16" : " utf32" ;
640
+
641
+ using StringType = std::basic_string<CharT, Traits, Allocator>;
649
642
650
- template <> class type_caster <std::wstring> {
651
- public:
652
643
bool load (handle src, bool ) {
644
+ #if PY_VERSION_MAJOR < 3
653
645
object temp;
646
+ #endif
654
647
handle load_src = src;
655
648
if (!src) {
656
649
return false ;
657
650
} else if (!PyUnicode_Check (load_src.ptr ())) {
651
+ #if PY_VERSION_MAJOR >= 3
652
+ return false ;
653
+ // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
654
+ #else
658
655
temp = reinterpret_steal<object>(PyUnicode_FromObject (load_src.ptr ()));
659
656
if (!temp) { PyErr_Clear (); return false ; }
660
657
load_src = temp;
661
- }
662
- wchar_t *buffer = nullptr ;
663
- ssize_t length = -1 ;
664
- #if PY_MAJOR_VERSION >= 3
665
- buffer = PyUnicode_AsWideCharString (load_src.ptr (), &length);
666
- #else
667
- temp = reinterpret_steal<object>(PyUnicode_AsEncodedString (
668
- load_src.ptr (), sizeof (wchar_t ) == sizeof (short )
669
- ? " utf16" : " utf32" , nullptr ));
670
-
671
- if (temp) {
672
- int err = PYBIND11_BYTES_AS_STRING_AND_SIZE (temp.ptr (), (char **) &buffer, &length);
673
- if (err == -1 ) { buffer = nullptr ; } // TypeError
674
- length = length / (ssize_t ) sizeof (wchar_t ) - 1 ; ++buffer; // Skip BOM
675
- }
676
658
#endif
677
- if (!buffer) { PyErr_Clear (); return false ; }
678
- value = std::wstring (buffer, (size_t ) length);
679
- success = true ;
659
+ }
660
+
661
+ object utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString (
662
+ load_src.ptr (), encoding, nullptr ));
663
+ if (!utfNbytes) { PyErr_Clear (); return false ; }
664
+
665
+ const CharT *buffer = reinterpret_cast <const CharT *>(PYBIND11_BYTES_AS_STRING (utfNbytes.ptr ()));
666
+ size_t length = (size_t ) PYBIND11_BYTES_SIZE (utfNbytes.ptr ()) / sizeof (CharT);
667
+ if (UTF_N > 8 ) { buffer++; length--; } // Skip BOM for UTF-16/32
668
+ value = StringType (buffer, length);
680
669
return true ;
681
670
}
682
671
683
- static handle cast (const std::wstring &src, return_value_policy /* policy */ , handle /* parent */ ) {
684
- return PyUnicode_FromWideChar (src.c_str (), (ssize_t ) src.length ());
672
+ static handle cast (const StringType &src, return_value_policy /* policy */ , handle /* parent */ ) {
673
+ const char *buffer = reinterpret_cast <const char *>(src.c_str ());
674
+ ssize_t nbytes = ssize_t (src.size () * sizeof (CharT));
675
+ handle s = PyUnicode_Decode (buffer, nbytes, encoding, nullptr );
676
+ if (!s) throw error_already_set ();
677
+ return s;
685
678
}
686
679
687
- PYBIND11_TYPE_CASTER (std::wstring, _(PYBIND11_STRING_NAME));
688
- protected:
689
- bool success = false ;
680
+ PYBIND11_TYPE_CASTER (StringType, _(PYBIND11_STRING_NAME));
690
681
};
691
682
692
- template <> class type_caster <char > : public type_caster<std::string> {
683
+ // Type caster for C-style strings. We basically use a std::string type caster, but also add the
684
+ // ability to use None as a nullptr char* (which the string caster doesn't allow).
685
+ template <typename CharT> struct type_caster <CharT, enable_if_t <is_std_char_type<CharT>::value>> {
686
+ using StringType = std::basic_string<CharT>;
687
+ using StringCaster = type_caster<StringType>;
688
+ StringCaster str_caster;
689
+ bool none = false ;
693
690
public:
694
691
bool load (handle src, bool convert) {
695
- if (src.is_none ()) return true ;
696
- return type_caster<std::string>::load (src, convert);
697
- }
698
-
699
- static handle cast (const char *src, return_value_policy /* policy */ , handle /* parent */ ) {
700
- if (src == nullptr ) return none ().inc_ref ();
701
- return PyUnicode_FromString (src);
692
+ if (!src) return false ;
693
+ if (src.is_none ()) {
694
+ // Defer accepting None to other overloads (if we aren't in convert mode):
695
+ if (!convert) return false ;
696
+ none = true ;
697
+ return true ;
698
+ }
699
+ return str_caster.load (src, convert);
702
700
}
703
701
704
- static handle cast (char src, return_value_policy /* policy */ , handle /* parent */ ) {
705
- char str[ 2 ] = { src, ' \0 ' } ;
706
- return PyUnicode_DecodeLatin1 (str, 1 , nullptr );
702
+ static handle cast (const CharT * src, return_value_policy policy, handle parent) {
703
+ if (src == nullptr ) return pybind11::none (). inc_ref () ;
704
+ return StringCaster::cast ( StringType (src), policy, parent );
707
705
}
708
706
709
- operator char *() { return success ? const_cast <char *>(value.c_str ()) : nullptr ; }
710
- operator char &() { return value[0 ]; }
711
-
712
- static PYBIND11_DESCR name () { return type_descr (_ (PYBIND11_STRING_NAME)); }
713
- };
707
+ static handle cast (CharT src, return_value_policy policy, handle parent) {
708
+ if (std::is_same<char , CharT>::value) {
709
+ handle s = PyUnicode_DecodeLatin1 ((const char *) &src, 1 , nullptr );
710
+ if (!s) throw error_already_set ();
711
+ return s;
712
+ }
713
+ return StringCaster::cast (StringType (1 , src), policy, parent);
714
+ }
715
+
716
+ operator CharT*() { return none ? nullptr : const_cast <CharT *>(static_cast <StringType &>(str_caster).c_str ()); }
717
+ operator CharT () {
718
+ if (none)
719
+ throw value_error (" Cannot convert None to a character" );
720
+
721
+ auto &value = static_cast <StringType &>(str_caster);
722
+ size_t str_len = value.size ();
723
+ if (str_len == 0 )
724
+ throw value_error (" Cannot convert empty string to a character" );
725
+
726
+ // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that
727
+ // is too high, and one for multiple unicode characters (caught later), so we need to figure
728
+ // out how long the first encoded character is in bytes to distinguish between these two
729
+ // errors. We also allow want to allow unicode characters U+0080 through U+00FF, as those
730
+ // can fit into a single char value.
731
+ if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4 ) {
732
+ unsigned char v0 = static_cast <unsigned char >(value[0 ]);
733
+ size_t char0_bytes = !(v0 & 0x80 ) ? 1 : // low bits only: 0-127
734
+ (v0 & 0xE0 ) == 0xC0 ? 2 : // 0b110xxxxx - start of 2-byte sequence
735
+ (v0 & 0xF0 ) == 0xE0 ? 3 : // 0b1110xxxx - start of 3-byte sequence
736
+ 4 ; // 0b11110xxx - start of 4-byte sequence
737
+
738
+ if (char0_bytes == str_len) {
739
+ // If we have a 128-255 value, we can decode it into a single char:
740
+ if (char0_bytes == 2 && (v0 & 0xFC ) == 0xC0 ) { // 0x110000xx 0x10xxxxxx
741
+ return static_cast <CharT>(((v0 & 3 ) << 6 ) + (static_cast <unsigned char >(value[1 ]) & 0x3F ));
742
+ }
743
+ // Otherwise we have a single character, but it's > U+00FF
744
+ throw value_error (" Character code point not in range(0x100)" );
745
+ }
746
+ }
714
747
715
- template <> class type_caster <wchar_t > : public type_caster<std::wstring> {
716
- public:
717
- bool load (handle src, bool convert) {
718
- if (src.is_none ()) return true ;
719
- return type_caster<std::wstring>::load (src, convert);
720
- }
748
+ // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a
749
+ // surrogate pair with total length 2 instantly indicates a range error (but not a "your
750
+ // string was too long" error).
751
+ else if (StringCaster::UTF_N == 16 && str_len == 2 ) {
752
+ char16_t v0 = static_cast <char16_t >(value[0 ]);
753
+ if (v0 >= 0xD800 && v0 < 0xE000 )
754
+ throw value_error (" Character code point not in range(0x10000)" );
755
+ }
721
756
722
- static handle cast (const wchar_t *src, return_value_policy /* policy */ , handle /* parent */ ) {
723
- if (src == nullptr ) return none ().inc_ref ();
724
- return PyUnicode_FromWideChar (src, (ssize_t ) wcslen (src));
725
- }
757
+ if (str_len != 1 )
758
+ throw value_error (" Expected a character, but multi-character string found" );
726
759
727
- static handle cast (wchar_t src, return_value_policy /* policy */ , handle /* parent */ ) {
728
- wchar_t wstr[2 ] = { src, L' \0 ' };
729
- return PyUnicode_FromWideChar (wstr, 1 );
760
+ return value[0 ];
730
761
}
731
762
732
- operator wchar_t *() { return success ? const_cast <wchar_t *>(value.c_str ()) : nullptr ; }
733
- operator wchar_t &() { return value[0 ]; }
734
-
735
763
static PYBIND11_DESCR name () { return type_descr (_ (PYBIND11_STRING_NAME)); }
764
+ template <typename _T> using cast_op_type = typename std::remove_reference<pybind11::detail::cast_op_type<_T>>::type;
736
765
};
737
766
738
767
template <typename T1, typename T2> class type_caster <std::pair<T1, T2>> {
0 commit comments