@@ -624,33 +624,38 @@ template <> class type_caster<bool> {
624
624
PYBIND11_TYPE_CASTER (bool , _(" bool" ));
625
625
};
626
626
627
- // Helper class for UTF-{8,16,32} strings:
627
+ // Helper class for UTF-{8,16,32} C++ stl strings:
628
628
template <typename CharT, class Traits , class Allocator >
629
629
struct type_caster <std::basic_string<CharT, Traits, Allocator>, enable_if_t <is_std_char_type<CharT>::value>> {
630
- static constexpr unsigned int UTF_N =
631
- std::is_same<CharT, char >::value ? 8 :
632
- std::is_same<CharT, char16_t >::value ? 16 :
633
- std::is_same<CharT, char32_t >::value ? 32 :
634
- (sizeof (CharT) == 2 ? 16 : 32 ); /* std::wstring is UTF-16 on Windows, UTF-32 everywhere else */
635
-
630
+ // Simplify life by being able to assume standard char sizes (the standard only guarantees
631
+ // minimums), but Python requires exact sizes
632
+ static_assert (!std::is_same<CharT, char >::value || sizeof (CharT) == 1 , " Unsupported char size != 1" );
633
+ static_assert (!std::is_same<CharT, char16_t >::value || sizeof (CharT) == 2 , " Unsupported char16_t size != 2" );
634
+ static_assert (!std::is_same<CharT, char32_t >::value || sizeof (CharT) == 4 , " Unsupported char32_t size != 4" );
635
+ // wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
636
+ static_assert (!std::is_same<CharT, wchar_t >::value || sizeof (CharT) == 2 || sizeof (CharT) == 4 ,
637
+ " Unsupported wchar_t size != 2/4" );
638
+ static constexpr size_t UTF_N = 8 * sizeof (CharT);
636
639
static constexpr const char *encoding = UTF_N == 8 ? " utf8" : UTF_N == 16 ? " utf16" : " utf32" ;
637
640
638
- // C++ only requires char/char16_t/char32_t to be at least 8/16/32 bits, but Python's encoding
639
- // assumes exactly 1/2/4 bytes:
640
- static_assert (sizeof (CharT) == UTF_N / 8 ,
641
- " Internal error: string type_caster requires 1/2/4-sized character types" );
642
-
643
641
using StringType = std::basic_string<CharT, Traits, Allocator>;
644
642
645
643
bool load (handle src, bool ) {
644
+ #if PY_VERSION_MAJOR < 3
646
645
object temp;
646
+ #endif
647
647
handle load_src = src;
648
648
if (!src) {
649
649
return false ;
650
650
} else if (!PyUnicode_Check (load_src.ptr ())) {
651
+ #if PY_VERSION_MAJOR >= 3
652
+ return false ;
653
+ // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
654
+ #else
651
655
temp = reinterpret_steal<object>(PyUnicode_FromObject (load_src.ptr ()));
652
656
if (!temp) { PyErr_Clear (); return false ; }
653
657
load_src = temp;
658
+ #endif
654
659
}
655
660
656
661
object utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString (
@@ -661,7 +666,6 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
661
666
size_t length = (size_t ) PYBIND11_BYTES_SIZE (utfNbytes.ptr ()) / sizeof (CharT);
662
667
if (UTF_N > 8 ) { buffer++; length--; } // Skip BOM for UTF-16/32
663
668
value = StringType (buffer, length);
664
- success = true ;
665
669
return true ;
666
670
}
667
671
@@ -674,24 +678,29 @@ struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_s
674
678
}
675
679
676
680
PYBIND11_TYPE_CASTER (StringType, _(PYBIND11_STRING_NAME));
677
- protected:
678
- bool success = false ;
679
681
};
680
682
681
- template <typename CharT> struct type_caster <CharT, enable_if_t <is_std_char_type<CharT>::value>>
682
- : type_caster<std::basic_string<CharT>> {
683
+ // Type caster for C-style strings. We basically use a std::string type caster, but also add the
684
+ // ability to use None as a nullptr char* (which the string caster doesn't allow).
685
+ template <typename CharT> struct type_caster <CharT, enable_if_t <is_std_char_type<CharT>::value>> {
683
686
using StringType = std::basic_string<CharT>;
684
687
using StringCaster = type_caster<StringType>;
685
- using StringCaster::success ;
686
- using StringCaster::value ;
688
+ StringCaster str_caster ;
689
+ bool none = false ;
687
690
public:
688
691
bool load (handle src, bool convert) {
689
- if (src.is_none ()) return true ;
690
- return StringCaster::load (src, convert);
692
+ if (!src) return false ;
693
+ if (src.is_none ()) {
694
+ // Defer accepting None to other overloads (if we aren't in convert mode):
695
+ if (!convert) return false ;
696
+ none = true ;
697
+ return true ;
698
+ }
699
+ return str_caster.load (src, convert);
691
700
}
692
701
693
702
static handle cast (const CharT *src, return_value_policy policy, handle parent) {
694
- if (src == nullptr ) return none ().inc_ref ();
703
+ if (src == nullptr ) return pybind11:: none ().inc_ref ();
695
704
return StringCaster::cast (StringType (src), policy, parent);
696
705
}
697
706
@@ -704,10 +713,55 @@ template <typename CharT> struct type_caster<CharT, enable_if_t<is_std_char_type
704
713
return StringCaster::cast (StringType (1 , src), policy, parent);
705
714
}
706
715
707
- operator CharT*() { return success ? const_cast <CharT *>(value.c_str ()) : nullptr ; }
708
- operator CharT&() { return value[0 ]; }
716
+ operator CharT*() { return none ? nullptr : const_cast <CharT *>(static_cast <StringType &>(str_caster).c_str ()); }
717
+ operator CharT () {
718
+ if (none)
719
+ throw value_error (" Cannot convert None to a character" );
720
+
721
+ auto &value = static_cast <StringType &>(str_caster);
722
+ size_t str_len = value.size ();
723
+ if (str_len == 0 )
724
+ throw value_error (" Cannot convert empty string to a character" );
725
+
726
+ // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that
727
+ // is too high, and one for multiple unicode characters (caught later), so we need to figure
728
+ // out how long the first encoded character is in bytes to distinguish between these two
729
+ // errors. We also allow want to allow unicode characters U+0080 through U+00FF, as those
730
+ // can fit into a single char value.
731
+ if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4 ) {
732
+ unsigned char v0 = static_cast <unsigned char >(value[0 ]);
733
+ size_t char0_bytes = !(v0 & 0x80 ) ? 1 : // low bits only: 0-127
734
+ (v0 & 0xE0 ) == 0xC0 ? 2 : // 0b110xxxxx - start of 2-byte sequence
735
+ (v0 & 0xF0 ) == 0xE0 ? 3 : // 0b1110xxxx - start of 3-byte sequence
736
+ 4 ; // 0b11110xxx - start of 4-byte sequence
737
+
738
+ if (char0_bytes == str_len) {
739
+ // If we have a 128-255 value, we can decode it into a single char:
740
+ if (char0_bytes == 2 && (v0 & 0xFC ) == 0xC0 ) { // 0x110000xx 0x10xxxxxx
741
+ return static_cast <char >(((v0 & 3 ) << 6 ) + (static_cast <unsigned char >(value[1 ]) & 0x3F ));
742
+ }
743
+ // Otherwise we have a single character, but it's > U+00FF
744
+ throw value_error (" Character code point not in range(0x100)" );
745
+ }
746
+ }
747
+
748
+ // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a
749
+ // surrogate pair with total length 2 instantly indicates a range error (but not a "your
750
+ // string was too long" error).
751
+ else if (StringCaster::UTF_N == 16 && str_len == 2 ) {
752
+ char16_t v0 = static_cast <char16_t >(value[0 ]);
753
+ if (v0 >= 0xD800 && v0 < 0xE000 )
754
+ throw value_error (" Character code point not in range(0x10000)" );
755
+ }
756
+
757
+ if (str_len != 1 )
758
+ throw value_error (" Expected a character, but multi-character string found" );
759
+
760
+ return value[0 ];
761
+ }
709
762
710
763
static PYBIND11_DESCR name () { return type_descr (_ (PYBIND11_STRING_NAME)); }
764
+ template <typename _T> using cast_op_type = typename std::remove_reference<pybind11::detail::cast_op_type<_T>>::type;
711
765
};
712
766
713
767
template <typename T1, typename T2> class type_caster <std::pair<T1, T2>> {
0 commit comments